% Conference paper in the SPECOM 2021 proceedings (Lecture Notes in Computer Science).
% Event metadata and the publisher copyright line live in dedicated fields
% (eventtitle / eventdate / copyright) rather than in `note`, so bibliography
% styles do not print them verbatim; classic BibTeX styles ignore these fields.
% The publisher is double-braced so the " and " in its name is not read as a
% list separator by biblatex.
@inproceedings{5468c2664e0a49188da1397a472a704d,
  author     = {Watzel, Tobias and K{\"u}rzinger, Ludwig and Li, Lujun and Rigoll, Gerhard},
  title      = {Induced Local Attention for Transformer Models in Speech Recognition},
  booktitle  = {Speech and Computer -- 23rd International Conference, {SPECOM} 2021, Proceedings},
  editor     = {Karpov, Alexey and Potapova, Rodmonga},
  series     = {Lecture Notes in Computer Science},
  publisher  = {{Springer Science and Business Media Deutschland GmbH}},
  year       = {2021},
  pages      = {795--806},
  doi        = {10.1007/978-3-030-87802-3_71},
  isbn       = {9783030878016},
  language   = {English},
  keywords   = {Attention fusion, Local attention, Speech recognition, Transformer},
  abstract   = {The transformer models and their variations currently are considered the prime model architectures in speech recognition since they yield state-of-the-art results on several datasets. Their main strength lies in the self-attention mechanism, where the models receive the ability to calculate a score over the whole input sequence and focus on essential aspects of the sequence. However, the attention score has some flaws. It is heavily global-dependent since it takes the whole sequence into account and normalizes along the sequence length. Our work presents a novel approach for a dynamic fusion between the global and a local attention score based on a Gaussian mask. The small networks for learning the fusion process and the Gaussian masks require only few additional parameters and are simple to add to current transformer architectures. With our exhaustive evaluation, we determine the effect of localness in the encoder layers and examine the most effective fusion approach. The results on the dataset TEDLIUMv2 demonstrate a steady improvement on the dev and the test set for the base transformer model equipped with our proposed fusion procedure for local attention.},
  eventtitle = {23rd International Conference on Speech and Computer, {SPECOM} 2021},
  eventdate  = {2021-09-27/2021-09-30},
  copyright  = {{\textcopyright} 2021, Springer Nature Switzerland AG},
}