@inproceedings{370fab5cbe3748dc8ac1f3536a6959bb,
title = "Affect-robust speech recognition by dynamic emotional adaptation",
abstract = "Automatic Speech Recognition fails to a certain extent when confronted with highly affective speech. In order to cope with this problem we suggest dynamic adaptation to the actual user emotion. The ASR framework is built on a hybrid ANN/HMM mono-phone 5k bi-gram LM recognizer. Based on this, we show adaptation to the affective speaking style. Speech emotion recognition takes place prior to the actual recognition task to choose appropriate models. We therefore focus on fast emotion recognition with low extra feature extraction effort. As databases for a proof of concept we use a single-digit task and sentences from the well-known WSJ corpus. These have been re-recorded in acted neutral and angry speaking styles under ideal acoustic conditions to exclude other influences. The effectiveness of acoustic emotion recognition is also proven on the SUSAS corpus. We finally evaluate the need for adaptation and demonstrate the significant superiority of our dynamic approach over static adaptation.",
author = "Bj{\"o}rn Schuller and Jan Stadermann and Gerhard Rigoll",
note = "Publisher Copyright: {\textcopyright} 2006 Proceedings of the International Conference on Speech Prosody; 3rd International Conference on Speech Prosody, SP 2006; Conference date: 02-05-2006 through 05-05-2006",
year = "2006",
language = "English",
series = "Proceedings of the International Conference on Speech Prosody",
publisher = "International Speech Communication Association",
editor = "R. Hoffmann and H. Mixdorff",
booktitle = "3rd International Conference on Speech Prosody 2006",
}