@inproceedings{329e51622f28494e98a767f2279aa0db,
title = "Combining frame and turn-level information for robust recognition of emotions within speech",
abstract = "Current approaches to the recognition of emotion within speech usually use statistic feature information obtained by application of functionals on turn- or chunk levels. Yet, it is well known that thereby important information on temporal sub-layers as the frame-level is lost. We therefore investigate the benefits of integration of such information within turn-level feature space. For frame-level analysis we use GMM for classification and 39 MFCC and energy features with CMS. In a subsequent step output scores are fed forward into a 1.4k large-feature-space turn-level SVM emotion recognition engine. Thereby we use a variety of Low-Level-Descriptors and functionals to cover prosodic, speech quality, and articulatory aspects. Extensive test-runs are carried out on the public databases EMO-DB and SUSAS. Speaker-independent analysis is faced by speaker normalization. Overall results highly emphasize the benefits of feature integration on diverse time scales.",
keywords = "Emotion recognition, Frame-level analysis, Model fusion, Turn-level analysis",
author = "Vlasenko, Bogdan and Schuller, Bj{\"o}rn and Wendemuth, Andreas and Rigoll, Gerhard",
year = "2007",
language = "English",
isbn = "9781605603162",
pages = "2712--2715",
booktitle = "International Speech Communication Association - 8th Annual Conference of the International Speech Communication Association, Interspeech 2007",
note = "8th Annual Conference of the International Speech Communication Association, Interspeech 2007 ; Conference date: 27-08-2007 Through 31-08-2007",
}