% Schuller, Weninger, Woellmer, Sun, Rigoll: conference paper on NMF-based
% noise-robust features for speech recognition (ICASSP 2010 proceedings).
% The opaque hexadecimal citation key is kept unchanged so that existing
% \cite commands continue to resolve.
% Acronyms are brace-protected so sentence-casing styles keep their capitals;
% the percent sign in the abstract is escaped so that styles which print the
% abstract do not turn the rest of the line into a TeX comment.
@inproceedings{f1fbbd842a1644449c2b9f92e50d53d7,
  author    = {Schuller, Bj{\"o}rn and Weninger, Felix and W{\"o}llmer, Martin and Sun, Yang and Rigoll, Gerhard},
  title     = {Non-negative matrix factorization as noise-robust feature extractor for speech recognition},
  booktitle = {2010 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2010 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2010},
  pages     = {4562--4565},
  isbn      = {9781424442966},
  doi       = {10.1109/ICASSP.2010.5495567},
  language  = {English},
  keywords  = {Dynamic {Bayesian} networks, Long short-term memory, Noise robustness, Non-negative matrix factorization, Speech recognition},
  abstract  = {We introduce a novel approach for noise-robust feature extraction in speech recognition, based on non-negative matrix factorization ({NMF}). While {NMF} has previously been used for speech denoising and speaker separation, we directly extract time-varying features from the {NMF} output. To this end we extend basic unsupervised {NMF} to a hybrid supervised/unsupervised algorithm. We present a Dynamic Bayesian Network ({DBN}) architecture that can exploit these features in a Tandem manner together with the maximum likelihood phoneme estimate of a bidirectional long short-term memory ({BLSTM}) recurrent neural network. We show that addition of {NMF} features to spelling recognition systems can increase word accuracy by up to 7\% absolute in a noisy car environment.},
  note      = {Conference date: 14--19 March 2010},
}