@inproceedings{0122d31f995a4e1baa6a9f462d8760a9,
  author    = {Burton, Jake and Frank, David and Saleh, Mahdi and Navab, Nassir and Bear, Helen L.},
  title     = {The speaker-independent lipreading play-off; A survey of lipreading machines},
  booktitle = {{IEEE} 3rd International Conference on Image Processing, Applications and Systems, {IPAS} 2018},
  series    = {{IEEE} 3rd International Conference on Image Processing, Applications and Systems, {IPAS} 2018},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {125--130},
  year      = {2018},
  month     = jul,
  day       = {2},
  doi       = {10.1109/IPAS.2018.8708874},
  language  = {English},
  keywords  = {Speaker-independent, lipreading, visual speech},
  abstract  = {Lipreading is a difficult gesture classification task. One problem in computer lipreading is speaker-independence. Speaker-independence means to achieve the same accuracy on test speakers not included in the training set as speakers within the training set. Current literature is limited on speaker-independent lipreading, the few independent test speaker accuracy scores are usually aggregated within dependent test speaker accuracies for an averaged performance. This leads to unclear independent results. Here we undertake a systematic survey of experiments with the TCD-TIMIT dataset using both conventional approaches and deep learning methods to provide a series of wholly speaker-independent benchmarks and show that the best speaker-independent machine scores 69.58\% accuracy with CNN features and an SVM classifier. This is less than state-of-the-art speaker-dependent lipreading machines, but greater than previously reported in independence experiments.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 3rd IEEE International Conference on Image Processing, Applications and Systems, IPAS 2018 ; Conference date: 12-12-2018 Through 14-12-2018},
}