% Conference paper in the PSIVT 2015 proceedings (LNCS vol. 9431).
% The citation key is kept as exported so existing \cite commands still resolve.
% NOTE(review): publisher follows the copyright line in the abstract
% ("Springer International Publishing Switzerland"); the location "Cham" is
% the usual imprint city for that publisher -- confirm against the volume's
% front matter. The first author's given names are only available as initials.
@inproceedings{098b2a897a5a4d7ba6cbdd631fb77103,
  author    = {Alam, M. R. and Bennamoun, Mohammed and Togneri, Roberto and Sohel, Ferdous},
  title     = {Deep {Boltzmann} Machines for i-Vector Based Audio-Visual Person Identification},
  booktitle = {Image and Video Technology, {PSIVT} 2015},
  editor    = {Br{\"a}unl, Thomas and McCane, Brendan and Rivera, Mariano and Yu, Xinguo},
  series    = {Lecture Notes in Computer Science},
  volume    = {9431},
  pages     = {631--641},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  isbn      = {9783319294506},
  doi       = {10.1007/978-3-319-29451-3_50},
  language  = {English},
  note      = {7th Pacific-Rim Symposium on Image and Video Technology (PSIVT 2015), 23--27 November 2015},
  abstract  = {We propose an approach using DBM-DNNs for i-vector based audio-visual person identification. The unsupervised training of two Deep Boltzmann Machines DBMspeech and DBMface is performed using unlabeled audio and visual data from a set of background subjects. The DBMs are then used to initialize two corresponding DNNs for classification, referred to as the DBM-DNNspeech and DBM-DNNface in this paper. The DBM-DNNs are discriminatively fine-tuned using the back-propagation on a set of training data and evaluated on a set of test data from the target subjects. We compared their performance with the cosine distance (cosDist) and the state-of-the-art DBN-DNN classifier. We also tested three different configurations of the DBM-DNNs. We show that DBM-DNNs with two hidden layers and 800 units in each hidden layer achieved best identification performance for 400 dimensional i-vectors as input. Our experiments were carried out on the challenging MOBIO dataset. {\textcopyright} Springer International Publishing Switzerland 2016.},
}