{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:26:57Z","timestamp":1775665617623,"version":"3.50.1"},"reference-count":24,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2016,4,21]],"date-time":"2016-04-21T00:00:00Z","timestamp":1461196800000},"content-version":"vor","delay-in-days":111,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Procedia Computer Science"],"published-print":{"date-parts":[[2016]]},"DOI":"10.1016\/j.procs.2016.04.035","type":"journal-article","created":{"date-parts":[[2016,5,3]],"date-time":"2016-05-03T14:33:41Z","timestamp":1462286021000},"page":"95-100","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":12,"special_numbering":"C","title":["A Temporal Coherence Loss Function for Learning Unsupervised Acoustic Embeddings"],"prefix":"10.1016","volume":"81","author":[{"given":"Gabriel","family":"Synnaeve","sequence":"first","affiliation":[]},{"given":"Emmanuel","family":"Dupoux","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.procs.2016.04.035_bib0005","unstructured":"Mohamed, A., Dahl, G., Hinton, G. Deep belief networks for phone recognition. In: NIPS Workshop on Deep Learning for Speech Recognition and Related Applications. 2009."},{"key":"10.1016\/j.procs.2016.04.035_bib0010","doi-asserted-by":"crossref","unstructured":"Dahl, G.E., Sainath, T.N., Hinton, G.E. Improving deep neural networks for lvcsr using rectified linear units and dropout. ICASSP; 2013.","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"10.1016\/j.procs.2016.04.035_bib0015","doi-asserted-by":"crossref","unstructured":"Jansen, A., Dupoux, E., Goldwater, S., Johnson, M., Khudanpur, S., Church, K., et al. A summary of the 2012 JH CLSP workshop on zero resource speech technologies and models of early language acquisition. In: Proceedings of ICASSP 2013. 2013.","DOI":"10.1109\/ICASSP.2013.6639245"},{"key":"10.1016\/j.procs.2016.04.035_bib0020","doi-asserted-by":"crossref","unstructured":"Versteegh, M., Thiollie\u2018re, R., Schatz, T., Xuan-Nga, C., Anguera, X., Jansen, A., et al. The zero resource speech challenge 2015. In: INTERSPEECH-2015. 2015.","DOI":"10.21437\/Interspeech.2015-638"},{"key":"10.1016\/j.procs.2016.04.035_bib0025","doi-asserted-by":"crossref","unstructured":"Thiollie\u2018re, R., Dunbar, E., Synnaeve, G., Versteegh, M., Dupoux, E. A hybrid dynamic time warping-deep neural network architecture for unsupervised acoustic modeling. In: Sixteenth Annual Conference of the International Speech Communication Association. 2015.","DOI":"10.21437\/Interspeech.2015-640"},{"issue":"2","key":"10.1016\/j.procs.2016.04.035_bib0030","doi-asserted-by":"crossref","first-page":"434","DOI":"10.1121\/1.380688","article-title":"Vowel duration in american english","volume":"58","author":"Umeda","year":"1975","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"3","key":"10.1016\/j.procs.2016.04.035_bib0035","doi-asserted-by":"crossref","first-page":"846","DOI":"10.1121\/1.381374","article-title":"Consonant duration in american english","volume":"61","author":"Umeda","year":"1977","journal-title":"The Journal of the Acoustical Society of America"},{"key":"10.1016\/j.procs.2016.04.035_bib0040","doi-asserted-by":"crossref","unstructured":"Synnaeve, G., Schatz, T., Dupoux, E. Phonetics embedding learning with side information. In: IEEE Spoken Language Technology Workshop (SLT 2014). IEEE; 2014, doi:10.1109\/slt.2014.7078558.","DOI":"10.1109\/SLT.2014.7078558"},{"key":"10.1016\/j.procs.2016.04.035_bib0045","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Variani, E., Peddinti, V. Mean temporal distance: predicting asr error from temporal properties of speech signal. In: Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on. IEEE; 2013, p. 7423-7426.","DOI":"10.1109\/ICASSP.2013.6639105"},{"key":"10.1016\/j.procs.2016.04.035_bib0050","doi-asserted-by":"crossref","unstructured":"Hermansky, H., Burget, L., Cohen, J., Dupoux, E., Feldman, N., Godfrey, J., et al. Towards machines that know when they do not know: Summary of work done at 2014 frederick jelinek memorial workshop in prague. In: ICASSP-2015 (IEEE International Conference on Acoustics Speech and Signal Processing). 2015.","DOI":"10.1109\/ICASSP.2015.7178924"},{"key":"10.1016\/j.procs.2016.04.035_bib0055","doi-asserted-by":"crossref","unstructured":"Varadarajan, B., Khudanpur, S., Dupoux, E. Unsupervised learning of acoustic subword units. In: Proceedings of ACL-08: HLT. 2008, p. 165-168.","DOI":"10.3115\/1557690.1557736"},{"key":"10.1016\/j.procs.2016.04.035_bib0060","unstructured":"Lee, C.y., Glass, J. A nonparametric Bayesian approach to acoustic model discovery. In: Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Long Papers-Volume 1. 2012, p. 40-49."},{"key":"10.1016\/j.procs.2016.04.035_bib0065","doi-asserted-by":"crossref","unstructured":"Siu, M.h., Gish, H., Chan, A., Belfield, W., Lowe, S. Unsupervized training of an HMM-based self-organizing recognizer with applications to topic classification and keyword discovery. Computer Speech & Language 2013; preprint.","DOI":"10.1016\/j.csl.2013.05.002"},{"key":"10.1016\/j.procs.2016.04.035_bib0070","doi-asserted-by":"crossref","unstructured":"Jansen, A., Church, K. Towards unsupervised training of speaker independent acoustic models. In: Proceedings of INTERSPEECH. 2011, p. 1693-1696.","DOI":"10.21437\/Interspeech.2011-184"},{"issue":"04","key":"10.1016\/j.procs.2016.04.035_bib0075","doi-asserted-by":"crossref","first-page":"669","DOI":"10.1142\/S0218001493000339","article-title":"Signature verification using a siamese time delay neural network","volume":"7","author":"Bromley","year":"1993","journal-title":"Internat Journ of Pattern Recog and Artific Intell"},{"key":"10.1016\/j.procs.2016.04.035_bib0080","doi-asserted-by":"crossref","unstructured":"Hadsell, R., Chopra, S., LeCun, Y. Dimensionality reduction by learning an invariant mapping. In: Computer vision and pattern recognition, 2006 IEEE computer society conference on; vol. 2. IEEE; 2006, p. 1735-1742.","DOI":"10.1109\/CVPR.2006.100"},{"key":"10.1016\/j.procs.2016.04.035_bib0085","unstructured":"Weston, J., Bengio, S., Usunier, N. Wsabie: Scaling up to large vocabulary image annotation. In: IJCAI; vol. 11. 2011, p. 2764-2770."},{"key":"10.1016\/j.procs.2016.04.035_bib0090","unstructured":"Synnaeve, G., Dupoux, E. Weakly supervised multi-embeddings learning of acoustic models. In: ICLR. 2014."},{"key":"10.1016\/j.procs.2016.04.035_bib0095","unstructured":"Zeiler, M.D. Adadelta: An adaptive learning rate method. arXiv preprint:12125701 2012."},{"key":"10.1016\/j.procs.2016.04.035_bib0100","unstructured":"Bastien, F., Lamblin, P., Pascanu, R., Bergstra, J., Goodfellow, I.J., Bergeron, A., et al. Theano: new features and speed improvements. Deep Learning and Unsupervised Feature Learning NIPS 2012 Workshop; 2012."},{"issue":"1","key":"10.1016\/j.procs.2016.04.035_bib0105","first-page":"1929","article-title":"A simple way to prevent neural networks from overfitting","volume":"15","author":"Srivastava","year":"2014","journal-title":"The Journal of Machine Learning Research"},{"key":"10.1016\/j.procs.2016.04.035_bib0110","doi-asserted-by":"crossref","unstructured":"Schatz, T., Peddinti, V., Bach, F., Jansen, A., Hynek, H., Dupoux, E. Evaluating speech features with the minimal-pair abx task: Analysis of the classical mfc\/plp pipeline. In: INTERSPEECH-2013. 2013, p. 1781-1785.","DOI":"10.21437\/Interspeech.2013-441"},{"key":"10.1016\/j.procs.2016.04.035_bib0115","doi-asserted-by":"crossref","unstructured":"Schatz, T., Peddinti, V., Cao, X.N., Bach, F., Hermansky, H., Dupoux, E. Evaluating speech features with the Minimal-Pair ABX task (II): Resistance to noise. In: Fifteenth Annual Conference of the International Speech Communication Association. 2014.","DOI":"10.21437\/Interspeech.2014-228"},{"key":"10.1016\/j.procs.2016.04.035_bib0120","doi-asserted-by":"crossref","unstructured":"Deng, L., Seltzer, M.L., Yu, D., Acero, A., Mohamed, A.R., Hinton, G.E. Binary coding of speech spectrograms using a deep auto-encoder. In: Interspeech. Citeseer; 2010, p. 1692-1695.","DOI":"10.21437\/Interspeech.2010-487"}],"container-title":["Procedia Computer Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050916300497?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1877050916300497?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2022,6,19]],"date-time":"2022-06-19T18:05:04Z","timestamp":1655661904000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1877050916300497"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"references-count":24,"alternative-id":["S1877050916300497"],"URL":"https:\/\/doi.org\/10.1016\/j.procs.2016.04.035","relation":{},"ISSN":["1877-0509"],"issn-type":[{"value":"1877-0509","type":"print"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A Temporal Coherence Loss Function for Learning Unsupervised Acoustic Embeddings","name":"articletitle","label":"Article Title"},{"value":"Procedia Computer Science","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.procs.2016.04.035","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2016 The Author(s). Published by Elsevier B.V.","name":"copyright","label":"Copyright"}]}}