{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T18:30:03Z","timestamp":1776882603273,"version":"3.51.2"},"reference-count":69,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Multidisciplinary Institute in Artificial Intelligence"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.3000593","type":"journal-article","created":{"date-parts":[[2020,6,8]],"date-time":"2020-06-08T21:48:51Z","timestamp":1591652931000},"page":"1788-1800","source":"Crossref","is-referenced-by-count":65,"title":["Audio-Visual Speech Enhancement Using Conditional Variational Auto-Encoders"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0272-8017","authenticated-orcid":false,"given":"Mostafa","family":"Sadeghi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8219-1298","authenticated-orcid":false,"given":"Simon","family":"Leglaive","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5354-1084","authenticated-orcid":false,"given":"Xavier","family":"Alameda-Pineda","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9214-8760","authenticated-orcid":false,"given":"Laurent","family":"Girin","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5232-024X","authenticated-orcid":false,"given":"Radu","family":"Horaud","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2270369"},{"key":"ref38","first-page":"1217","article-title":"Phoneme-dependent NMF for speech enhancement in monaural mixtures","author":"raj","year":"0","journal-title":"Proc Conf Int Speech Commun Assoc"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.899233"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.851927"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164453"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.21236\/ADA073139"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518538"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2008.04-08-771"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/S0165-1684(01)00128-1"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"ref60","article-title":"DARPA TIMIT acoustic phonetic continuous speech corpus CDROM","author":"garofolo","year":"1993"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1121\/1.4799597"},{"key":"ref61","article-title":"FaNT&#x2014;Filtering and noise adding tool","author":"hirsch","year":"2005"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-74494-8_52"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1121\/1.2229005"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5946317"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-860"},{"key":"ref65","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.858005"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1979.1163209"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941023"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682797"},{"key":"ref2","author":"benesty","year":"2006","journal-title":"Speech Enhancement"},{"key":"ref1","author":"lim","year":"1983","journal-title":"Speech Enhancement"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2018.8516711"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683704"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPA.2018.8659591"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1398"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682546"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937218"},{"key":"ref25","first-page":"3483","article-title":"Learning structured output representation using deep conditional generative models","author":"sohn","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683497"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2096212"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00168"},{"key":"ref58","author":"robert","year":"2005","journal-title":"Monte Carlo Statistical Methods"},{"key":"ref57","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"Maximum likelihood from incomplete data via the EM algorithm","volume":"39","author":"dempster","year":"1977","journal-title":"J Roy Statist Soc Ser B"},{"key":"ref56","author":"higgins","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461326"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.2017.1285773"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007665907178"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1990.10474930"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/SAM.2002.1191001"},{"key":"ref11","first-page":"ii?2025","article-title":"Noisy audio feature enhancement using audio-visual speech data","author":"goecke","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref12","first-page":"1173","article-title":"Audio-visual sound separation via hidden Markov models","author":"hershey","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6638354"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1400"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1955"},{"key":"ref16","first-page":"3051","article-title":"Seeing through noise: Speaker separation and enhancement using visually-derived speech","author":"gabbay","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2017.2784878"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2516"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461530"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1121\/1.1907309"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1201\/9781420015836"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.3109\/03005368709077786"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1044\/jshd.4004.481"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1121\/1.1358887"},{"key":"ref7","first-page":"1559","article-title":"Noisy speech enhancement with filters estimated from the speaker's lips","author":"girin","year":"0","journal-title":"Proc Eur Conf Speech Commun Technol"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682623"},{"key":"ref9","first-page":"772","article-title":"Learning joint statistical models for audio-visual fusion and segregation","author":"fisher iii","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22482-4_11"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2352935"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01217"},{"key":"ref47","first-page":"3581","article-title":"Semi-supervised learning with deep generative models","author":"kingma","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2364452"},{"key":"ref41","first-page":"436","article-title":"Speech enhancement based on deep denoising autoencoder","author":"lu","year":"0","journal-title":"Proc Conf Int Speech Commun Assoc"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2250961"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-211"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/09110765.pdf?arnumber=9110765","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T05:54:59Z","timestamp":1723010099000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9110765\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":69,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.3000593","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}