{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T13:59:46Z","timestamp":1762351186470,"version":"3.37.3"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2016,6,1]],"date-time":"2016-06-01T00:00:00Z","timestamp":1464739200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61371136","61271389"],"award-info":[{"award-number":["61371136","61271389"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"National Basic Research Program (973 Program) of China","award":["2013CB329302"],"award-info":[{"award-number":["2013CB329302"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2016,6]]},"DOI":"10.1109\/taslp.2016.2544660","type":"journal-article","created":{"date-parts":[[2016,3,21]],"date-time":"2016-03-21T18:56:54Z","timestamp":1458586614000},"page":"1129-1139","source":"Crossref","is-referenced-by-count":36,"title":["Improving Short Utterance Speaker Recognition by Modeling Speech Unit Classes"],"prefix":"10.1109","volume":"24","author":[{"given":"Lantian","family":"Li","sequence":"first","affiliation":[]},{"given":"Dong","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Chenhao","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Thomas Fang","family":"Zheng","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"doi-asserted-by":"publisher","key":"ref39","DOI":"10.1038\/214830a0"},{"doi-asserted-by":"publisher","key":"ref38","DOI":"10.1006\/csla.1998.0043"},{"year":"2014","author":"gong","journal-title":"Research on Highly Distinguishable Speech Selection Methods in Speaker Recognition","key":"ref33"},{"doi-asserted-by":"publisher","key":"ref32","DOI":"10.1007\/978-0-387-77592-0"},{"doi-asserted-by":"publisher","key":"ref31","DOI":"10.1006\/jmla.2001.2793"},{"doi-asserted-by":"publisher","key":"ref30","DOI":"10.1016\/S0167-6393(00)00094-7"},{"doi-asserted-by":"publisher","key":"ref37","DOI":"10.1006\/csla.1995.0010"},{"key":"ref36","first-page":"601","article-title":"Automatically clustering similar units for unit selection in speech synthesis","volume":"2","author":"black","year":"0","journal-title":"Proc EUROSPEECH"},{"key":"ref35","first-page":"307","author":"young","year":"0","journal-title":"Proc Workshop Hum Lang Technol"},{"key":"ref34","first-page":"16","author":"fatima","year":"0","journal-title":"Proc 11th Nat Conf Man-Mach Speech Commun (NCMMSC&#x2019;11)"},{"key":"ref28","first-page":"1617","article-title":"Improved context-dependent acoustic modeling for continuous chinese speech recognition","author":"zhang","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref27","first-page":"52","article-title":"Training universal background models for speaker recognition","author":"omar","year":"0","journal-title":"Proc Odyssey"},{"year":"1999","journal-title":"Handbook of the International Phonetic Association A Guide to the Use of the International Phonetic Alphabet","key":"ref29"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1016\/j.csl.2005.06.003"},{"doi-asserted-by":"publisher","key":"ref1","DOI":"10.1109\/5.628714"},{"doi-asserted-by":"publisher","key":"ref20","DOI":"10.1109\/TASL.2009.2031505"},{"key":"ref22","article-title":"Factor analysis modelling for speaker verification with short utterances","author":"vogt","year":"0","journal-title":"Proc Odyssey"},{"key":"ref21","first-page":"i","article-title":"A comparison of various adaptation methods for speaker verification with limited enrollment data","volume":"1","author":"mak","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process (ICASSP)"},{"doi-asserted-by":"publisher","key":"ref24","DOI":"10.1016\/j.specom.2010.04.007"},{"key":"ref23","first-page":"2341","article-title":"i-vector based speaker recognition on short utterances","author":"kanagasundaram","year":"0","journal-title":"Proc 12th Annu Conf Int Speech Commun Assoc (ISCA)"},{"doi-asserted-by":"publisher","key":"ref26","DOI":"10.1109\/89.279278"},{"doi-asserted-by":"publisher","key":"ref25","DOI":"10.1109\/79.543975"},{"doi-asserted-by":"publisher","key":"ref50","DOI":"10.1109\/TASSP.1981.1163530"},{"key":"ref51","doi-asserted-by":"crossref","first-page":"1895","DOI":"10.21437\/Eurospeech.1997-504","article-title":"The DET curve in assessment of detection task performance","volume":"4","author":"martin","year":"1997","journal-title":"Proc EUROSPEECH"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/TASL.2010.2064307"},{"key":"ref11","first-page":"1471","article-title":"Within-class covariance normalization for SVM-based speaker recognition","author":"hatch","year":"0","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref40","DOI":"10.1109\/TSA.2003.815822"},{"key":"ref12","first-page":"28","article-title":"An i-vector extractor suitable for speaker recognition with both microphone and telephone speech","author":"senoussaoui","year":"0","journal-title":"Proc Odyssey"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.1109\/ICCV.2007.4409052"},{"doi-asserted-by":"publisher","key":"ref14","DOI":"10.1561\/2000000039"},{"key":"ref15","first-page":"293","article-title":"Deep neural networks for extracting baum-welch statistics for speaker recognition","author":"kenny","year":"0","journal-title":"Proc Odyseey"},{"doi-asserted-by":"publisher","key":"ref16","DOI":"10.1109\/ICASSP.2014.6853887"},{"key":"ref17","first-page":"4052","article-title":"Deep neural networks for small footprint text-dependent speaker verification","author":"ehsan","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process (ICASSP)"},{"year":"2015","author":"li","article-title":"Deep speaker vectors for semi text-independent speaker verification","key":"ref18"},{"key":"ref19","first-page":"1580","article-title":"RSR2015: Database for text-dependent speaker verification using multiple pass-phrases","author":"larcher","year":"0","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.1016\/j.specom.2009.08.009"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.1155\/S1110865704310024"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.1016\/0167-6393(95)00009-D"},{"key":"ref5","first-page":"1971","article-title":"The 2012 NIST speaker recognition evaluation","author":"greenberg","year":"0","journal-title":"Proc INTERSPEECH"},{"doi-asserted-by":"publisher","key":"ref8","DOI":"10.1109\/89.279278"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1006\/dspr.1999.0361"},{"key":"ref49","first-page":"epfl-conf","article-title":"The kaldi speech recognition toolkit","author":"povey","year":"0","journal-title":"Proc IEEE Signal Process Soc Workshop Autom Speech Recognit Understand"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/TASL.2006.881693"},{"key":"ref46","first-page":"252","author":"xiong","year":"0","journal-title":"Proc Nat Conf Man-Mach Speech Commun"},{"key":"ref45","first-page":"1","article-title":"A K-phoneme-class based multi-model method for short utterance speaker recognition","volume":"20","author":"zhang","year":"0","journal-title":"Proc Asia-Pac Signal Inf Process Assoc Annu Summit Conf (APSIPA ASC)"},{"key":"ref48","first-page":"1523p","article-title":"Multi-layer channel normalization for frequency-dynamic feature extraction","volume":"12","author":"wang","year":"2005","journal-title":"J Software"},{"key":"ref47","first-page":"1307","article-title":"Acoustical modelling of phone transitions: Biphones and diphones-what are the differences?","author":"dobrisek","year":"0","journal-title":"Proc 6th Eur Conf Speech Commun Technol"},{"doi-asserted-by":"publisher","key":"ref42","DOI":"10.1109\/TASL.2007.902859"},{"key":"ref41","first-page":"126","article-title":"A gentle tutorial of the EM algorithm and its application to parameter estimation for Gaussian mixture and hidden Markov models","volume":"4","author":"bilmes","year":"1998","journal-title":"Int Comput Sci Inst"},{"key":"ref44","article-title":"A multimodel method for short-utterance speaker recognition","author":"zhang","year":"0","journal-title":"Proc Asia-Pac Signal Inf Process Assoc Annu Summit Conf (APSIPA ASC)"},{"key":"ref43","first-page":"599","article-title":"The use of cohort normalized scores for speaker verification","volume":"92","author":"rosenberg","year":"0","journal-title":"Proc Int Conf Spoken Lang Process (ICSLP)"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7463555\/07437438.pdf?arnumber=7437438","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,17]],"date-time":"2023-08-17T08:55:15Z","timestamp":1692262515000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7437438\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,6]]},"references-count":51,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2016.2544660","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"type":"print","value":"2329-9290"},{"type":"electronic","value":"2329-9304"}],"subject":[],"published":{"date-parts":[[2016,6]]}}}