{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T14:30:56Z","timestamp":1773412256526,"version":"3.50.1"},"reference-count":43,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,6]]},"DOI":"10.1109\/cvpr.2018.00527","type":"proceedings-article","created":{"date-parts":[[2018,12,18]],"date-time":"2018-12-18T01:49:37Z","timestamp":1545097777000},"page":"5020-5029","source":"Crossref","is-referenced-by-count":183,"title":["An End-to-End TextSpotter with Explicit Alignment and Attention"],"prefix":"10.1109","author":[{"given":"Tong","family":"He","sequence":"first","affiliation":[]},{"given":"Zhi","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Weilin","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Chunhua","family":"Shen","sequence":"additional","affiliation":[]},{"given":"Yu","family":"Qiao","sequence":"additional","affiliation":[]},{"given":"Changming","family":"Sun","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"970","DOI":"10.1109\/TPAMI.2013.182","article-title":"Robust text detection in natural scene images","volume":"36","author":"yin","year":"2014","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref38","first-page":"3","author":"yao","year":"2016","journal-title":"Scene text detection via holistic multi-channel prediction"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.166"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.452"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2646371"},{"key":"ref30","first-page":"3","article-title":"U-Net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"Proc Int Conf Medical Image Computing & Computer-Assisted Intervention"},{"key":"ref37","first-page":"3","article-title":"Detecting texts of arbitrary orientations in natural images","author":"yao","year":"2012","journal-title":"Proc IEEE Conf Comp Vis Patt Recogn"},{"key":"ref36","first-page":"6","author":"wojna","year":"2017","journal-title":"Attention-based extraction of structured information from street view imagery"},{"key":"ref35","article-title":"Detecting text in natural image with connectionist text proposal network","author":"tian","year":"2016","journal-title":"Proc Eur Conf Comp Vis"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.528"},{"key":"ref10","first-page":"3","author":"he","year":"2016","journal-title":"Accurate text localization in natural image with cascaded convolutional text network"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967274"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2547588"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.87"},{"key":"ref13","first-page":"3","author":"hong","year":"2016","journal-title":"Pvanet Lightweight deep neural networks for real-time object detection"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.157"},{"key":"ref15","first-page":"3","article-title":"Robust scene text detection with convolutional neural networks induced MSER trees","author":"huang","year":"2014","journal-title":"Proc Eur Conf Comp Vis"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0823-z"},{"key":"ref17","article-title":"Spatial transformer networks","author":"jaderberg","year":"2015","journal-title":"Proc Advances in Neural Inf Process Syst"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654889"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2496234"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.137"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"ref3","first-page":"3","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.254"},{"key":"ref29","article-title":"Faster R-CNN: Towards real-time object detection with region","author":"ren","year":"2015","journal-title":"Proc Advances in Neural Inf Process Syst"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.331"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.242"},{"key":"ref1","author":"bahdanau","year":"2016","journal-title":"Neural machine translation by jointly learning to align and translate"},{"key":"ref9","article-title":"Reading scene text in deep convolutional sequences","author":"he","year":"2016","journal-title":"Proc 29th AAAI Conf Artif In tell"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.221"},{"key":"ref22","first-page":"1","author":"li","year":"2017","journal-title":"Towards end-to-end car license plates detection and recognition with deep neural networks"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.245"},{"key":"ref24","article-title":"Textboxes: A fast text detector with a single deep neural network","author":"liao","year":"2017","journal-title":"Proc AAAI Nat Conf Artificial Intell"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.451"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.560"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298871"},{"key":"ref26","first-page":"1","author":"liu","year":"2018","journal-title":"Fots Fast oriented text spotting with a unified network"},{"key":"ref25","article-title":"SSD: Single shot multibox detector","author":"liu","year":"2016","journal-title":"Proc Eur Conf Comp Vis"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.283"}],"event":{"name":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","location":"Salt Lake City, UT, USA","start":{"date-parts":[[2018,6,18]]},"end":{"date-parts":[[2018,6,23]]}},"container-title":["2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8576498\/8578098\/08578625.pdf?arnumber=8578625","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,27]],"date-time":"2022-01-27T02:36:17Z","timestamp":1643250977000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8578625\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6]]},"references-count":43,"URL":"https:\/\/doi.org\/10.1109\/cvpr.2018.00527","relation":{},"subject":[],"published":{"date-parts":[[2018,6]]}}}