{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T15:31:13Z","timestamp":1778081473282,"version":"3.51.4"},"reference-count":69,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/iccv48922.2021.01256","type":"proceedings-article","created":{"date-parts":[[2022,2,28]],"date-time":"2022-02-28T22:08:02Z","timestamp":1646086082000},"page":"12777-12787","source":"Crossref","is-referenced-by-count":5,"title":["DnD: Dense Depth Estimation in Crowded Dynamic Indoor Scenes"],"prefix":"10.1109","author":[{"given":"Dongki","family":"Jung","sequence":"first","affiliation":[{"name":"NAVER LABS"}]},{"given":"Jaehoon","family":"Choi","sequence":"additional","affiliation":[{"name":"NAVER LABS"}]},{"given":"Yonghan","family":"Lee","sequence":"additional","affiliation":[{"name":"NAVER LABS"}]},{"given":"Deokhwa","family":"Kim","sequence":"additional","affiliation":[{"name":"NAVER LABS"}]},{"given":"Changick","family":"Kim","sequence":"additional","affiliation":[{"name":"KAIST"}]},{"given":"Dinesh","family":"Manocha","sequence":"additional","affiliation":[{"name":"University of Maryland"}]},{"given":"Donghwan","family":"Lee","sequence":"additional","affiliation":[{"name":"NAVER LABS"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/3DV50981.2020.00058"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2017.09.013"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2505283"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01124"},{"key":"ref30","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"European Conference on Computer Vision"},{"key":"ref37","article-title":"A survey of structure from motion","author":"ozyesil","year":"2017","journal-title":"arXiv preprint arXiv 1701 08241"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793637"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1177\/0278364920931151"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3386569.3392377"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00347"},{"key":"ref62","first-page":"767","article-title":"Mvsnet: Depth inference for unstructured multi-view stereo","author":"yao","year":"2018","journal-title":"Proceedings of the European Conference on Computer Vision (ECCV)"},{"key":"ref61","article-title":"Unsupervised learning of geometry with edge-aware depth-normal consistency","author":"yang","year":"0"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00578"},{"key":"ref28","article-title":"Crowdsteer: Realtime smooth and collision-free robot navigation in dense crowd scenarios trained using high-fidelity simulation","author":"liang","year":"2020","journal-title":"arXiv preprint arXiv 2004 06774"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00212"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00218"},{"key":"ref65","first-page":"5336","article-title":"Novel view synthesis of dynamic scenes with globally coherent depths from a monocular camera","author":"yoon","year":"2020","journal-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00202"},{"key":"ref29","article-title":"Of-vo: Re-liable navigation among pedestrians using commodity sensors","author":"liang","year":"2020","journal-title":"arXiv preprint arXiv 2004 10491"},{"key":"ref67","article-title":"P2net: Patch-match and plane-regularization for unsupervised indoor depth estimation","author":"yu","year":"2020","journal-title":"arXiv preprint arXiv 2007 09948"},{"key":"ref68","first-page":"1851","article-title":"Unsupervised learning of depth and ego-motion from video","author":"zhou","year":"2017","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.52"},{"key":"ref2","article-title":"Joint 2d-3d-semantic data for indoor scene understanding","author":"armeni","year":"2017","journal-title":"arXiv preprint arXiv 1702 07028"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001293"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref22","article-title":"Dense depth estimation of a complex dynamic scene without explicit 3d motion estimation","author":"kumar","year":"2019","journal-title":"arXiv preprint arXiv 1902 11152"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00324"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.32"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00465"},{"key":"ref25","article-title":"From big to small: Multi-scale local planar guidance for monocular depth estimation","author":"lee","year":"2019","journal-title":"arXiv preprint arXiv 1907 10326"},{"key":"ref50","first-page":"213","article-title":"Learning over subgoals for efficient navigation of structured, unknown environments","author":"stein","year":"2018","journal-title":"Conference on Robot Learning"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385773"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340802"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00290"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00069"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00040"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2019.00046"},{"key":"ref53","article-title":"Deepv2d: Video to depth with differentiable structure from motion","author":"teed","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00752"},{"key":"ref10","first-page":"2650","article-title":"Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture","author":"eigen","year":"2015","journal-title":"Proceedings of the IEEE International Conference on Computer Vision"},{"key":"ref11","first-page":"2366","article-title":"Depth map prediction from a single image using a multi-scale deep network","author":"eigen","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref40","article-title":"Towards robust monocular depth estimation: Mixing datasets for zero-shot cross-dataset transfer","author":"ranftl","year":"2020","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref12","article-title":"Confidence propagation through cnns for guided sparse depth regression","author":"eldesokey","year":"2019","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1561\/0600000052"},{"key":"ref15","first-page":"740","article-title":"Unsupervised cnn for single view depth estimation: Geometry to the rescue","author":"garg","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref16","first-page":"270","article-title":"Unsupervised monocular depth estimation with left-right consistency","author":"godard","year":"2017","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00256"},{"key":"ref19","author":"hartley","year":"2003","journal-title":"Multiple View Geometry in Computer Vision"},{"key":"ref4","first-page":"730","article-title":"Single-image depth perception in the wild","author":"chen","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref6","article-title":"Safenet: Self-supervised monocular depth estimation with semantic-aware feature extraction","author":"choi","year":"2020","journal-title":"arXiv preprint arXiv 2010 00170"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00575"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.2514\/6.2013-4641"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560831"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-007-0107-3"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref46","first-page":"501","article-title":"Pixelwise view selection for unstructured multi-view stereo","author":"sch\u00f6nberger","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"ref48","first-page":"746","article-title":"Indoor segmentation and support inference from rgbd images","author":"silberman","year":"2012","journal-title":"European Conference on Computer Vision"},{"key":"ref47","first-page":"2930","article-title":"Scene co-ordinate regression forests for camera relocalization in rgb-d images","author":"shotton","year":"2013","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref42","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.440"},{"key":"ref44","article-title":"Dense-cavoid: Real-time navigation in dense crowds using anticipatory behaviors","author":"sathyamoorthy","year":"2020","journal-title":"arXiv preprint arXiv 2002 05155"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_38"}],"event":{"name":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Montreal, QC, Canada","start":{"date-parts":[[2021,10,10]]},"end":{"date-parts":[[2021,10,17]]}},"container-title":["2021 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9709627\/9709628\/09709913.pdf?arnumber=9709913","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,15]],"date-time":"2022-06-15T20:18:54Z","timestamp":1655324334000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9709913\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":69,"URL":"https:\/\/doi.org\/10.1109\/iccv48922.2021.01256","relation":{},"subject":[],"published":{"date-parts":[[2021,10]]}}}