{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,21]],"date-time":"2025-10-21T15:27:34Z","timestamp":1761060454019,"version":"3.28.0"},"reference-count":59,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/cvpr.2017.223","type":"proceedings-article","created":{"date-parts":[[2017,11,9]],"date-time":"2017-11-09T21:50:33Z","timestamp":1510264233000},"page":"2067-2076","source":"Crossref","is-referenced-by-count":21,"title":["Predictive-Corrective Networks for Action Detection"],"prefix":"10.1109","author":[{"given":"Achal","family":"Dave","sequence":"first","affiliation":[]},{"given":"Olga","family":"Russakovsky","sequence":"additional","affiliation":[]},{"given":"Deva","family":"Ramanan","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition","year":"2014","author":"simonyan","key":"ref39"},{"key":"ref38","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Neural Infor-mation Proces System"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-010-0384-0"},{"key":"ref32","article-title":"Clock-work convnets for video semantic segmentation","author":"shelhamer","year":"2016","journal-title":"ECCV"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/72.410363"},{"key":"ref37","article-title":"Hollywood in homes: Crowdsourcing data collection for activity understanding","author":"sigurdsson","year":"2016","journal-title":"ECCV"},{"journal-title":"Asynchronous temporal fields for action recognition","year":"2016","author":"sigurdsson","key":"ref36"},{"journal-title":"Charades dataset","year":"0","author":"sigurdsson","key":"ref35"},{"key":"ref34","first-page":"1049","article-title":"Temporal action localization in untrimmed videos via multi-stage enns","author":"shou","year":"2016","journal-title":"Pro ceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref28","first-page":"7","article-title":"Human-debugging of machines","volume":"2","author":"parikh","year":"2011","journal-title":"NIPS WCSSWC"},{"key":"ref27","article-title":"The lear submission at thumos 2014","author":"oneata","year":"2014","journal-title":"THUMOS'14 challenge"},{"key":"ref29","first-page":"1310","article-title":"On the difficulty of training recurrent neural networks","volume":"28","author":"pascanu","year":"2013","journal-title":"ICML (3)"},{"key":"ref2","first-page":"1737","article-title":"Sgd-qn: Careful quasi-newton stochastic gradient descent","volume":"10","author":"bordes","year":"2009","journal-title":"The Journal of Machine Learning Research"},{"key":"ref1","volume":"abs 1609 8675","author":"abu-el-haija","year":"2016","journal-title":"Youtube-8m A large-scale video classification benchmark"},{"key":"ref20","article-title":"A clockwork rnn","author":"koutnik","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref22","volume":"abs 1605 8104","author":"lotter","year":"2016","journal-title":"Deep Predictive Coding Networks for Video Prediction and Unsupervised Learning"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TAC.1979.1101943"},{"key":"ref24","article-title":"Deep multi-scale video prediction beyond mean square error","author":"mathieu","year":"2016","journal-title":"ICLRE"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2007.09.009"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1037\/a0036646"},{"journal-title":"Towards good practices for very deep two-stream convnets","year":"2015","author":"wang","key":"ref50"},{"key":"ref51","article-title":"Temporal segment networks: towards good practices for deep action recognition","author":"wang","year":"2016","journal-title":"European Confer ence on Computer Vision"},{"key":"ref59","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"zeiler","year":"2014","journal-title":"European Conference on Com- Puter Vision"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"ref57","article-title":"Adsc submission at thumos challenge 2015","author":"yuan","year":"2015","journal-title":"THUMOS'15 chal lenge entry"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.293"},{"journal-title":"Every moment counts Dense detailed labeling of actions in complex videos","year":"2015","author":"yeung","key":"ref55"},{"key":"ref54","article-title":"Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks","author":"xue","year":"2016","journal-title":"NIPS"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/089976602317318938"},{"key":"ref52","article-title":"Hidden conditional random fields for gesture recognition","author":"wang","year":"2006","journal-title":"Computer Vision and Pattern Recognition (CVPR)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1002\/0471221546"},{"key":"ref11","article-title":"Autoencoders, minimum description length, and helmholtz free energy","author":"hinton","year":"1994","journal-title":"Neural Information Processing Systems"},{"key":"ref40","article-title":"Unsupervised learning of video representations using LSTMs","author":"srivastava","year":"2015","journal-title":"ICML"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995470"},{"key":"ref13","first-page":"448","article-title":"Batch normalization: Accelerating deep network training by reducing internal covariate shift","author":"ioffe","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning (ICML-15)"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.59"},{"key":"ref15","article-title":"Thumos challenge: Action recognition with a large number of classes","author":"jiang","year":"2014","journal-title":"ECCV Wo rkshop"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1117\/12.280797"},{"key":"ref17","article-title":"Fast saliency based pooling of fisher encoded dense trajectories","author":"karaman","year":"2014","journal-title":"THUMOS'14 challenge entry"},{"key":"ref18","article-title":"Visualizing and understanding recurrent networks","author":"karpathy","year":"2016","journal-title":"ICLR Workshop"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref4","article-title":"Torch7: A matlab-like environment for machine learning","author":"collobert","year":"2011","journal-title":"BigLearn NIPS Workshop number EPFL-CONF-192376"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-7908-2604-3_16"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref5","first-page":"1223","article-title":"Large scale distributed deep networks","author":"dean","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2008.06.001"},{"key":"ref49","article-title":"Action recognition and detection by combining motion and appearance features","author":"wang","year":"2014","journal-title":"THUMOS'14 challenge entry"},{"key":"ref9","article-title":"Unsupervised learning for physical interaction through video prediction","author":"finn","year":"2016","journal-title":"NIPS"},{"key":"ref46","article-title":"Generating videos with scene dynamics","author":"vondrick","year":"2016","journal-title":"Neural Information Processing Systems (NIPS)"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.18"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"ref47","article-title":"An uncertain future: Forecasting from variational autoencoders","author":"walker","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"journal-title":"Probabilistic Robotics (Intelligent Robotics and Autonomous Agents)","year":"2005","author":"thrun","key":"ref41"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"journal-title":"Long-term temporal convolutions for action recognition","year":"2016","author":"varol","key":"ref43"}],"event":{"name":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2017,7,21]]},"location":"Honolulu, HI","end":{"date-parts":[[2017,7,26]]}},"container-title":["2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8097368\/8099483\/08099706.pdf?arnumber=8099706","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,12,15]],"date-time":"2017-12-15T19:58:50Z","timestamp":1513367930000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8099706\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/cvpr.2017.223","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}