{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T09:52:00Z","timestamp":1761126720246,"version":"3.41.0"},"reference-count":74,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,7]]},"DOI":"10.1109\/cvpr.2017.575","type":"proceedings-article","created":{"date-parts":[[2017,11,9]],"date-time":"2017-11-09T21:50:33Z","timestamp":1510264233000},"page":"5415-5424","source":"Crossref","is-referenced-by-count":69,"title":["Creativity: Generating Diverse Questions Using Variational Autoencoders"],"prefix":"10.1109","author":[{"given":"Unnat","family":"Jain","sequence":"first","affiliation":[]},{"given":"Ziyu","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Schwing","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Simple baseline for visual question answering","year":"2015","author":"zhou","key":"ref73"},{"key":"ref72","article-title":"Visual madlibs: Fill in the blank image generation and question answering","author":"yu","year":"2015","journal-title":"Proc ICCV"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.10"},{"key":"ref70","article-title":"Attribute2Image: Conditional Image Generation from Visual Attributes","author":"yan","year":"2016","journal-title":"Proc ECCV"},{"journal-title":"Measuring machine intelligence through visual question answering","year":"2016","author":"zitnick","key":"ref74"},{"key":"ref39","article-title":"Imagenet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc NIPS"},{"key":"ref38","article-title":"Deep Kalman Filters","author":"krishnan","year":"2015","journal-title":"NIPS Workshop"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.494"},{"key":"ref31","article-title":"Revisiting Visual Question Answering Baselines","author":"jabri","year":"2016","journal-title":"Proc ECCV"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/312624.312649"},{"key":"ref37","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"kiros","year":"2015","journal-title":"TACL"},{"key":"ref36","article-title":"Auto-Encoding Variational Bayes","author":"kingma","year":"2014","journal-title":"ICLRE"},{"key":"ref35","article-title":"Semi-Supervised Learning with Deep Generative Models","author":"kingma","year":"2014","journal-title":"Proc NIPS"},{"key":"ref34","article-title":"Multimodal residual learning for visual qa","author":"kim","year":"2016","journal-title":"Proc NIPS"},{"key":"ref60","doi-asserted-by":"crossref","DOI":"10.1162\/tacl_a_00177","article-title":"Grounded compositional semantics for finding and describing images with sentences","author":"socher","year":"2014","journal-title":"Proc TACL"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"ref61","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Proc NIPS"},{"journal-title":"Diverse beam search Decoding diverse solutions from neural sequence models","year":"2016","author":"vijayakumar","key":"ref63"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.1126\/science.1127647","article-title":"Reducing the Dimensionality of Data with Neural Networks","author":"hinton","year":"2006","journal-title":"Science"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"},{"journal-title":"Image captioning and visual question answering based on attributes and their related external knowledge","year":"2016","author":"wu","key":"ref66"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref67","article-title":"Dynamic memory networks for visual and textual question answering","author":"xiong","year":"2016","journal-title":"Proc ICML"},{"key":"ref68","article-title":"Ask, attend and answer: Exploring question-guided spatial attention for visual question answering","author":"xu","year":"2016","journal-title":"Proc ECCV"},{"key":"ref69","article-title":"Show, attend and tell: Neural image caption generation with visual attention","author":"xu","year":"2015","journal-title":"Proc ICML"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref1","article-title":"Deep compositional question answering with neural module networks","author":"andreas","year":"2016","journal-title":"Proc CVPR"},{"key":"ref20","article-title":"Every picture tells a story: Generating sentences from images","author":"farhadi","year":"2010","journal-title":"Proc ECCV"},{"key":"ref22","article-title":"Are you talking to a machine? Dataset and Methods for Multilingual Image Question Answering","author":"gao","year":"2015","journal-title":"Proc NIPS"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1044"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10593-2_35"},{"key":"ref23","article-title":"A Systematic Exploration of Diversity in Machine Translation","author":"gimpel","year":"2013","journal-title":"EMNLP"},{"key":"ref26","article-title":"DRAW: A recurrent neural network for image generation","author":"gregor","year":"2015","journal-title":"Proc ICML"},{"key":"ref25","article-title":"Generative Adversarial Networks","author":"goodfellow","year":"2014","journal-title":"Proc NIPS"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1170"},{"key":"ref51","article-title":"Unsupervised representation learning with deep convolutional generative adversarial networks","author":"radford","year":"2016","journal-title":"ICLRE"},{"key":"ref59","article-title":"Chapter 6: Information Processing in Dynamical Systems: Foundations of Harmony Theory","author":"smolensky","year":"1986","journal-title":"Parallel Distributed Processing Explorations in the Microstructure of Cognition"},{"key":"ref58","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2015","journal-title":"ICLRE"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.499"},{"journal-title":"Fully Connected Deep Structured Networks","year":"2015","author":"schwing","key":"ref56"},{"key":"ref55","article-title":"Globally Convergent Parallel MAP LP Relaxation Solver using the Frank-Wolfe Algorithm","author":"schwing","year":"2014","journal-title":"Proc ICML"},{"key":"ref54","article-title":"Globally Convergent Dual MAP LP Relaxation Solvers using Fenchel-Young Margins","author":"schwing","year":"2012","journal-title":"Proc NIPS"},{"key":"ref53","article-title":"Improved Techniques for Training GANs","author":"salimans","year":"2016","journal-title":"Proc NIPS"},{"key":"ref52","article-title":"Exploring models and data for image question answering","author":"ren","year":"2015","journal-title":"Proc NIPS"},{"key":"ref10","article-title":"Importance Weighted Autoencoders","author":"burda","year":"2016","journal-title":"Proc ICLR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995466"},{"key":"ref12","article-title":"Learning Deep Structured Models","author":"chen","year":"2015","journal-title":"Proc ICML"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2477044"},{"key":"ref14","article-title":"A recurrent latent variable model for sequential data","author":"chung","year":"2015","journal-title":"Proc NIPS"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1092"},{"key":"ref16","doi-asserted-by":"crossref","DOI":"10.1111\/j.2517-6161.1977.tb01600.x","article-title":"Maximum likelihood from incomplete data via the EM algorithm","author":"dempster","year":"1977","journal-title":"J of the Royal Statistical Society"},{"key":"ref17","article-title":"Deep generative image models using a laplacian pyramid of adversarial networks","author":"denton","year":"2015","journal-title":"Proc NIPS"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298754"},{"key":"ref4","article-title":"Matching words and pictures","author":"barnard","year":"2003","journal-title":"JMLR"},{"key":"ref3","article-title":"Black box variational inference for state space models","author":"archer","year":"2016","journal-title":"ICLR Workshop"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177699147"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_1"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1214\/06-BA104"},{"key":"ref7","article-title":"Deep Generative Stochastic Networks trainable by Back-prop","author":"bengio","year":"2014","journal-title":"JMLR"},{"key":"ref49","article-title":"Smooth and Strong: MAP Inference with Linear Convergence","author":"meshi","year":"2015","journal-title":"Proc NIPS"},{"key":"ref9","article-title":"Latent Dirichlet Allocation","author":"blei","year":"2003","journal-title":"JMLR"},{"key":"ref46","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v30i1.10442","article-title":"Learning to answer questions from image using convolutional neural network","author":"ma","year":"2016","journal-title":"Proc AAAI"},{"key":"ref45","article-title":"Hierarchical question-image co-attention for visual question answering","author":"lu","year":"2016","journal-title":"Proc NIPS"},{"key":"ref48","article-title":"Deep Captioning with Multimodal Recurrent Neural Networks (m-rnn)","author":"mao","year":"2015","journal-title":"ICLRE"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.9"},{"key":"ref42","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"Proc ECCV"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref44","article-title":"Generative Adversarial Structured Networks","author":"london","year":"2016","journal-title":"Proc NIPS Workshop on Adversarial Training"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.1982.1056489"}],"event":{"name":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2017,7,21]]},"location":"Honolulu, HI","end":{"date-parts":[[2017,7,26]]}},"container-title":["2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8097368\/8099483\/08100058.pdf?arnumber=8100058","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,26]],"date-time":"2025-06-26T23:27:06Z","timestamp":1750980426000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8100058\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,7]]},"references-count":74,"URL":"https:\/\/doi.org\/10.1109\/cvpr.2017.575","relation":{},"subject":[],"published":{"date-parts":[[2017,7]]}}}