{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T06:31:34Z","timestamp":1776925894201,"version":"3.51.2"},"reference-count":17,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2013,2]]},"DOI":"10.1109\/pdp.2013.61","type":"proceedings-article","created":{"date-parts":[[2013,4,15]],"date-time":"2013-04-15T16:14:26Z","timestamp":1366042466000},"page":"375-383","source":"Crossref","is-referenced-by-count":3,"title":["ELMO: A User-Friendly API to Enable Local Memory in OpenCL Kernels"],"prefix":"10.1109","author":[{"family":"Jianbin Fang","sequence":"first","affiliation":[]},{"given":"A. L.","family":"Varbanescu","sequence":"additional","affiliation":[]},{"family":"Jie Shen","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sips","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"17","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454155"},{"key":"15","article-title":"CudaDMA: Optimizing GPU memory bandwidth via warp specialization","author":"bauer","year":"0","journal-title":"Proc of SC 2011"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1109\/SASP.2009.5226334"},{"key":"13","year":"2012","journal-title":"Intel OpenCL Optimization Guide"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1145\/1806596.1806606"},{"key":"11","article-title":"Scan primitives for gpu computing","author":"sengupta","year":"2007","journal-title":"Graphics Hardware 2007"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1023\/A:1014573219977"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1145\/1941553.1941561"},{"key":"2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC.2002.1012701"},{"key":"1","article-title":"Automatic data movement and computation mapping for multi-level parallel architectures with explicitly managed memories","author":"baskaran","year":"0","journal-title":"Proceedings of PPoPP 2008"},{"key":"10","doi-asserted-by":"crossref","first-page":"1073","DOI":"10.1109\/TCSVT.2009.2020478","article-title":"Cross-based local stereo matching using orthogonal integral images","volume":"19","author":"zhang","year":"2009","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"7","year":"0","journal-title":"The OpenCL Specification V1 2"},{"key":"6","year":"2011","journal-title":"GPU Computing Gems Emerald Edition (Applications of GPU Computing Series)"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1145\/1995896.1995932"},{"key":"4","year":"2011","journal-title":"The OpenACC Application Programming Interface V1 0"},{"key":"9","year":"2012","journal-title":"AMD Accelerated Parallel Processing - OpenCL"},{"key":"8","year":"2011","journal-title":"NVIDIA CUDA C Programming Guide Version 4 1"}],"event":{"name":"2013 21st Euromicro International Conference on Parallel, Distributed and Network-Based Processing (PDP 2013)","location":"Belfast","start":{"date-parts":[[2013,2,27]]},"end":{"date-parts":[[2013,3,1]]}},"container-title":["2013 21st Euromicro International Conference on Parallel, Distributed, and Network-Based Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6498211\/6498515\/06498578.pdf?arnumber=6498578","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T07:22:18Z","timestamp":1498029738000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6498578\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2013,2]]},"references-count":17,"URL":"https:\/\/doi.org\/10.1109\/pdp.2013.61","relation":{},"subject":[],"published":{"date-parts":[[2013,2]]}}}