{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,31]],"date-time":"2026-01-31T02:57:46Z","timestamp":1769828266550,"version":"3.49.0"},"reference-count":50,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,2]]},"DOI":"10.1109\/hpca.2017.42","type":"proceedings-article","created":{"date-parts":[[2017,5,12]],"date-time":"2017-05-12T17:03:21Z","timestamp":1494608601000},"page":"85-96","source":"Crossref","is-referenced-by-count":55,"title":["Design and Analysis of an APU for Exascale Computing"],"prefix":"10.1109","author":[{"given":"Thiruvengadam","family":"Vijayaraghavan","sequence":"first","affiliation":[]},{"given":"Arun","family":"Karunanithi","sequence":"additional","affiliation":[]},{"given":"Onur","family":"Kayiran","sequence":"additional","affiliation":[]},{"given":"Mitesh","family":"Meswani","sequence":"additional","affiliation":[]},{"given":"Indrani","family":"Paul","sequence":"additional","affiliation":[]},{"given":"Matthew","family":"Poremba","sequence":"additional","affiliation":[]},{"given":"Steven","family":"Raasch","sequence":"additional","affiliation":[]},{"given":"Steven K.","family":"Reinhardt","sequence":"additional","affiliation":[]},{"given":"Greg","family":"Sadowski","sequence":"additional","affiliation":[]},{"given":"Vilas","family":"Sridharan","sequence":"additional","affiliation":[]},{"given":"Yasuko","family":"Eckert","sequence":"additional","affiliation":[]},{"given":"Gabriel H.","family":"Loh","sequence":"additional","affiliation":[]},{"given":"Michael J.","family":"Schulte","sequence":"additional","affiliation":[]},{"given":"Mike","family":"Ignatowski","sequence":"additional","affiliation":[]},{"given":"Bradford M.","family":"Beckmann","sequence":"additional","affiliation":[]},{"given":"William C.","family":"Brantley","sequence":"additional","affiliation":[]},{"given":"Joseph L.","family":"Greathouse","sequence":"additional","affiliation":[]},{"given":"Wei","family":"Huang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","article-title":"Implementing a Leading Loads Performance Predictor on Commodity Processors","author":"su","year":"2014","journal-title":"Proc of USENIX Annual Technical Conf (USENIX)"},{"key":"ref38","article-title":"Simulation of Exascale Nodes through Runtime Hardware Monitoring","author":"greathouse","year":"2013","journal-title":"ASCR modeling and simulation of exascale systems and applications workshop"},{"key":"ref33","article-title":"Toward Efficient Programmer-managed Two-level Memory Hierarchies in Exascale Computers","author":"meswani","year":"2015","journal-title":"Proc of the Int'l Workshop on Hardware-Software Co-Design for High Performance Computing"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.56"},{"key":"ref31","article-title":"Hybrid Memory Cube: Breakthrough DRAM Performance with a Fundamentally Re-Architected DRAM Subsystem","author":"pawlowski","year":"2011","journal-title":"Hot Chips 23"},{"key":"ref30","article-title":"Memory-centric system interconnect design with hybrid memory cubes","author":"kim","year":"2013","journal-title":"Proc Int Conf Parallel Architectures and Compilation Techniques (PACT)"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"ref36","doi-asserted-by":"crossref","DOI":"10.1145\/2024716.2024718","article-title":"The Gem5 Simulator","volume":"39","author":"binkert","year":"2011","journal-title":"SIGARCH Comput Archit News"},{"key":"ref35","article-title":"Fundamental Latency Tradeoffs in Architecting DRAM Caches","author":"qureshi","year":"2012","journal-title":"Proc 28th Int l Symp MicroArchitecture (MICRO)"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155673"},{"key":"ref28","year":"0","journal-title":"Fast Forward 2 R&D Draft Statement of Work"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056027"},{"key":"ref29","article-title":"Memory Network: Enabling Technology for Scalable Near-Data Computing","author":"kim","year":"2014","journal-title":"Proc of the Workshop on Near-Data Processing"},{"key":"ref2","first-page":"365","article-title":"Dark silicon and the end of multicore scaling","author":"esmaeilzadeh","year":"2011","journal-title":"2011 38th Annual International Symposium on Computer Architecture (ISCA) ISCA"},{"key":"ref1","year":"0","journal-title":"Top500 News Release June 2008"},{"key":"ref20","year":"0","journal-title":"AMD Radeon R9 Series Gaming Graphics Cards with High Bandwidth Memory"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/2994133.2994141"},{"key":"ref21","article-title":"A 4&#x00D7;4&#x00D7;2 Homogeneous Scalable 3D Network-on-Chip Circuit with 326MFlit\/s 0.66pJ\/b Robust and Fault-Tolerant Asynchronous 3D Links","year":"2016","journal-title":"Proc Int l Solid-States Circuits Conf (ISSCC)"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI.2016.60"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.34"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/PACT.2015.30"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853227"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749474"},{"key":"ref10","article-title":"Heterogeneous System Architecture (HSA): Architecture and Algorithms","year":"2014","journal-title":"Tutorial at the 31st Int'l Symp Computer Architecture (ISCA)"},{"key":"ref11","year":"0","journal-title":"ROCm Open Platform For Development Discovery and Education around GPU Computing"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.17"},{"key":"ref12","year":"0","journal-title":"JEDEC"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/2884045.2884052"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835930"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541981"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694350"},{"key":"ref17","article-title":"Lazy Release Consistency for GPU s","author":"alsop","year":"2016","journal-title":"Proc 28th Int l Symp MicroArchitecture (MICRO)"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/2540708.2540747"},{"key":"ref19","article-title":"Xilinx Stacked Silicon Interconnect Technology Delivers Breakthrough FPGA Capacity, Bandwidth, and Power Efficiency","author":"saban","year":"2011","journal-title":"Xilinx"},{"key":"ref4","year":"0","journal-title":"The Green500 List-June 2016"},{"key":"ref3","year":"0","journal-title":"Pathforward Draft Technical Requirements"},{"key":"ref6","year":"0","journal-title":"Intel Federal LLC To Propel Supercomputing Advancement For The U S Government"},{"key":"ref5","year":"0","journal-title":"NVIDIA Wins $18 Million DOE Grant for Exascale Computing Research"},{"key":"ref8","year":"0","journal-title":"MOCHI ARCHITECTURE"},{"key":"ref7","year":"0","journal-title":"CRAY to Explore Alternative Processor Technologies for Supercomputing"},{"key":"ref49","article-title":"Thermal Feasibility of Die-Stacked Processing in Memory","author":"eckert","year":"2014","journal-title":"Proc of the Workshop on Near-Data Processing"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/2830772.2830808"},{"key":"ref46","year":"0","journal-title":"MaxFlops Workload Description"},{"key":"ref45","article-title":"Proxy Applications for Co-Design","year":"0","journal-title":"proxyapps lanl gov\/"},{"key":"ref48","doi-asserted-by":"crossref","DOI":"10.1145\/2366231.2337161","article-title":"RAIDR: Retention-Aware Intelligent DRAM Refresh","author":"liu","year":"2012","journal-title":"Proc Int l Symp Computer Architecture (ISCA)"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2006.876103"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2015.7056063"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2016.7581263"},{"key":"ref44","article-title":"The AMD gem5 APU Simulator: Modeling Heterogeneous Systems in gem5","author":"beckmann","year":"2015","journal-title":"Tutorial at the Int'l Symp on Microarchitecture (MICRO)"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2015.22"}],"event":{"name":"2017 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","location":"Austin, TX","start":{"date-parts":[[2017,2,4]]},"end":{"date-parts":[[2017,2,8]]}},"container-title":["2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7920262\/7920798\/07920816.pdf?arnumber=7920816","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,2,2]],"date-time":"2020-02-02T05:25:08Z","timestamp":1580621108000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7920816\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,2]]},"references-count":50,"URL":"https:\/\/doi.org\/10.1109\/hpca.2017.42","relation":{},"subject":[],"published":{"date-parts":[[2017,2]]}}}