{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T02:19:26Z","timestamp":1730254766548,"version":"3.28.0"},"reference-count":60,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1109\/icpp.2012.37","type":"proceedings-article","created":{"date-parts":[[2012,10,26]],"date-time":"2012-10-26T17:47:20Z","timestamp":1351273640000},"page":"510-519","source":"Crossref","is-referenced-by-count":3,"title":["Mechanisms and Evaluation of Cross-Layer Fault-Tolerance for Supercomputing"],"prefix":"10.1109","author":[{"given":"Chen-Han","family":"Ho","sequence":"first","affiliation":[]},{"given":"Marc","family":"de Kruijf","sequence":"additional","affiliation":[]},{"given":"Karthikeyan","family":"Sankaralingam","sequence":"additional","affiliation":[]},{"given":"Barry","family":"Rountree","sequence":"additional","affiliation":[]},{"given":"Martin","family":"Schulz","sequence":"additional","affiliation":[]},{"given":"Bronis R.","family":"de Supinski","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"35","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2008.3"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.23"},{"key":"33","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2010.5470473"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2007.18"},{"journal-title":"The NAS Parallel Benchmarks","year":"0","key":"39"},{"key":"37","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2005.70"},{"key":"38","article-title":"Detailed Design and Evaluation of Redundant Multi-Threading Alternatives","author":"mukherjee","year":"0","journal-title":"ISCA '02"},{"key":"43","doi-asserted-by":"publisher","DOI":"10.1145\/1113841.1113843"},{"key":"42","doi-asserted-by":"publisher","DOI":"10.1109\/CGO.2005.34"},{"key":"41","first-page":"111","article-title":"ReVive: Cost-Effective Architectural Support for Rollback Recovery in Shared-Memory Multiprocessors","author":"prvulovic","year":"2002","journal-title":"ISCA-29"},{"key":"40","doi-asserted-by":"publisher","DOI":"10.1145\/2000064.2000089"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2009.20"},{"key":"23","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815968"},{"key":"24","doi-asserted-by":"publisher","DOI":"10.1007\/BFb0029359"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1109\/TC.1984.1676475"},{"key":"26","first-page":"641","article-title":"Software Fault Tolerance: An Overview","volume":"2655","author":"kienzle","year":"2003","journal-title":"Lecture Notes in Computer Science"},{"key":"27","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2007.100"},{"journal-title":"ASC Sequoia Benchmarks","year":"0","key":"28"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-30566-8_27"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2003.1253246"},{"journal-title":"Using Code Perforation to Improve Performance Reduce Energy Consumption and Respond to Failures","year":"2009","author":"agarwal","key":"2"},{"journal-title":"Ccc Visioning Study on Cross-layer Reliability","year":"0","key":"1"},{"key":"7","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2005.70"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1145\/1346281.1346315"},{"journal-title":"ExaScale Computing Study Technology Challenges in Achieving Exascale Systems Peter Kogge","year":"2008","author":"bergman","key":"6"},{"key":"5","doi-asserted-by":"publisher","DOI":"10.1145\/1806596.1806620"},{"journal-title":"Flicker Saving Refresh-Power in Mobile Devices Through Critical Data Partitioning","year":"2009","author":"liu","key":"32"},{"key":"4","doi-asserted-by":"publisher","DOI":"10.1145\/1815961.1815967"},{"key":"31","doi-asserted-by":"publisher","DOI":"10.1145\/1669112.1669172"},{"key":"9","doi-asserted-by":"publisher","DOI":"10.1145\/1454115.1454128"},{"key":"8","doi-asserted-by":"publisher","DOI":"10.1109\/IISWC.2009.5306793"},{"journal-title":"mpiP Lightweight Scalable MPI Profiling","year":"2005","author":"vetter","key":"59"},{"key":"58","first-page":"112","article-title":"tunable replica circuits and adaptive voltage-frequency techniques for dynamic voltage, temperature, and aging variation tolerance","author":"tschanz","year":"2009","journal-title":"2009 Symposium on VLSI Circuits VLSIC"},{"journal-title":"Containment Domains A Full-System Approach to Computational Resiliency","year":"2011","author":"sullivan","key":"57"},{"journal-title":"Reliability Wearout Mechanisms in Advanced CMOS Technologies","year":"0","author":"strong","key":"56"},{"key":"19","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155667"},{"key":"55","article-title":"A Programming Model and Language Implementation for Concurrent Failure Prone Hardware","author":"stanley-marbell","year":"0","journal-title":"Workshop on Programming Models for Ubiquitous Parallelism 2006"},{"key":"17","doi-asserted-by":"publisher","DOI":"10.1177\/1094342004046052"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1145\/1736020.1736063"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2003.1253179"},{"key":"16","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2151008"},{"key":"13","article-title":"The Case for Modular Redundancy in Large-Scale High Performance Computing Systems","author":"engelmann","year":"0","journal-title":"PCDN '09"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2005.26"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2006.15"},{"key":"12","article-title":"Relax: An Architectural Framework for Software Recovery of Hardware Faults","author":"de kruijf","year":"0","journal-title":"ISCA 2010"},{"key":"21","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2009.4798256"},{"key":"20","article-title":"Paceline: Improving Single-Thread Performance in Nanoscale CMPs through Core Overclocking","author":"greskamp","year":"0","journal-title":"PAC '07"},{"key":"60","doi-asserted-by":"publisher","DOI":"10.1145\/1508244.1508265"},{"key":"49","article-title":"Stochastic Computing: Embracing Errors in Architecture and Design of Processors and Applications","author":"sartori","year":"0","journal-title":"CASES 2011"},{"key":"48","doi-asserted-by":"publisher","DOI":"10.1145\/1993498.1993518"},{"key":"45","doi-asserted-by":"publisher","DOI":"10.1145\/1869459.1869525"},{"key":"44","doi-asserted-by":"publisher","DOI":"10.1145\/1929501.1929517"},{"key":"47","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2008.4630072"},{"journal-title":"The Design Implementation and Evaluation of Jade a Portable Implicitly Parallel Programming Language","year":"1994","author":"rinard","key":"46"},{"journal-title":"Sentinel Silicon Cells","year":"0","key":"10"},{"key":"51","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2012.6263938"},{"key":"52","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2006.42"},{"key":"53","first-page":"123","article-title":"SafetyNet: Improving the Availability of Shared Memory Multiprocessors with Global Checkpoint\/Recovery","author":"sorin","year":"2002","journal-title":"ISCA-29"},{"key":"54","article-title":"A Taxonomy to Enable Error Recovery and Correction in Software","author":"sridharan","year":"0","journal-title":"Workshop on Quality-Aware Design 2008"},{"key":"50","doi-asserted-by":"publisher","DOI":"10.1109\/24.914544"}],"event":{"name":"2012 41st International Conference on Parallel Processing (ICPP)","start":{"date-parts":[[2012,9,10]]},"location":"Pittsburgh, PA, USA","end":{"date-parts":[[2012,9,13]]}},"container-title":["2012 41st International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx5\/6336540\/6337566\/06337612.pdf?arnumber=6337612","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,3,21]],"date-time":"2017-03-21T22:03:07Z","timestamp":1490133787000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6337612\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,9]]},"references-count":60,"URL":"https:\/\/doi.org\/10.1109\/icpp.2012.37","relation":{},"subject":[],"published":{"date-parts":[[2012,9]]}}}