{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T05:56:06Z","timestamp":1776923766614,"version":"3.51.2"},"reference-count":37,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012,11]]},"DOI":"10.1109\/sc.2012.49","type":"proceedings-article","created":{"date-parts":[[2013,3,2]],"date-time":"2013-03-02T07:39:24Z","timestamp":1362209964000},"page":"1-12","source":"Crossref","is-referenced-by-count":57,"title":["Detection and correction of silent data corruption for large-scale high-performance computing"],"prefix":"10.1109","author":[{"given":"David","family":"Fiala","sequence":"first","affiliation":[]},{"given":"Frank","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Engelmann","sequence":"additional","affiliation":[]},{"given":"Rolf","family":"Riesen","sequence":"additional","affiliation":[]},{"given":"Kurt","family":"Ferreira","sequence":"additional","affiliation":[]},{"given":"Ron","family":"Brightwell","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"19","doi-asserted-by":"publisher","DOI":"10.2172\/1081941"},{"key":"35","doi-asserted-by":"crossref","first-page":"124","DOI":"10.1007\/978-3-642-03770-2_19","article-title":"Volpexmpi: An MPI library for execution of parallel applications on volatile nodes","volume":"5759","author":"leblanc","year":"2009","journal-title":"Lecture Notes in Computer Science"},{"key":"17","article-title":"Simulation challenge: Exascale planning overview","author":"shalf","year":"2010","journal-title":"HEC FSIO R&D Workshop"},{"key":"36","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2012.11.002"},{"key":"18","doi-asserted-by":"publisher","DOI":"10.1177\/1094342010391989"},{"key":"33","doi-asserted-by":"crossref","first-page":"208","DOI":"10.1007\/978-3-642-15646-5_22","article-title":"Transparent redundant computing with MPI","volume":"6305","author":"brightwell","year":"2010","journal-title":"Lecture Notes in Computer Science"},{"key":"15","doi-asserted-by":"publisher","DOI":"10.1109\/ICDCS.2012.56"},{"key":"34","doi-asserted-by":"publisher","DOI":"10.2316\/P.2011.719-031"},{"key":"16","article-title":"Hpc landscape - Application accelerators: Deus ex machina?","author":"vetter","year":"2009","journal-title":"High Performance Embedded Computing Workshop"},{"key":"13","first-page":"25","article-title":"Transient fault detection via simultaneous multithreading","author":"reinhardt","year":"2000","journal-title":"Proceedings of 27th International Symposium on Computer Architecture (IEEE Cat No RS00201) ISCA"},{"key":"14","doi-asserted-by":"publisher","DOI":"10.1109\/FCCM.2005.61"},{"key":"37","author":"cobb","year":"2011","journal-title":"Mpiecho A Framework for Transparent Mpi Task Replication"},{"key":"11","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2005.70"},{"key":"12","doi-asserted-by":"publisher","DOI":"10.1145\/859618.859631"},{"key":"21","author":"debardeleben","year":"2009","journal-title":"High-end Computing Resilience Analysis of Issues Facing the HEC Community and Path-forward for Research and Development"},{"key":"20","doi-asserted-by":"publisher","DOI":"10.1109\/PDP.2012.22"},{"key":"22","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/46\/1\/067"},{"key":"23","author":"bronevetsky","year":"0"},{"key":"24","article-title":"System implications of memory reliability in exascale computing","author":"li","year":"2011","journal-title":"Supercomputing"},{"key":"25","doi-asserted-by":"publisher","DOI":"10.1145\/2148600.2148626"},{"key":"26","doi-asserted-by":"publisher","DOI":"10.1109\/TDMR.2005.855685"},{"key":"27","article-title":"Soft error vulnerability of iterative linear algebra methods","author":"bronevetsky","year":"2007","journal-title":"Proceedings of the 21st ACM International Conference on Supercomputing (ICS) 2008"},{"key":"28","doi-asserted-by":"publisher","DOI":"10.1109\/5.119549"},{"key":"29","doi-asserted-by":"publisher","DOI":"10.1109\/L-CA.2008.12"},{"key":"3","doi-asserted-by":"publisher","DOI":"10.1145\/2150976.2150989"},{"key":"2","article-title":"Failure trends in a large disk drive population","author":"pinheiro","year":"0","journal-title":"USENIX Conference on File and Storage Technologies 2007"},{"key":"10","doi-asserted-by":"publisher","DOI":"10.1147\/rd.201.0020"},{"key":"1","doi-asserted-by":"publisher","DOI":"10.1145\/1555349.1555372"},{"key":"30","doi-asserted-by":"publisher","DOI":"10.1109\/TDSC.2008.62"},{"key":"7","article-title":"Evaluating the viability of process replication reliability for exascale systems","author":"ferreira","year":"2011","journal-title":"Supercomputing"},{"key":"6","article-title":"Software failures and the road to a petaflop machine","author":"philp","year":"2005","journal-title":"HPCRI 1st Workshop on High Performance Computing Reliability Issues in Proceedings of the 11th International Symposium on High Performance Computer Architecture (HPCA-11)"},{"key":"32","first-page":"189","article-title":"The case for modular redundancy in large-scale high performance computing systems","author":"engelmann","year":"2009","journal-title":"Proceedings of the 8th IASTED International Conference on Parallel and Distributed Computing and Networks (PDCN) 2009"},{"key":"5","first-page":"19","article-title":"Application MTTFE vs. platform MTTF: A fresh perspective on system reliability and application throughput for computations at scale","author":"daly","year":"0","journal-title":"Proceedings of the Workshop on Resiliency in High Performance Computing (Resilience) 2008 May 2008"},{"key":"31","first-page":"99","article-title":"Detailed design and evaluation of redundant multithreading alternatives","author":"mukherjee","year":"0","journal-title":"Proceedings of the 29th Annual International Symposium on Computer Architecture (ISCA) 2002"},{"key":"4","author":"daly","year":"2007","journal-title":"ADTSC Nuclear Weapons Highlights Facilitating High-throughput ASC Calculations"},{"key":"9","author":"bronevetsky","year":"0"},{"key":"8","article-title":"What is the monster in the closet?","author":"geist","year":"2011","journal-title":"Workshop on Architectures I Exascale and Beyond Gaps in Research Gaps in Our Thinking"}],"event":{"name":"2012 SC - International Conference for High Performance Computing, Networking, Storage and Analysis","location":"Salt Lake City, UT","start":{"date-parts":[[2012,11,10]]},"end":{"date-parts":[[2012,11,16]]}},"container-title":["2012 International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6468170\/6468439\/06468485.pdf?arnumber=6468485","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,21]],"date-time":"2017-06-21T08:37:15Z","timestamp":1498034235000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/6468485\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,11]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/sc.2012.49","relation":{},"subject":[],"published":{"date-parts":[[2012,11]]}}}