% Chapter contribution to a collected volume in Springer's LNCSE series.
% Citation key is kept unchanged so existing \cite commands keep resolving.
% NOTE(review): booktitle and volume were resolved from the ISBN prefix of the
% DOI (978-3-319-75426-0); confirm against the publisher record.
@incollection{34584ae59dd34a3285b452c5872c6045,
  author    = {Parra Hinojosa, Alfredo and Bungartz, Hans-Joachim and Pfl{\"u}ger, Dirk},
  title     = {Scalable algorithmic detection of silent data corruption for high-dimensional {PDEs}},
  booktitle = {Sparse Grids and Applications - Miami 2016},
  series    = {Lecture Notes in Computational Science and Engineering},
  volume    = {123},
  publisher = {Springer International Publishing},
  year      = {2018},
  pages     = {93--115},
  doi       = {10.1007/978-3-319-75426-0_5},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2018, Springer International Publishing AG, part of Springer Nature.},
  abstract  = {In this paper we show how to benefit from the numerical properties of a well-established extrapolation method---the combination technique---to make it tolerant to silent data corruption (SDC). The term SDC refers to errors in data not detected by the system. We use the hierarchical structure of the combination technique to detect if parts of the floating point data are corrupted. The method we present is based on robust regression and other well-known outlier detection techniques. It is a lossy approach, meaning we sacrifice some accuracy but we benefit from the small computational overhead. We test our algorithms on a d-dimensional advection-diffusion equation and inject SDC of different orders of magnitude. We show that our method has a very good detection rate: large errors are always detected, and the small errors that go undetected do not noticeably damage the solution. We also carry out scalability tests for a 5D scenario. We finally discuss how to deal with false positives and how to extend these ideas to more general quantities of interest.},
}