% Contribution by Yang et al. to the edited volume "Parallel Computing is Everywhere"
% (series: Advances in Parallel Computing, IOS Press, 2018).
% Typed as incollection because the chapter authors differ from the volume editors.
@incollection{8527a180c20841679b0d28707108f07c,
  author    = {Yang, Dai and Weidendorfer, Josef and Trinitis, Carsten and K{\"u}stner, Tilman and Ziegler, Sibylle},
  title     = {Enabling Application-Integrated Proactive Fault Tolerance},
  editor    = {Joubert, Gerhard R. and Dazzi, Patrizio and Peters, Frans and Danelutto, Marco and Bassini, Sanzio},
  booktitle = {Parallel Computing is Everywhere},
  series    = {Advances in Parallel Computing},
  publisher = {IOS Press BV},
  year      = {2018},
  pages     = {475--484},
  doi       = {10.3233/978-1-61499-843-3-475},
  language  = {English},
  keywords  = {Application-Integrated Fault Tolerance, Data Distribution, High Performance Computing, Parallel Programming Models},
  abstract  = {Exascale computing is the next major milestone for the HPC community. Due to a steadily increasing probability of failures, current applications must be made malleable to be able to cope with dynamic resource changes. In this paper, we show first results with LAIK, a lightweight library for dynamically re-distributable application data. This allows to free compute nodes from workload before a predicted failure. For a real-world application, we show that LAIK adds negligible overhead. In addition, we show the effect of different re-distribution strategies.},
  note      = {Publisher Copyright: {\textcopyright} 2018 The authors and IOS Press.},
}