% PPoPP 2017 conference paper on noise injection for exposing MPI message races.
% Author names use the unbraced "Last, First" form so that styles can parse and
% abbreviate each given-name token; acronyms in venue fields are brace-protected.
@inproceedings{b154662533794e27975e403fa501da7a,
  author    = {Sato, Kento and Ahn, Dong H. and Laguna, Ignacio and Lee, Gregory L. and Schulz, Martin and Chambreau, Christopher M.},
  title     = {Noise injection techniques to expose subtle and unintended message races},
  booktitle = {{PPoPP} 2017 - Proceedings of the 22nd {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
  series    = {Proceedings of the {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming, {PPOPP}},
  publisher = {Association for Computing Machinery},
  pages     = {89--101},
  year      = {2017},
  month     = jan,
  day       = {26},
  doi       = {10.1145/3018743.3018767},
  language  = {English},
  keywords  = {Debugging, MPI, Non-determinism},
  abstract  = {Debugging intermittently occurring bugs within MPI applications is challenging, and message races, a condition in which two or more sends race to match with a receive, are one of the common root causes. Many debugging tools have been proposed to help programmers resolve them, but their runtime interference perturbs the timing such that subtle races often cannot be reproduced with debugging tools. We present novel noise injection techniques to expose message races even under a tool's control. We first formalize this race problem in the context of non-deterministic parallel applications and use this analysis to determine an effective noise-injection strategy to uncover them. We codified these techniques in Ninja (Noise INJection Agent) that exposes these races without modification to the application. Our evaluations on synthetic cases as well as a real-world bug in Hypre-2.10.1 show that Ninja significantly helps expose races.},
  note      = {Publisher Copyright: {\textcopyright} 2017 ACM.; 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP 2017 ; Conference date: 04-02-2017 Through 08-02-2017},
}