bibtex

2022

@article{phmnjscb2022,
  author = {Joachim Protze and Marc-André Hermanns and Matthias S. Müller and Van Man Nguyen and Julien Jaeger and Emmanuelle Saillard and Patrick Carribault and Denis Barthou},
  title = {MPI detach --- Towards automatic asynchronous local completion},
  journal = {Parallel Computing},
  volume = {109},
  pages = {102859},
  year = {2022},
  issn = {0167-8191},
  doi = {10.1016/j.parco.2021.102859},
  url = {https://www.sciencedirect.com/science/article/pii/S0167819121001022},
  keywords = {Message Passing Interface, Asynchronous communication, OpenMP tasking, Hybrid parallelism, Static analysis, Code transformation}
}
@article{bpj2022,
  author = {Stephane Bouhrour and Thibaut Pepin and Julien Jaeger},
  title = {Towards leveraging collective performance with the support of MPI 4.0 features in MPC},
  journal = {Parallel Computing},
  volume = {109},
  pages = {102860},
  year = {2022},
  issn = {0167-8191},
  doi = {10.1016/j.parco.2021.102860},
  url = {https://www.sciencedirect.com/science/article/pii/S0167819121001034}
}

2021

@InProceedings{dbjcp2021,
  author = {Dionisi, Thomas and Bouhrour, Stephane and Jaeger, Julien and Carribault, Patrick and P{\'e}rache, Marc},
  editor = {Sousa, Leonel and Roma, Nuno and Tom{\'a}s, Pedro},
  title = {Enhancing Load-Balancing of MPI Applications with Workshare},
  booktitle = {Euro-Par 2021: Parallel Processing},
  year = {2021},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {466--481}
}

@INPROCEEDINGS{hsjgbdbs2021,
  author = {Holmes, Daniel J. and Skjellum, Anthony and Jaeger, Julien and Grant, Ryan E. and Bangalore, Purushotham V. and Dosanjh, Matthew G.F. and Bienz, Amanda and Schafer, Derek},
  title = {Partitioned Collective Communication},
  booktitle = {2021 Workshop on Exascale MPI (ExaMPI)},
  year = {2021},
  pages = {9--17},
  doi = {10.1109/ExaMPI54564.2021.00007}
}

@InProceedings{prcg2021,
  author = {Pereira, Romain and Roussel, Adrien and Carribault, Patrick and Gautier, Thierry},
  editor = {McIntosh-Smith, Simon and de Supinski, Bronis R. and Klinkenberg, Jannis},
  title = {Communication-Aware Task Scheduling Strategy in Hybrid MPI+OpenMP Applications},
  booktitle = {OpenMP: Enabling Massive Node-Level Parallelism},
  year = {2021},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {197--210},
  isbn = {978-3-030-85262-7}
}

@InProceedings{baasjcp2021,
  author = {Besnard, Jean-Baptiste and Adam, Julien and Malony, Allen D. and Shende, Sameer and Jaeger, Julien and Carribault, Patrick and P{\'e}rache, Marc},
  editor = {Mix, Hartmut and Niethammer, Christoph and Zhou, Huan and Nagel, Wolfgang E. and Resch, Michael M.},
  title = {Exploring Space-Time Trade-Off in Backtraces},
  booktitle = {Tools for High Performance Computing 2018 / 2019},
  year = {2021},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {151--168}
}

2020

@InProceedings{lgjmrtg2020,
  author = {León, Edgar A. and Gerofi, Balazs and Jaeger, Julien and Mercier, Guillaume and Riesen, Rolf and Takagi, Masamichi and Goglin, Brice},
  title = {Application-Driven Requirements for Node Resource Management in Next-Generation Systems},
  booktitle = {Proceedings of the 2020 International Workshop on Runtime and Operating Systems for Supercomputers (ROSS 2020), Atlanta, GA, USA},
  year = {2020}
}

@InProceedings{nsjbc2020-2,
  author = {Nguyen, Van Man and Saillard, Emmanuelle and Jaeger, Julien and Barthou, Denis and Carribault, Patrick},
  title = {PARCOACH Extension for Static MPI Nonblocking and Persistent Communication Validation},
  booktitle = {Proceedings of the Fourth International Workshop on Software Correctness for HPC Applications, Atlanta, GA, USA},
  year = {2020}
}

@InProceedings{bj2020,
  author = {Bouhrour, St{\'e}phane and Jaeger, Julien},
  title = {Implementation and Performance Evaluation of MPI Persistent Collectives in MPC: A Case Study},
  year = {2020},
  isbn = {9781450388801},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3416315.3416321},
  doi = {10.1145/3416315.3416321},
  abstract = {Persistent collective communications have recently been voted in the MPI standard, opening the door to many optimizations to reduce collectives cost, in particular for recurring operations. Indeed persistent semantics contains an initialization phase called only once for a specific collective. It can be used to collect building costs necessary to the collective, to avoid paying them each time the operation is performed. We propose an overview of the implementation of the persistent collectives in the MPC MPI runtime. We first present a na{\"\i}ve implementation for MPI runtimes already providing nonblocking collectives. Then, we improve this first implementation with two levels of caching optimizations. We present the performance results of the na{\"\i}ve and optimized versions and discuss their impact on different collective algorithms. We observe performance improvement compared to the na{\"\i}ve version on a repetitive benchmark, up to a 3x speedup for the reduce collective.},
  booktitle = {27th European MPI Users' Group Meeting},
  pages = {51--60},
  numpages = {10},
  location = {Austin, TX, USA},
  series = {EuroMPI/USA '20}
}

@INPROCEEDINGS{rcj2020,
  author = {Roussel, Adrien and Carribault, Patrick and Jaeger, Julien},
  editor = {Milfeld, Kent and de Supinski, Bronis R. and Koesterke, Lars and Klinkenberg, Jannis},
  title = {Preliminary Experience with OpenMP Memory Management Implementation},
  booktitle = {OpenMP: Portable Multi-Level Parallelism on Modern Systems},
  year = {2020},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {313--327},
  abstract = {Because of the evolution of compute units, memory heterogeneity is becoming popular in HPC systems. But dealing with such various memory levels often requires different approaches and interfaces. For this purpose, OpenMP 5.0 defines memory-management constructs to offer application developers the ability to tackle the issue of exploiting multiple memory spaces in a portable way. This paper proposes an overview of memory-management from applications to runtimes. Thus, we describe a convenient way to tune an application to include memory management constructs. We also detail a methodology to integrate them into an OpenMP runtime supporting multiple memory types (DDR, MCDRAM and NVDIMM). We implement our design into the MPC framework, while presenting some results on a realistic benchmark.},
  isbn = {978-3-030-58144-2}
}

@inproceedings{nsjbc2020,
  author = {Nguyen, Van Man and Saillard, Emmanuelle and Jaeger, Julien and Barthou, Denis and Carribault, Patrick},
  editor = {Jagode, Heike and Anzt, Hartwig and Juckeland, Guido and Ltaief, Hatem},
  title = {Automatic Code Motion to Extend MPI Nonblocking Overlap Window},
  booktitle = {High Performance Computing},
  year = {2020},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {43--54},
  abstract = {HPC applications rely on a distributed-memory parallel programming model to improve the overall execution time. This leads to spawning multiple processes that need to communicate with each other to make the code progress. But these communications involve overheads caused by network latencies or synchronizations between processes. One possible approach to reduce those overheads is to overlap communications with computations. MPI allows this solution through its nonblocking communication mode: a nonblocking communication is composed of an initialization and a completion call. It is then possible to overlap the communication by inserting computations between these two calls. The use of nonblocking collective calls is however still marginal and adds a new layer of complexity. In this paper we propose an automatic static optimization that (i) transforms blocking MPI communications into their nonblocking counterparts and (ii) performs extensive code motion to increase the size of overlapping intervals between initialization and completion calls. Our method is implemented in LLVM as a compilation pass, and shows promising results on two mini applications.},
  isbn = {978-3-030-59851-8}
}

@inproceedings{reynier2020,
  author = {Reynier, Florian},
  title = {Utilisation de cœurs dédiés pour la progression des communications non bloquantes},
  booktitle = {Compas' 2020},
  year = {2020},
  numpages = {7}
}

2019

@article{djjpt2019,
  author = {Alexandre Denis and Julien Jaeger and Emmanuel Jeannot and Marc P{\'{e}}rache and Hugo Taboada},
  title = {Study on progress threads placement and dedicated cores for overlapping {MPI} nonblocking collectives on manycore processor},
  journal = {Int. J. High Perform. Comput. Appl.},
  volume = {33},
  number = {6},
  year = {2019},
  url = {https://doi.org/10.1177/1094342019860184},
  doi = {10.1177/1094342019860184},
  timestamp = {Thu, 12 Mar 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/ijhpca/DenisJJPT19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{akbbpcjms2019,
  author = {Julien Adam and Maxime Kermarquer and Jean{-}Baptiste Besnard and Leonardo Bautista{-}Gomez and Marc P{\'{e}}rache and Patrick Carribault and Julien Jaeger and Allen D. Malony and Sameer Shende},
  title = {Checkpoint/restart approaches for a thread-based {MPI} runtime},
  journal = {Parallel Comput.},
  volume = {85},
  pages = {204--219},
  year = {2019},
  url = {https://doi.org/10.1016/j.parco.2019.02.006},
  doi = {10.1016/j.parco.2019.02.006},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl = {https://dblp.org/rec/journals/pc/AdamKBBPCJMS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@INPROCEEDINGS{vccjpr2019,
  author = {Ricardo Bispo Vieira and Antoine Capra and Patrick Carribault and Julien Jaeger and Marc P{\'{e}}rache and Adrien Roussel},
  editor = {Xing Fan and Bronis R. de Supinski and Oliver Sinnen and Nasser Giacaman},
  title = {Detecting Non-sibling Dependencies in OpenMP Task-Based Applications},
  booktitle = {OpenMP: Conquering the Full Hardware Spectrum -- 15th International Workshop on OpenMP, {IWOMP} 2019, Auckland, New Zealand, September 11-13, 2019, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {11718},
  pages = {231--245},
  publisher = {Springer},
  year = {2019},
  url = {https://doi.org/10.1007/978-3-030-28596-8\_16},
  doi = {10.1007/978-3-030-28596-8\_16},
  timestamp = {Mon, 26 Aug 2019 14:25:50 +0200},
  biburl = {https://dblp.org/rec/conf/iwomp/VieiraCCJPR19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@INPROCEEDINGS{brhjmbs2019,
  author = {Purushotham V. Bangalore and Rolf Rabenseifner and Daniel J. Holmes and Julien Jaeger and Guillaume Mercier and Claudia Blaas{-}Schenner and Anthony Skjellum},
  editor = {Torsten Hoefler and Jesper Larsson Tr{\"{a}}ff},
  title = {Exposition, clarification, and expansion of {MPI} semantic terms and conventions: is a nonblocking {MPI} function permitted to block?},
  booktitle = {Proceedings of the 26th European {MPI} Users' Group Meeting, EuroMPI 2019, Z{\"{u}}rich, Switzerland, September 11-13, 2019},
  pages = {2:1--2:10},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3343211.3343213},
  doi = {10.1145/3343211.3343213},
  timestamp = {Wed, 11 Sep 2019 12:26:40 +0200},
  biburl = {https://dblp.org/rec/conf/pvm/BangaloreRHJMBS19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{bjmstpc2019,
  author = {Jean{-}Baptiste Besnard and Julien Jaeger and Allen D. Malony and Sameer Shende and Hugo Taboada and Marc P{\'{e}}rache and Patrick Carribault},
  editor = {Torsten Hoefler and Jesper Larsson Tr{\"{a}}ff},
  title = {Mixing ranks, tasks, progress and nonblocking collectives},
  booktitle = {Proceedings of the 26th European {MPI} Users' Group Meeting, EuroMPI 2019, Z{\"{u}}rich, Switzerland, September 11-13, 2019},
  pages = {10:1--10:10},
  publisher = {{ACM}},
  year = {2019},
  url = {https://doi.org/10.1145/3343211.3343221},
  doi = {10.1145/3343211.3343221},
  timestamp = {Wed, 11 Sep 2019 12:26:40 +0200},
  biburl = {https://dblp.org/rec/conf/pvm/BesnardJMSTPC19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

2018

@INPROCEEDINGS{djt2018,
  author = {Alexandre Denis and Julien Jaeger and Hugo Taboada},
  editor = {Gabriele Mencagli and Dora B. Heras and Valeria Cardellini and Emiliano Casalicchio and Emmanuel Jeannot and Felix Wolf and Antonio Salis and Claudio Schifanella and Ravi Reddy Manumachu and Laura Ricci and Marco Beccuti and Laura Antonelli and Jos{\'{e}} Daniel Garc{\'{\i}}a S{\'{a}}nchez and Stephen L. Scott},
  title = {Progress Thread Placement for Overlapping {MPI} Non-blocking Collectives Using Simultaneous Multi-threading},
  booktitle = {Euro-Par 2018: Parallel Processing Workshops -- Euro-Par 2018 International Workshops, Turin, Italy, August 27-28, 2018, Revised Selected Papers},
  series = {Lecture Notes in Computer Science},
  volume = {11339},
  pages = {123--133},
  publisher = {Springer},
  year = {2018},
  url = {https://doi.org/10.1007/978-3-030-10549-5\_10},
  doi = {10.1007/978-3-030-10549-5\_10},
  timestamp = {Fri, 27 Dec 2019 21:26:53 +0100},
  biburl = {https://dblp.org/rec/conf/europar/DenisJT18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@INPROCEEDINGS{sdcjpp2018,
  author = {Marc Sergent and Mario Dagrada and Patrick Carribault and Julien Jaeger and Marc P{\'{e}}rache and Guillaume Papaur{\'{e}}},
  editor = {Marco Aldinucci and Luca Padovani and Massimo Torquati},
  title = {Efficient Communication/Computation Overlap with MPI+OpenMP Runtimes Collaboration},
  booktitle = {Euro-Par 2018: Parallel Processing -- 24th International Conference on Parallel and Distributed Computing, Turin, Italy, August 27-31, 2018, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {11014},
  pages = {560--572},
  publisher = {Springer},
  year = {2018},
  url = {https://doi.org/10.1007/978-3-319-96983-1\_40},
  doi = {10.1007/978-3-319-96983-1\_40},
  timestamp = {Tue, 14 May 2019 10:00:46 +0200},
  biburl = {https://dblp.org/rec/conf/europar/SergentDCJPP18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@INPROCEEDINGS{djjpt2018,
  author = {Alexandre Denis and Julien Jaeger and Emmanuel Jeannot and Marc P{\'{e}}rache and Hugo Taboada},
  editor = {Marco Aldinucci and Luca Padovani and Massimo Torquati},
  title = {Dynamic Placement of Progress Thread for Overlapping {MPI} Non-blocking Collectives on Manycore Processor},
  booktitle = {Euro-Par 2018: Parallel Processing -- 24th International Conference on Parallel and Distributed Computing, Turin, Italy, August 27-31, 2018, Proceedings},
  series = {Lecture Notes in Computer Science},
  volume = {11014},
  pages = {616--627},
  publisher = {Springer},
  year = {2018},
  url = {https://doi.org/10.1007/978-3-319-96983-1\_44},
  doi = {10.1007/978-3-319-96983-1\_44},
  timestamp = {Tue, 14 May 2019 10:00:46 +0200},
  biburl = {https://dblp.org/rec/conf/europar/DenisJJPT18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@INPROCEEDINGS{bjcb2018,
  author = {Hugo Brunie and Julien Jaeger and Patrick Carribault and Denis Barthou},
  editor = {Bruce Jacob},
  title = {Profile-guided scope-based data allocation method},
  booktitle = {Proceedings of the International Symposium on Memory Systems, {MEMSYS} 2018, Old Town Alexandria, VA, USA, October 01-04, 2018},
  pages = {169--182},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3240302.3240313},
  doi = {10.1145/3240302.3240313},
  timestamp = {Sat, 05 Jan 2019 20:13:10 +0100},
  biburl = {https://dblp.org/rec/conf/memsys/BrunieJCB18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@InProceedings{abmspcj2018,
  author = {Julien Adam and Jean{-}Baptiste Besnard and Allen D. Malony and Sameer Shende and Marc P{\'{e}}rache and Patrick Carribault and Julien Jaeger},
  title = {Transparent High-Speed Network Checkpoint/Restart in {MPI}},
  booktitle = {Proceedings of the 25th European {MPI} Users' Group Meeting, Barcelona, Spain, September 23-26, 2018},
  pages = {12:1--12:11},
  publisher = {{ACM}},
  year = {2018},
  url = {https://doi.org/10.1145/3236367.3236383},
  doi = {10.1145/3236367.3236383},
  timestamp = {Wed, 21 Nov 2018 12:44:23 +0100},
  biburl = {https://dblp.org/rec/conf/pvm/AdamBMSPCJ18.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

2017

@INPROCEEDINGS{lwcjpn2017,
  author = {A. Loussert and B. Welterlen and P. Carribault and J. Jaeger and M. Pérache and R. Namyst},
  title = {Resource-Management Study in HPC Runtime-Stacking Context},
  booktitle = {2017 29th International Symposium on Computer Architecture and High Performance Computing (SBAC-PAD)},
  year = {2017},
  month = oct,
  pages = {177--184},
  keywords = {application program interfaces;message passing;multiprocessing systems;parallel machines;parallel programming;shared memory systems;HPC runtime-stacking context;HPC supercomputers;building blocks;compute resources;distributed memory models;distributed programming model;hybrid parallel application;manycore processors;memory footprint;mixing multiple models;multicore processors;resource-management study;runtime libraries;runtime stacking;shared-memory communications;shared-memory models;Computational modeling;Instruction sets;Libraries;Programming;Runtime;Runtime library;Stacking;HPC;MPI;OpenMP;Parallel Programming},
  doi = {10.1109/SBAC-PAD.2017.30}
}

@InProceedings{ccbmpj2017,
  author = {Capra, Antoine and Carribault, Patrick and Besnard, Jean-Baptiste and Malony, Allen D. and P{\'e}rache, Marc and Jaeger, Julien},
  editor = {de Supinski, Bronis R. and Olivier, Stephen L. and Terboven, Christian and Chapman, Barbara M. and M{\"u}ller, Matthias S.},
  title = {User Co-scheduling for MPI+OpenMP Applications Using OpenMP Semantics},
  booktitle = {Scaling OpenMP for Exascale Performance and Portability},
  year = {2017},
  publisher = {Springer International Publishing},
  address = {Cham},
  pages = {203--216},
  isbn = {978-3-319-65578-9}
}

@INPROCEEDINGS{bmspcj2017,
  author = {J. B. Besnard and A. D. Malony and S. Shende and M. Pérache and P. Carribault and J. Jaeger},
  title = {Towards a Better Expressiveness of the Speedup Metric in MPI Context},
  booktitle = {2017 46th International Conference on Parallel Processing Workshops (ICPPW)},
  year = {2017},
  month = aug,
  pages = {251--260},
  keywords = {application program interfaces;message passing;multiprocessing systems;parallel processing;program diagnostics;software metrics;software performance evaluation;software tools;MPI abstraction;MPI+OpenMP benchmark;MPI+X model;OpenMP scaling;Speedup metric;many-core processors;parallel applications;performance metric;performance tools;Acceleration;Electronic mail;Mathematical model;Measurement;Programming;Scalability;Tools;MPI;MPI_Section;Phase;Profiling;Speedup},
  doi = {10.1109/ICPPW.2017.45},
  issn = {1530-2016}
}

2016

@inproceedings{baspcjm2016,
  author = {Besnard, Jean-Baptiste and Adam, Julien and Shende, Sameer and P{\'e}rache, Marc and Carribault, Patrick and Jaeger, Julien and Malony, Allen D.},
  title = {Introducing Task-Containers As an Alternative to Runtime-Stacking},
  booktitle = {Proceedings of the 23rd European MPI Users' Group Meeting},
  series = {EuroMPI 2016},
  year = {2016},
  isbn = {978-1-4503-4234-6},
  location = {Edinburgh, United Kingdom},
  pages = {51--63},
  numpages = {13},
  url = {http://doi.acm.org/10.1145/2966884.2966910},
  doi = {10.1145/2966884.2966910},
  acmid = {2966910},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {In-Situ, MPI+X, Privatization, Thread-Based MPI}
}

@inproceedings{ap2016,
  author = {Julien Adam and Marc P{\'{e}}rache},
  title = {A Parallel and Resilient Frontend for High Performance Validation Suites},
  booktitle = {High Performance Computing for Computational Science -- {VECPAR} 2016 -- 12th International Conference, Porto, Portugal, June 28-30, 2016, Revised Selected Papers},
  pages = {248--255},
  year = {2016},
  crossref = {DBLP:conf/vecpar/2016},
  url = {https://doi.org/10.1007/978-3-319-61982-8\_22},
  doi = {10.1007/978-3-319-61982-8\_22},
  timestamp = {Mon, 17 Jul 2017 08:43:10 +0200},
  biburl = {https://dblp.org/rec/bib/conf/vecpar/AdamP16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@comment{A second, identical copy of entry baspcjm2016 stood here. It was removed because BibTeX and Biber reject repeated citation keys; the first definition earlier in the 2016 section remains.}

@inproceedings{taboada2016,
  author = {Taboada, Hugo},
  title = {Impact du placement des threads de progression pour les collectives MPI non-bloquantes},
  booktitle = {Compas' 2016},
  year = {2016},
  location = {Lorient, France},
  pages = {12:1--12:8},
  articleno = {12},
  numpages = {8},
  keywords = {MPI, Collective non-bloquantes, Asynchronisme, Thread de progression}
}

2015

@inproceedings{bmspcj2015,
  author = {Besnard, Jean-Baptiste and Malony, Allen and Shende, Sameer and P{\'e}rache, Marc and Carribault, Patrick and Jaeger, Julien},
  title = {An MPI Halo-Cell Implementation for Zero-Copy Abstraction},
  booktitle = {Proceedings of the 22nd European MPI Users' Group Meeting},
  series = {EuroMPI '15},
  year = {2015},
  isbn = {978-1-4503-3795-3},
  location = {Bordeaux, France},
  pages = {3:1--3:9},
  articleno = {3},
  numpages = {9},
  url = {http://doi.acm.org/10.1145/2802658.2802669},
  doi = {10.1145/2802658.2802669},
  acmid = {2802669},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Ghost-Cells, MPI, MPI_Halo, Zero-Copy, memory}
}

@inproceedings{jscb2015,
  author = {Jaeger, Julien and Saillard, Emmanuelle and Carribault, Patrick and Barthou, Denis},
  title = {Correctness Analysis of MPI-3 Non-Blocking Communications in PARCOACH},
  booktitle = {Proceedings of the 22nd European MPI Users' Group Meeting},
  series = {EuroMPI '15},
  year = {2015},
  isbn = {978-1-4503-3795-3},
  location = {Bordeaux, France},
  pages = {16:1--16:2},
  articleno = {16},
  numpages = {2},
  url = {http://doi.acm.org/10.1145/2802658.2802674},
  doi = {10.1145/2802658.2802674},
  acmid = {2802674},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {MPI, Non-blocking collectives, checker, static analysis}
}

@incollection{jmcpj2015,
  author = {Jaeger, Julien and Mah{\'{e}}o, Aur{\`{e}}le and Carribault, Patrick and P{\'{e}}rache, Marc and Jalby, William},
  title = {Optimisation des opérations collectives dans les applications MPI+OpenMP},
  booktitle = {Chocs avancées 2014},
  publisher = {Commissariat {\`a} l'{\'E}nergie Atomique},
  year = {2015},
  pages = {44--45}
}

@incollection{scb2015-2,
  author = {Saillard, Emmanuelle and Carribault, Patrick and Barthou, Denis},
  title = {MPI Thread-Level Checking for MPI+OpenMP Applications},
  booktitle = {Euro-Par 2015: Parallel Processing},
  editor = {Tr{\"a}ff, Jesper Larsson and Hunold, Sascha and Versaci, Francesco},
  series = {Lecture Notes in Computer Science},
  volume = {9233},
  pages = {31--42},
  publisher = {Springer Berlin Heidelberg},
  year = {2015},
  isbn = {978-3-662-48095-3},
  doi = {10.1007/978-3-662-48096-0_3},
  url = {http://dx.doi.org/10.1007/978-3-662-48096-0_3},
  keywords = {Static verification; OpenMP; MPI; MPI thread level},
  language = {English}
}

@inproceedings{scb2015,
  author = {Saillard, Emmanuelle and Carribault, Patrick and Barthou, Denis},
  title = {Static/Dynamic Validation of MPI Collective Communications in Multi-threaded Context},
  booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  series = {PPoPP 2015},
  year = {2015},
  isbn = {978-1-4503-3205-7},
  location = {San Francisco, CA, USA},
  pages = {279--280},
  numpages = {2},
  url = {http://doi.acm.org/10.1145/2688500.2688548},
  doi = {10.1145/2688500.2688548},
  acmid = {2688548},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Control Flow, MPI+OpenMP, Static, Verification}
}

@article{jcp2015,
  author = {Julien Jaeger and Patrick Carribault and Marc P{\'{e}}rache},
  title = {Fine-grain data management directory for OpenMP 4.0 and OpenACC},
  journal = {Concurrency and Computation: Practice and Experience},
  volume = {27},
  number = {6},
  pages = {1528--1539},
  year = {2015},
  url = {http://dx.doi.org/10.1002/cpe.3352},
  doi = {10.1002/cpe.3352},
  timestamp = {Wed, 08 Apr 2015 14:12:59 +0200},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/concurrency/JaegerCP15},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

2014

@article{spb-j2014,
  author = {Emmanuelle Saillard and Patrick Carribault and Denis Barthou},
  title = {{PARCOACH:} Combining static and dynamic validation of {MPI} collective communications},
  journal = {{IJHPCA}},
  volume = {28},
  number = {4},
  pages = {425--434},
  year = {2014},
  url = {http://dx.doi.org/10.1177/1094342014552204},
  doi = {10.1177/1094342014552204},
  timestamp = {Thu, 27 Nov 2014 11:31:05 +0100},
  biburl = {http://dblp.uni-trier.de/rec/bib/journals/ijhpca/SaillardCB14},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

@incollection{cocp2014,
  author = {Clet-Ortega, Jérôme and Carribault, Patrick and Pérache, Marc},
  title = {Evaluation of OpenMP Task Scheduling Algorithms for Large NUMA Architectures},
  booktitle = {Euro-Par 2014 Parallel Processing},
  editor = {Silva, Fernando and Dutra, Inês and Santos Costa, Vítor},
  series = {Lecture Notes in Computer Science},
  volume = {8632},
  pages = {596--607},
  publisher = {Springer International Publishing},
  year = {2014},
  isbn = {978-3-319-09872-2},
  doi = {10.1007/978-3-319-09873-9_50},
  url = {http://dx.doi.org/10.1007/978-3-319-09873-9_50},
  language = {English}
}

@inproceedings{mcpj2014,
  author = {Mah{\'e}o, Aur{\`e}le and Carribault, Patrick and P{\'e}rache, Marc and Jalby, William},
  title = {Optimizing Collective Operations in Hybrid Applications},
  booktitle = {Proceedings of the 21st European MPI Users' Group Meeting (EuroMPI/ASIA 2014)},
  series = {EuroMPI/ASIA '14},
  year = {2014},
  isbn = {978-1-4503-2875-3},
  location = {Kyoto, Japan},
  pages = {121:121--121:122},
  articleno = {121},
  numpages = {2},
  url = {http://doi.acm.org/10.1145/2642769.2642791},
  doi = {10.1145/2642769.2642791},
  acmid = {2642791},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Collective Communications, MPI, OpenMP}
}

@incollection{scb2014,
  author = {Saillard, Emmanuelle and Carribault, Patrick and Barthou, Denis},
  title = {Static Validation of Barriers and Worksharing Constructs in OpenMP Applications},
  booktitle = {Using and Improving OpenMP for Devices, Tasks, and More, Proceedings of the 10th International Workshop on OpenMP (IWOMP 2014)},
  editor = {DeRose, Luiz and de Supinski, Bronis R. and Olivier, Stephen L. and Chapman, Barbara M. and Müller, Matthias S.},
  series = {Lecture Notes in Computer Science},
  volume = {8766},
  pages = {73--86},
  publisher = {Springer International Publishing},
  year = {2014},
  isbn = {978-3-319-11453-8},
  doi = {10.1007/978-3-319-11454-5_6},
  url = {http://dx.doi.org/10.1007/978-3-319-11454-5_6},
  language = {English}
}

@incollection{jcp2014,
  author = {Jaeger, Julien and Carribault, Patrick and Pérache, Marc},
  title = {Data-Management Directory for OpenMP 4.0 and OpenACC},
  booktitle = {Euro-Par 2013: Parallel Processing Workshops},
  editor = {an Mey, Dieter and Alexander, Michael and Bientinesi, Paolo and Cannataro, Mario and Clauss, Carsten and Costan, Alexandru and Kecskemeti, Gabor and Morin, Christine and Ricci, Laura and Sahuquillo, Julio and Schulz, Martin and Scarano, Vittorio and Scott, Stephen L. and Weidendorfer, Josef},
  series = {Lecture Notes in Computer Science},
  volume = {8374},
  pages = {168--177},
  publisher = {Springer Berlin Heidelberg},
  year = {2014},
  isbn = {978-3-642-54419-4},
  doi = {10.1007/978-3-642-54420-0_17},
  url = {http://dx.doi.org/10.1007/978-3-642-54420-0_17},
  language = {English}
}

@article{dcpj2014,
  author = {Didelot, Sylvain and Carribault, Patrick and Pérache, Marc and Jalby, William},
  title = {Improving MPI communication overlap with collaborative polling},
  journal = {Computing},
  volume = {96},
  number = {4},
  pages = {263--278},
  year = {2014},
  publisher = {Springer Vienna},
  issn = {0010-485X},
  doi = {10.1007/s00607-013-0327-z},
  url = {http://dx.doi.org/10.1007/s00607-013-0327-z},
  keywords = {HPC; Overlap; MPI; High-speed network; Polling; 68N19; 68N15},
  language = {English}
}

2013

@inproceedings{bpj2013,
  author = {Besnard, J.-B. and Perache, M. and Jalby, W.},
  title = {Event Streaming for Online Performance Measurements Reduction},
  booktitle = {Parallel Processing (ICPP), 2013 42nd International Conference on},
  year = {2013},
  month = oct,
  pages = {985--994},
  keywords = {application program interfaces;distributed processing;mainframes;message passing;parallel machines;search engines;NAS-MPI benchmarks;architectural constraints;concurrent application profiling;data management trace;development cycle;distributed analysis engine;event streaming;file-system;instrumentation-data;online performance measurements reduction;representative C++ MPI application;runtime coupling;supercomputers;trace-based coupling;Couplings;Engines;Instruments;Libraries;Parallel processing;Runtime;Virtualization;Code coupling;MPI virtualization;Online trace analysis;Performance tools},
  doi = {10.1109/ICPP.2013.117},
  issn = {0190-3918}
}

@inproceedings{vpj2013,
  author = {Valat, S{\'e}bastien and P{\'e}rache, Marc and Jalby, William},
  title = {Introducing Kernel-level Page Reuse for High Performance Computing},
  booktitle = {Proceedings of the ACM SIGPLAN Workshop on Memory Systems Performance and Correctness},
  series = {MSPC '13},
  year = {2013},
  isbn = {978-1-4503-2103-7},
  location = {Seattle, Washington},
  pages = {3:1--3:9},
  articleno = {3},
  numpages = {9},
  url = {http://doi.acm.org/10.1145/2492408.2492414},
  doi = {10.1145/2492408.2492414},
  acmid = {2492414},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {Linux, NUMA, kernel, many-core, memory allocator, memory pool, page fault, parallel, process, zero page}
}

@misc{mp-patent2013,
  author = {Menyhart, Z. and Perache, M.},
  title = {Method, computer program and device for managing memory access in a multiprocessor architecture of numa type},
  url = {http://www.google.com/patents/US20130262790},
  year = {2013},
  month = oct # "~3",
  publisher = {Google Patents},
  note = {US Patent App. 13/993,665}
}

@inproceedings{scb2013,
  author = {Saillard, Emmanuelle and Carribault, Patrick and Barthou, Denis},
  title = {Combining Static and Dynamic Validation of MPI Collective Communications},
  booktitle = {Proceedings of the 20th European MPI Users' Group Meeting},
  series = {EuroMPI '13},
  year = {2013},
  isbn = {978-1-4503-1903-4},
  location = {Madrid, Spain},
  pages = {117--122},
  numpages = {6},
  url = {http://doi.acm.org/10.1145/2488551.2488555},
  doi = {10.1145/2488551.2488555},
  acmid = {2488555},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {MPI, collective, correctness, debugging, static analysis}
}

2012

@incollection{dcpj2012,
  author = {Didelot, Sylvain and Carribault, Patrick and Pérache, Marc and Jalby, William},
  title = {Improving MPI Communication Overlap with Collaborative Polling},
  booktitle = {Recent Advances in the Message Passing Interface, Proceedings of the 19th European MPI User's Group Meeting (EuroMPI 2012)},
  editor = {Träff, Jesper Larsson and Benkner, Siegfried and Dongarra, Jack J.},
  series = {Lecture Notes in Computer Science},
  volume = {7490},
  pages = {37--46},
  publisher = {Springer Berlin Heidelberg},
  year = {2012},
  isbn = {978-3-642-33517-4},
  doi = {10.1007/978-3-642-33518-1_9},
  url = {http://dx.doi.org/10.1007/978-3-642-33518-1_9},
  keywords = {HPC; Overlap; MPI; High-Speed Network; Polling},
  language = {English}
}

@incollection{mkcpj2012,
  author    = {Mahéo, Aurèle and Koliaï, Souad and Carribault, Patrick and Pérache, Marc and Jalby, William},
  title     = {Adaptive {OpenMP} for Large {NUMA} Nodes},
  booktitle = {{OpenMP} in a Heterogeneous World, Proceedings of the 8th International Workshop on {OpenMP} ({IWOMP} 2012)},
  editor    = {Chapman, Barbara M. and Massaioli, Federico and Müller, Matthias S. and Rorro, Marco},
  series    = {Lecture Notes in Computer Science},
  volume    = {7312},
  pages     = {254--257},
  year      = {2012},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-30960-1},
  doi       = {10.1007/978-3-642-30961-8_20},
  url       = {http://dx.doi.org/10.1007/978-3-642-30961-8_20},
  language  = {English}
}

@inproceedings{tcp2012,
  author    = {Tchiboukdjian, M. and Carribault, P. and Perache, M.},
  title     = {Hierarchical Local Storage: Exploiting Flexible User-Data Sharing Between {MPI} Tasks},
  booktitle = {Parallel Distributed Processing Symposium (IPDPS), 2012 IEEE 26th International},
  year      = {2012},
  month     = may,
  pages     = {366--377},
  keywords  = {application program interfaces;data handling;message passing;parallel programming;shared memory systems;C languages;C++ languages;Fortran languages;HLS;MPI tasks;Open MP;core ratio;data sharing;diminishing memory;exploiting flexible user data sharing;hierarchical local storage;multiple programming models;open MP programming model;overall memory consumption;parallel semantics;shared memory programming model;Computational modeling;Data models;Instruction sets;Memory management;Multicore processing;Programming;Semantics;High-Performance Computing;Memory Consumption;Parallel Programming Model},
  doi       = {10.1109/IPDPS.2012.42},
  issn      = {1530-2075},
}

@inproceedings{vcc2012,
  author    = {Vet, Jean-Yves and Carribault, Patrick and Cohen, Albert},
  title     = {Multigrain Affinity for Heterogeneous Work Stealing},
  booktitle = {Fifth Workshop on Programmability Issues for Multi-Core Computers ({MULTIPROG} '12) at {HiPEAC}-2012},
  editor    = {Ayguade, Eduard and Gaster, Benedict and Howes, Lee and Stenström, Per and Unsal, Osman},
  year      = {2012},
  month     = jan,
  publisher = {HiPEAC Network of Excellence},
}

@article{cdjp2012,
  author    = {Carribault, Patrick and Diakhat{\'e}, Fran{\c{c}}ois and Jourdren, Herv{\'e} and P{\'e}rache, Marc},
  title     = {{MPC}: A Unified Parallel Framework for {HPC}},
  journal   = {Chocs 41},
  year      = {2012},
  pages     = {81--88},
  publisher = {Commissariat {\`a} l'{\'E}nergie Atomique}
}

@article{bcddejp2012,
  author    = {Ballereau, Philippe and Carribault, Patrick and Duboc, F. and Dureau, David and Enaux, C{\'e}dric and Jourdren, Herv{\'e} and P{\'e}rache, Marc},
  title     = {Adaptive Mesh Refinement Methods and Advanced Programming Models for High Performance Computing},
  journal   = {Chocs 41},
  year      = {2012},
  pages     = {81--88},
  publisher = {Commissariat {\`a} l'{\'E}nergie Atomique}
}

@article{cpj2011,
  author    = {Carribault, Patrick and P{\'e}rache, Marc and Jourdren, Herv{\'e}},
  title     = {Hi{\'e}rarchie des donn{\'e}es en parall{\'e}lisme hybride},
  journal   = {Chocs avanc{\'e}es 2011},
  year      = {2012},
  pages     = {42--43},
  publisher = {Commissariat {\`a} l'{\'E}nergie Atomique}
}

2011

@incollection{cpj2011b,
  author    = {Carribault, Patrick and Pérache, Marc and Jourdren, Hervé},
  title     = {Thread-Local Storage Extension to Support Thread-Based {MPI}/{OpenMP} Applications},
  booktitle = {{OpenMP} in the Petascale Era, Proceedings of the 7th International Workshop on {OpenMP} ({IWOMP} 2011)},
  editor    = {Chapman, Barbara M. and Gropp, William D. and Kumaran, Kalyan and Müller, Matthias S.},
  series    = {Lecture Notes in Computer Science},
  volume    = {6665},
  pages     = {80--93},
  year      = {2011},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-21486-8},
  doi       = {10.1007/978-3-642-21487-5_7},
  url       = {http://dx.doi.org/10.1007/978-3-642-21487-5_7},
  language  = {English}
}

2010

@inproceedings{ppcj2010,
  author    = {Pouget, K. and Perache, M. and Carribault, P. and Jourdren, H.},
  title     = {User level {DB}: a debugging {API} for user-level thread libraries},
  booktitle = {Parallel Distributed Processing, Workshops and Phd Forum (IPDPSW), 2010 IEEE International Symposium on},
  year      = {2010},
  month     = apr,
  pages     = {1--7},
  keywords  = {application program interfaces;multi-threading;multiprocessing programs;program debugging;API debugging;M user-level threads;MARCEL;N kernel-level threads;Sun Microsystems Thread_DB API;generic algorithms;hybrid M:N libraries;multiprocessor communication;multithreading;parallel programming;user level DB library;Debugging;GDB;MPC;Multithreading},
  doi       = {10.1109/IPDPSW.2010.5470815},
}

@incollection{cpj2010,
  author    = {Carribault, Patrick and Pérache, Marc and Jourdren, Hervé},
  title     = {Enabling Low-Overhead Hybrid {MPI}/{OpenMP} Parallelism with {MPC}},
  booktitle = {Beyond Loop Level Parallelism in {OpenMP}: Accelerators, Tasking and More, Proceedings of the 6th International Workshop on {OpenMP} ({IWOMP} 2010)},
  editor    = {Sato, Mitsuhisa and Hanawa, Toshihiro and Müller, Matthias S. and Chapman, Barbara M. and de Supinski, Bronis R.},
  series    = {Lecture Notes in Computer Science},
  volume    = {6132},
  pages     = {1--14},
  year      = {2010},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-13216-2},
  doi       = {10.1007/978-3-642-13217-9_1},
  url       = {http://dx.doi.org/10.1007/978-3-642-13217-9_1},
  language  = {English}
}

@article{acejp2010,
  author    = {Arslan, V. and Carribault, Patrick and Enaux, C{\'e}dric and Jourdren, Herv{\'e} and P{\'e}rache, Marc},
  title     = {Calcul haute performance en transfert radiatif},
  journal   = {Chocs avanc{\'e}es 2009},
  year      = {2010},
  pages     = {38--39},
  publisher = {Commissariat {\`a} l'{\'E}nergie Atomique}
}

2009

@incollection{pcj2009,
  author    = {Pérache, Marc and Carribault, Patrick and Jourdren, Hervé},
  title     = {{MPC-MPI}: An {MPI} Implementation Reducing the Overall Memory Consumption},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message Passing Interface, Proceedings of the 16th European {PVM}/{MPI} Users' Group Meeting ({EuroPVM}/{MPI} 2009)},
  editor    = {Ropo, Matti and Westerholm, Jan and Dongarra, Jack},
  series    = {Lecture Notes in Computer Science},
  volume    = {5759},
  pages     = {94--103},
  year      = {2009},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-03769-6},
  doi       = {10.1007/978-3-642-03770-2_16},
  url       = {http://dx.doi.org/10.1007/978-3-642-03770-2_16},
  keywords  = {Message passing; Memory consumption; High-performance computing; Multithreading},
  language  = {English}
}

@incollection{dpnj2009,
  author    = {Diakhaté, François and Pérache, Marc and Namyst, Raymond and Jourdren, Hervé},
  title     = {Efficient Shared Memory Message Passing for Inter-{VM} Communications},
  booktitle = {Euro-Par 2008 Workshops -- Parallel Processing},
  editor    = {César, Eduardo and Alexander, Michael and Streit, Achim and Träff, Jesper Larsson and Cérin, Christophe and Knüpfer, Andreas and Kranzlmüller, Dieter and Jha, Shantenu},
  series    = {Lecture Notes in Computer Science},
  volume    = {5415},
  pages     = {53--62},
  year      = {2009},
  publisher = {Springer Berlin Heidelberg},
  isbn      = {978-3-642-00954-9},
  doi       = {10.1007/978-3-642-00955-6_7},
  url       = {http://dx.doi.org/10.1007/978-3-642-00955-6_7},
  language  = {English}
}

2008

@inproceedings{zpj2008,
  author    = {Zuckerman, St{\'e}phane and P{\'e}rache, Marc and Jalby, William},
  title     = {Fine Tuning Matrix Multiplications on Multicore},
  booktitle = {Proceedings of the 15th International Conference on High Performance Computing},
  series    = {HiPC'08},
  year      = {2008},
  isbn      = {978-3-540-89893-1},
  location  = {Bangalore, India},
  pages     = {30--41},
  numpages  = {12},
  url       = {http://dl.acm.org/citation.cfm?id=1791889.1791898},
  acmid     = {1791898},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  keywords  = {BLAS, cache coherency, multicore},
}

@inproceedings{pjn2008,
  author    = {P{\'e}rache, Marc and Jourdren, Herv{\'e} and Namyst, Raymond},
  title     = {{MPC}: A Unified Parallel Runtime for Clusters of {NUMA} Machines},
  booktitle = {Proceedings of the 14th International Euro-Par Conference on Parallel Processing},
  series    = {Euro-Par '08},
  year      = {2008},
  isbn      = {978-3-540-85450-0},
  location  = {Las Palmas de Gran Canaria, Spain},
  pages     = {78--88},
  numpages  = {11},
  url       = {http://dx.doi.org/10.1007/978-3-540-85451-7_9},
  doi       = {10.1007/978-3-540-85451-7_9},
  acmid     = {1429633},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
}