diff --git a/Utilities/Tools/jobutils.sh b/Utilities/Tools/jobutils.sh index cbdab6c9e0b43..84ae6552a26eb 100644 --- a/Utilities/Tools/jobutils.sh +++ b/Utilities/Tools/jobutils.sh @@ -46,10 +46,12 @@ o2_cleanup_shm_files() { } # Function to find out all the (recursive) child processes starting from a parent PID. -# The output includes includes the parent -# output is saved in child_pid_list +# The output includes the parent childprocs() { local parent=$1 + if [ ! "$2" ]; then + child_pid_list="" + fi if [ "$parent" ] ; then child_pid_list="$child_pid_list $parent" for childpid in $(pgrep -P ${parent}); do diff --git a/run/o2sim_parallel.cxx b/run/o2sim_parallel.cxx index 1c2971511af4d..670704e51dc73 100644 --- a/run/o2sim_parallel.cxx +++ b/run/o2sim_parallel.cxx @@ -118,12 +118,18 @@ int checkresult() return errors; } +std::vector<int> gChildProcesses; // global vector of child pids + // signal handler for graceful exit void sighandler(int signal) { if (signal == SIGINT || signal == SIGTERM) { - LOG(INFO) << "signal caught ... clean up and exit"; + LOG(INFO) << "o2-sim driver: Signal caught ... 
clean up and exit"; cleanup(); + // forward signal to all children + for (auto& pid : gChildProcesses) { + kill(pid, signal); + } exit(0); } } @@ -346,7 +352,7 @@ int main(int argc, char* argv[]) } return r; } else { - childpids.push_back(pid); + gChildProcesses.push_back(pid); close(pipe_serverdriver_fd[1]); std::cout << "Spawning particle server on PID " << pid << "; Redirect output to " << getServerLogName() << "\n"; launchThreadMonitoringEvents(pipe_serverdriver_fd[0], "DISTRIBUTING EVENT : "); @@ -380,7 +386,7 @@ int main(int argc, char* argv[]) "worker", "--mq-config", localconfig.c_str(), "--severity", "info", (char*)nullptr); return 0; } else { - childpids.push_back(pid); + gChildProcesses.push_back(pid); std::cout << "Spawning sim worker " << id << " on PID " << pid << "; Redirect output to " << workerlogss.str() << "\n"; } @@ -409,13 +415,13 @@ int main(int argc, char* argv[]) return 0; } else { std::cout << "Spawning hit merger on PID " << pid << "; Redirect output to " << getMergerLogName() << "\n"; - childpids.push_back(pid); + gChildProcesses.push_back(pid); close(pipe_mergerdriver_fd[1]); launchThreadMonitoringEvents(pipe_mergerdriver_fd[0], "EVENT FINISHED : "); } // wait on merger (which when exiting completes the workflow) - auto mergerpid = childpids.back(); + auto mergerpid = gChildProcesses.back(); int status, cpid; // wait just blocks and waits until any child returns; but we make sure to wait until merger is here @@ -426,8 +432,8 @@ int main(int argc, char* argv[]) } // we bring down all processes if one of them aborts if (WTERMSIG(status) == SIGABRT) { - for (auto p : childpids) { - kill(p, SIGABRT); + for (auto p : gChildProcesses) { + kill(p, SIGTERM); } cleanup(); LOG(FATAL) << "ABORTING DUE TO ABORT IN COMPONENT"; @@ -438,7 +444,7 @@ int main(int argc, char* argv[]) LOG(INFO) << "Simulation process took " << timer.RealTime() << " s"; // make sure the rest shuts down - for (auto p : childpids) { + for (auto p : gChildProcesses) { if (p != 
mergerpid) { LOG(DEBUG) << "SHUTTING DOWN CHILD PROCESS " << p; kill(p, SIGTERM);