job_killer.sh 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #!/bin/sh
  2. # Usage: job_killer.sh [-c]? [grace_period_seconds] [process_pattern] [sidecar]?
  3. #
  4. # This script waits for a termination signal and gracefully terminates another process before exiting.
  5. #
  6. # Attempts to gracefully kill a process by sending SIGTERM to the first process that matches
  7. # the pattern. If "-c" is set, it will also signal all child processes of the main process.
  8. # All processes are forcibly killed if they have not exited after the grace period.
  9. #
  10. # Example: if process that should be killed has start command "./run_job.sh", and grace
  11. # period should be 30s, would run "./job_killer.sh 30 ./run_job.sh".
  12. kill_child_procs=false
  13. while getopts ":c" opt; do
  14. case $opt in
  15. c)
  16. kill_child_procs=true
  17. esac
  18. done
  19. if $kill_child_procs
  20. then
  21. grace_period_seconds=$2
  22. target=$3
  23. sidecar=$4
  24. else
  25. grace_period_seconds=$1
  26. target=$2
  27. sidecar=$3
  28. fi
  29. global_timeout=$TIMEOUT
  30. if [ -z "$global_timeout" ]; then
  31. global_timeout=3600
  32. fi
  33. echo "set global timeout value of $global_timeout"
  34. pattern="$(printf '[%s]%s' $(echo $target | cut -c 1) $(echo $target | cut -c 2-))"
  35. graceful_shutdown() {
  36. echo "starting graceful shutdown..."
  37. local timeout=$1
  38. echo "searching for process pattern: $pattern"
  39. local target_pid=$(pgrep -f $pattern -l | grep -v 'job_killer.sh' | grep -v 'wait_for_job.sh' | grep -v 'grep' | awk '{ printf "%d ", $1 }' | sort)
  40. local list="$target_pid"
  41. if [ -n "$target_pid" ]; then
  42. # request graceful shutdown from target_pid
  43. kill -0 ${target_pid} 2>/dev/null && kill -TERM ${target_pid}
  44. if $kill_child_procs
  45. then
  46. for c in $(ps -o pid= --ppid $target_pid); do
  47. # request graceful shutdown of all children, and append to process list
  48. kill -0 $c 2>/dev/null && kill -TERM $c && list="$list $c" || true
  49. done
  50. fi
  51. # schedule hard kill after timeout
  52. (sleep ${timeout}; kill -9 -${target_pid} 2>/dev/null || true) &
  53. local killer=${!}
  54. # wait for processes to finish
  55. for c in $list; do
  56. echo "waiting for process $c"
  57. tail --pid=$c -f /dev/null
  58. done
  59. wait ${list} 2>/dev/null || true
  60. # children exited gracefully - cancel timer
  61. sleep 0.1 && kill -9 ${killer} 2>/dev/null && target_pid="" || true
  62. fi
  63. # run the sidecar killer, this will terminate any additional sidecars if necessary
  64. if [ -n "$sidecar" ]; then
  65. echo "killing sidecar command: $sidecar"
  66. ./sidecar_killer.sh $sidecar
  67. fi
  68. echo "Exit Gracefully (0)" && exit 0
  69. }
  70. trap 'graceful_shutdown $grace_period_seconds $target' TERM INT HUP
  71. sleep 2
  72. echo "waiting for job to start..."
  73. timeout 10s ./wait_for_job.sh $pattern
  74. target_pid=$(pgrep -f $pattern -l | grep -v 'job_killer.sh' | grep -v 'wait_for_job.sh' | grep -v 'grep' | awk '{ printf "%d ", $1 }' | sort)
  75. target_pid_name=$(pgrep -f $pattern -l | grep -v 'job_killer.sh' | grep -v 'wait_for_job.sh' | grep -v 'grep')
  76. if [ -n "$target_pid" ]; then
  77. echo "targeting pids $target_pid matched by $target_pid_name"
  78. # schedule hard kill after global timeout
  79. is_global_shutdown=""
  80. (sleep ${global_timeout}; echo "triggering global shutdown" && is_global_shutdown="true" && graceful_shutdown $grace_period_seconds $target || true) &
  81. global_killer=${!}
  82. tail --pid=$target_pid -f /dev/null &
  83. child=$!
  84. wait "$child"
  85. if [ -z "$is_global_shutdown" ]; then
  86. # cancel hard kill timer
  87. sleep 0.1 && kill -9 ${global_killer} 2>/dev/null || true
  88. graceful_shutdown $grace_period_seconds $target
  89. fi
  90. else
  91. echo "no process could be targeted within 10s, initiating shutdown"
  92. if [ -n "$sidecar" ]; then
  93. echo "killing sidecar command: $sidecar"
  94. ./sidecar_killer.sh $sidecar
  95. fi
  96. fi