核心服务器上跑了一堆脚本和程序,难免有时会出现僵尸进程(这里指卡死、不再响应的进程),半死不活地在那里占用资源。最初只是写了个根据关键字查杀进程的 Linux shell 脚本,后来发现很多时候进程卡死,其实是它内部调用的子进程出了问题,这时候光杀父进程并不能解决根本问题。比如 rsync 通过 ssh 连接时,rsync 本身没问题,但 ssh 可能已经卡死了。因此重新写了脚本,递归查找子进程。
#!/bin/bash
# Recursively find the deepest (leaf) child processes below a given parent
# process -- the ones most likely to be what is actually hung -- and kill them.
#
# Usage: sh SCRIPT PID [-v]
#   PID  top-level parent process ID whose process tree is walked
#   -v   dry run: only show the process tree, do not kill anything

# PID of the process-tree root, taken from the first argument.
ParentProcessID=${1:-}
if [ -z "${ParentProcessID}" ]; then
  echo "Please Supply the top Parent Process ID to be killed!" >&2
  echo "Usage:sh $0 PID [-v]" >&2
  echo "PID The Parent Process ID as root" >&2
  echo "-v is this argument supplied,no real kill operation will be performed,only process tree be show." >&2
  exit 1
fi

# Refuse anything that is not a plain decimal number: the value is later
# compared against `ps` output and its descendants are passed to `kill`.
case "${ParentProcessID}" in
  *[!0-9]*)
    echo "Invalid PID: ${ParentProcessID}" >&2
    echo "Usage:sh $0 PID [-v]" >&2
    exit 1
    ;;
esac

# 1 = really kill the leaf processes, 0 = dry run (selected with -v).
IsRealKillDo=1
if [ "${2:-}" = "-v" ]; then
  IsRealKillDo=0
fi

echo "Begin Kill the Leaf Process of process ${ParentProcessID}" >&2

# Accumulates the leaf PIDs found while walking the process tree.
killpidList=""
#######################################
# Recursively walk the process tree below a PID and collect its leaf
# processes (processes that have no children of their own).
# Globals:
#   killpidList - appended to; every leaf PID is added prefixed with a
#                 literal "\n" escape, so `echo -e` / `printf '%b'`
#                 expands the list to one PID per line.
# Arguments:
#   $1 - PID of the process whose descendants are examined.
# Outputs:
#   Writes the `ps` entry of every visited process, and a marker line for
#   every leaf found, to stderr.
#######################################
loopNextSubProcess() {
  local nParentProcessID=$1
  local tmpPidList=""
  local theNextPid

  # Direct children are selected by comparing the PPID column exactly;
  # a textual grep for the PID would also match longer PIDs that merely
  # end in the same digits. This script's own PID is skipped so it never
  # ends up in the kill list.
  tmpPidList=$(ps -A -o pid= -o ppid= \
    | awk -v parent="${nParentProcessID}" -v self="$$" \
      '$2 == parent && $1 != self { print $1 }')

  # Show the process currently being visited.
  ps -o pid,ppid,args -ww -p "${nParentProcessID}" >&2

  if [ -z "${tmpPidList}" ]; then
    echo "****Got One Leaf = [${nParentProcessID}]****" >&2
    killpidList="${killpidList}\n${nParentProcessID}"
    return
  fi

  # Intentionally unquoted: the list holds one PID per line and is split
  # into words here.
  for theNextPid in ${tmpPidList}; do
    loopNextSubProcess "${theNextPid}"
  done
}
# Walk the process tree below the requested PID; the leaf PIDs found are
# accumulated in killpidList, separated by "\n" escape sequences.
loopNextSubProcess "${ParentProcessID}"

# Kill only when explicitly enabled and something was found; an unset flag
# is treated as a dry run so nothing is killed by accident.
if [ "${IsRealKillDo:-0}" = "1" ] && [ -n "${killpidList}" ]; then
  # printf '%b' expands the "\n" escapes; the unquoted substitution then
  # splits the result into one word per PID.
  for curpid in $(printf '%b' "${killpidList}"); do
    # Never hand anything but a plain decimal PID to kill.
    case "${curpid}" in
      '' | *[!0-9]*) continue ;;
    esac
    echo "kill -9 ${curpid}"
    # SIGKILL is used on purpose: the targets are processes that are
    # already hung.
    kill -9 "${curpid}"
  done
else
  # Dry run (or nothing found): print the leaf PIDs, one per line.
  printf '%b\n' "${killpidList}"
fi