日韩无码专区无码一级三级片|91人人爱网站中日韩无码电影|厨房大战丰满熟妇|AV高清无码在线免费观看|另类AV日韩少妇熟女|中文日本大黄一级黄色片|色情在线视频免费|亚洲成人特黄a片|黄片wwwav色图欧美|欧亚乱色一区二区三区

RELATED CONSULTING
相關咨詢
選擇下列產品馬上在線溝通
服務時間:8:30-17:00
你可能遇到了下面的問題
關閉右側工具欄

新聞中心

這里有您想知道的互聯網營銷解決方案
用GDB排查Python程序故障

某Team在用Python開發一些代碼,涉及子進程以及設法消除僵尸進程的需求。實踐中他們碰上Python程序非預期退出的現象。最初他們決定用GDB調試Python解釋器,查看exit()的源頭。我聽了之后,覺得這個問題應該用別的調試思路。幫他們排查這次程序故障時,除去原始問題,還衍生了其他問題。

這次的問題相比西安研發中心曾經碰上的Python信號處理問題,有不少基礎知識、先驗知識是共用的,此處不再做普及,感興趣的同學可以翻看我以前發過的文章。

下文是一次具體的調試、分析記錄。為了簡化現場、方便調試,已將原始問題、衍生問題濃縮成DebugPythonWithGDB_6.py、DebugPythonWithGDB_7.py。

$ vi DebugPythonWithGDB_6.py

PHP

#!/usr/bin/env python# -*- encoding: utf-8 -*-import sys, os, signal, subprocess, shlex, tracebackdef on_SIGCHLD ( signum, frame ) :print "[on_SIGCHLD"sys.stdout.write( "signum = %u\n" % signum )traceback.print_stack( frame )print os.waitpid( -1, os.WNOHANG )"""try :print os.waitpid( -1, os.WNOHANG )except OSError :sys.stdout.write( 'Line[%u]: OSError\n' % sys.exc_info()[2].tb_lineno )"""print "on_SIGCHLD]"def do_more ( count ) :print '[do_more() begin %u]' % countos.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )"""## 這里存在競(jìng)爭(zhēng)條件,可以增加觸發(fā)OSError異常的概率#os.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )os.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )os.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )os.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )"""print '[do_more() end %u]' % countdef main ( prog, args ) :if 0 == len( args ) :print 'Usage: %s ' % progelse :sys.stdout.write( "Parent = %u\n" % os.getpid() )## 本例中,即使有下列代碼,Ctrl-C仍然無(wú)效。#signal.signal( signal.SIGINT, signal.SIG_DFL )## signal.signal( signal.SIGCHLD, signal.SIG_IGN )#signal.signal( signal.SIGCHLD, on_SIGCHLD )#count = 0while True :## 本例中父進(jìn)程只是一個(gè)調(diào)度框架,不需要與子進(jìn)程進(jìn)行通信,因此不# 需要特別處理"stdin=None, stdout=None, stderr=None"。#child = subprocess.Popen \(## 不要直接用args[0].split(),它在處理單、雙引號(hào)時(shí)不是我們# 期望的行為。考慮這種例子,ls -l "/tmp/non exist"#shlex.split( args[0] ),## all file descriptors except 0, 1 and 2 will be closed# before the child process is executed#close_fds = True,cwd = "/tmp")sys.stdout.write( "Child = %u\n" % child.pid )## child.send_signal( signal.SIGTERM )# child.terminate()#child.kill()## child.wait()#do_more( count )count += 1if '__main__' == __name__ :try :main( os.path.basename( sys.argv[0] ), sys.argv[1:] )except KeyboardInterrupt :pass

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

#!/usr/bin/env python

# -*- encoding: utf-8 -*-

import sys, os, signal, subprocess, shlex, traceback

def on_SIGCHLD ( signum, frame ) :

    #
    # Python-level SIGCHLD handler used by this demo: logs the signal number
    # and the stack of the interrupted code, then tries to reap one child
    # without blocking.
    #
    # signum : signal number handed to the handler
    # frame  : stack frame that was running when the handler was invoked
    #

    print "[on_SIGCHLD"

    sys.stdout.write( "signum  = %u\n" % signum )

    traceback.print_stack( frame )

    #
    # Unguarded call: os.waitpid() raises OSError when no child is left to
    # wait for, and nothing here catches it, so the exception propagates
    # out of the handler.
    #

    print os.waitpid( -1, os.WNOHANG )

    #
    # The string literal below is a disabled variant that catches OSError
    # and prints the line number instead of letting it propagate.
    #

    """

    try :

        print os.waitpid( -1, os.WNOHANG )

    except OSError :

        sys.stdout.write( 'Line[%u]: OSError\n' % sys.exc_info()[2].tb_lineno )

    """

    print "on_SIGCHLD]"

def do_more ( count ) :

    #
    # Simulates extra work done by the parent between child launches: runs
    # one shell command through os.system() that prints the shell's own PID
    # and sleeps for one second.
    #
    # count : iteration number, used only in the begin/end log lines
    #

    print '[do_more() begin %u]' % count

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

    #
    # The string literal below holds disabled extra os.system() calls; it is
    # an expression statement with no effect at runtime.
    #

    """

    #

    # 這里存在競(jìng)爭(zhēng)條件,可以增加觸發(fā)OSError異常的概率

    #

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

    """

    print '[do_more() end %u]' % count

def main ( prog, args ) :

    #
    # Demo driver: installs the signal handlers, then loops forever spawning
    # the command given in args[0], killing it immediately and calling
    # do_more(). The killed child is never waited for here; reaping is left
    # to the SIGCHLD handler.
    #
    # prog : program name shown in the usage message
    # args : command-line arguments; args[0] is the command line to spawn
    #

    if 0 == len( args ) :

        #
        # NOTE(review): the usage text ends right after the program name; a
        # placeholder such as "<command>" was presumably lost when the
        # listing was extracted - TODO confirm against the original script.
        #

        print 'Usage: %s ' % prog

    else :

        sys.stdout.write( "Parent  = %u\n" % os.getpid() )

        #

        # In this example Ctrl-C still has no effect even with the call below.

        #

        signal.signal( signal.SIGINT, signal.SIG_DFL )

        #

        # signal.signal( signal.SIGCHLD, signal.SIG_IGN )

        #

        signal.signal( signal.SIGCHLD, on_SIGCHLD )

        #

        count   = 0

        while True :

            #

            # In this example the parent is only a scheduling framework and does

            # not talk to the child, so "stdin=None, stdout=None, stderr=None"

            # needs no special handling.

            #
            # NOTE(review): in this listing a blank line follows the trailing
            # backslash, which separates Popen from its argument list; this
            # looks like an extraction artifact - TODO confirm against the
            # original script.
            #

            child   = subprocess.Popen  \

            (

                #

                # Do not use args[0].split() directly: it does not treat single

                # and double quotes the way we want. Consider: ls -l "/tmp/non exist"

                #

                shlex.split( args[0] ),

                #

                # all file descriptors except 0, 1 and 2 will be closed

                # before the child process is executed

                #

                close_fds   = True,

                cwd         = "/tmp"

            )

            sys.stdout.write( "Child   = %u\n" % child.pid )

            #

            # child.send_signal( signal.SIGTERM )

            # child.terminate()

            #

            child.kill()

            #

            # child.wait()

            #

            do_more( count )

            count  += 1

#
# Script entry point: passes the script's base name and the remaining
# command-line arguments to main(), and exits quietly on KeyboardInterrupt.
#
if '__main__' == __name__ :

    try :

        main( os.path.basename( sys.argv[0] ), sys.argv[1:] )

    except KeyboardInterrupt :

        pass

 

PHP

$ python DebugPythonWithGDB_6.py 'python -c "import time;time.sleep(3600)"'Parent = 10244Child = 10245[do_more() begin 0][on_SIGCHLDsignum = 17File "DebugPythonWithGDB_6.py", line 81, inmain( os.path.basename( sys.argv[0] ), sys.argv[1:] )File "DebugPythonWithGDB_6.py", line 76, in maindo_more( count )File "DebugPythonWithGDB_6.py", line 20, in do_moreprint '[do_more() begin %u]' % count(10245, 9)on_SIGCHLD]Child = 10246[on_SIGCHLDsignum = 17File "DebugPythonWithGDB_6.py", line 81, inmain( os.path.basename( sys.argv[0] ), sys.argv[1:] )File "DebugPythonWithGDB_6.py", line 76, in maindo_more( count )File "DebugPythonWithGDB_6.py", line 21, in do_moreos.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )Traceback (most recent call last):File "DebugPythonWithGDB_6.py", line 81, inmain( os.path.basename( sys.argv[0] ), sys.argv[1:] )File "DebugPythonWithGDB_6.py", line 76, in maindo_more( count )File "DebugPythonWithGDB_6.py", line 21, in do_moreos.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )File "DebugPythonWithGDB_6.py", line 10, in on_SIGCHLDprint os.waitpid( -1, os.WNOHANG )OSError: [Errno 10] No child processes

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

$ python DebugPythonWithGDB_6.py 'python -c "import time;time.sleep(3600)"'

Parent  = 10244

Child   = 10245

[do_more() begin 0]

[on_SIGCHLD

signum  = 17

  File "DebugPythonWithGDB_6.py", line 81, in <module>

    main( os.path.basename( sys.argv[0] ), sys.argv[1:] )

  File "DebugPythonWithGDB_6.py", line 76, in main

    do_more( count )

  File "DebugPythonWithGDB_6.py", line 20, in do_more

    print '[do_more() begin %u]' % count

(10245, 9)

on_SIGCHLD]

Child   = 10246

[on_SIGCHLD

signum  = 17

  File "DebugPythonWithGDB_6.py", line 81, in <module>

    main( os.path.basename( sys.argv[0] ), sys.argv[1:] )

  File "DebugPythonWithGDB_6.py", line 76, in main

    do_more( count )

  File "DebugPythonWithGDB_6.py", line 21, in do_more

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

Traceback (most recent call last):

  File "DebugPythonWithGDB_6.py", line 81, in <module>

    main( os.path.basename( sys.argv[0] ), sys.argv[1:] )

  File "DebugPythonWithGDB_6.py", line 76, in main

    do_more( count )

  File "DebugPythonWithGDB_6.py", line 21, in do_more

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

  File "DebugPythonWithGDB_6.py", line 10, in on_SIGCHLD

    print os.waitpid( -1, os.WNOHANG )

OSError: [Errno 10] No child processes

流程進入on_SIGCHLD(),但os.waitpid()拋出OSError異常。幫助里寫的是,如果系統調用waitpid()返回-1,就拋出異常: An OSError is raised with the value of errno when the syscall returns -1. 10245號子進程在on_SIGCHLD()里waitpid()成功,(10245, 9)中的9表示該進程是被SIGKILL干掉的,符合預期。 10246號子進程是do_more()里的os.system()產生的shell進程,它結束時向10244號父進程投遞了SIGCHLD信號。 on_SIGCHLD()里waitpid()時,已經在別處wait*()過,10246號子進程已經徹底消失,系統調用waitpid()返回-1,Python函數os.waitpid()拋出異常。 整個過程非常復雜,用偽代碼描述如下:

PHP

do_more()os.system()posix_system() // posixmodule.c__libc_system() // weak_alias (__libc_system, system)do_system() // sysdeps/posix/system.c/** SIG_IGN** Ctrl-C暫時(shí)失效*/sigaction( SIGINT, &sa, &intr )/** 屏蔽(阻塞)SIGCHLD信號(hào)*/sigaddset( &sa.sa_mask, SIGCHLD )sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )fork()子進(jìn)程(10246號(hào)子進(jìn)程)/** 恢復(fù)原有SIGINT信號(hào)處理方式*/sigaction( SIGINT, &intr, (struct sigaction *)NULL )/** 調(diào)用"sh -c ..."*/execve()[shell子進(jìn)程結(jié)束,向DebugPythonWithGDB_6.py投遞SIGCHLD][由于SIGCHLD信號(hào)已被屏蔽(阻塞),其保持在內(nèi)核態(tài)的未決信號(hào)鏈上]父進(jìn)程(10244號(hào)父進(jìn)程)/** 同步調(diào)用,會(huì)阻塞。不是在信號(hào)句柄中異步調(diào)用。** 10246號(hào)子進(jìn)程在此被wait*()回收后徹底消失*/waitpid( pid, &status, 0 )/** 恢復(fù)原有SIGINT信號(hào)處理方式*/sigaction( SIGINT, &intr, (struct sigaction *)NULL )/** 取消對(duì)SIGCHLD的屏蔽(阻塞)*/sigprocmask( SIG_SETMASK, &omask, (sigset_t *)NULL )[SIGCHLD信號(hào)的屏蔽(阻塞)被取消][DebugPythonWithGDB_6.py的C級(jí)信號(hào)句柄signal_handler()安排"延遲調(diào)用"后返回][DebugPythonWithGDB_6.py的on_SIGCHLD()此時(shí)并未得到執(zhí)行,因?yàn)閎uilt-in函數(shù)os.system()尚未返回]/** built-in函數(shù)os.system()返回后,10244號(hào)父進(jìn)程開(kāi)始處理"延遲調(diào)用",調(diào)用* Python級(jí)信號(hào)句柄。這個(gè)SIGCHLD信號(hào)是10246號(hào)子進(jìn)程投遞過(guò)來(lái)的。** DebugPythonWithGDB_6.py的on_SIGCHLD()得到執(zhí)行*/on_SIGCHLD()/** 調(diào)用waitpid( -1, &status, WNOHANG ),試圖處理10246號(hào)子進(jìn)程。** 10246號(hào)子進(jìn)程已為前述waitpid( pid, &status, 0 )所處理,此處系統(tǒng)調(diào)用* 返回-1,導(dǎo)致os.waitpid()拋出OSError異常。*/os.waitpid( -1, os.WNOHANG )

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

do_more()

    os.system()

        posix_system()          // posixmodule.c

            __libc_system()     // weak_alias (__libc_system, system)

                do_system()     // sysdeps/posix/system.c

                    /*

                     * SIG_IGN

                     *

                     * Ctrl-C暫時(shí)失效

                     */

                    sigaction( SIGINT, &sa, &intr )

                    /*

                     * 屏蔽(阻塞)SIGCHLD信號(hào)

                     */

                    sigaddset( &sa.sa_mask, SIGCHLD )

                    sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )

                    fork()

                        子進(jìn)程(10246號(hào)子進(jìn)程)

                            /*

                             * 恢復(fù)原有SIGINT信號(hào)處理方式

                             */

                            sigaction( SIGINT, &intr, (struct sigaction *)NULL )

                            /*

                             * 調(diào)用"sh -c ..."

                             */

                            execve()

                            [shell子進(jìn)程結(jié)束,向DebugPythonWithGDB_6.py投遞SIGCHLD]

                            [由于SIGCHLD信號(hào)已被屏蔽(阻塞),其保持在內(nèi)核態(tài)的未決信號(hào)鏈上]

                        父進(jìn)程(10244號(hào)父進(jìn)程)

                            /*

                             * 同步調(diào)用,會(huì)阻塞。不是在信號(hào)句柄中異步調(diào)用。

                             *

                             * 10246號(hào)子進(jìn)程在此被wait*()回收后徹底消失

                             */

                            waitpid( pid, &status, 0 )

                            /*

                             * 恢復(fù)原有SIGINT信號(hào)處理方式

                             */

                            sigaction( SIGINT, &intr, (struct sigaction *)NULL )

                            /*

                             * 取消對(duì)SIGCHLD的屏蔽(阻塞)

                             */

                            sigprocmask( SIG_SETMASK, &omask, (sigset_t *)NULL )

                            [SIGCHLD信號(hào)的屏蔽(阻塞)被取消]

                            [DebugPythonWithGDB_6.py的C級(jí)信號(hào)句柄signal_handler()安排"延遲調(diào)用"后返回]

                            [DebugPythonWithGDB_6.py的on_SIGCHLD()此時(shí)并未得到執(zhí)行,因?yàn)閎uilt-in函數(shù)os.system()尚未返回]

/*

* built-in函數(shù)os.system()返回后,10244號(hào)父進(jìn)程開(kāi)始處理"延遲調(diào)用",調(diào)用

* Python級(jí)信號(hào)句柄。這個(gè)SIGCHLD信號(hào)是10246號(hào)子進(jìn)程投遞過(guò)來(lái)的。

*

* DebugPythonWithGDB_6.py的on_SIGCHLD()得到執(zhí)行

*/

on_SIGCHLD()

    /*

     * 調(diào)用waitpid( -1, &status, WNOHANG ),試圖處理10246號(hào)子進(jìn)程。

     *

     * 10246號(hào)子進(jìn)程已為前述waitpid( pid, &status, 0 )所處理,此處系統(tǒng)調(diào)用

     * 返回-1,導(dǎo)致os.waitpid()拋出OSError異常。

     */

    os.waitpid( -1, os.WNOHANG )

整個過程之所以如此復雜,主要是因為Python的信號處理機制比較復雜,讓已經非常復雜的Linux信號機制再添變數。參看:

PHP

《2.50 對(duì)Python解釋器進(jìn)行調(diào)試》《22.0 Linux信號(hào)機(jī)制》

1

2

《2.50 對(duì)Python解釋器進(jìn)行調(diào)試》

《22.0 Linux信號(hào)機(jī)制》

就本例而言,為了確保DebugPythonWithGDB_6.py不因OSError異常而終止,只需在on_SIGCHLD()中調用os.waitpid()時捕捉OSError異常:

PHP

def on_SIGCHLD ( signum, frame ) :try :print os.waitpid( -1, os.WNOHANG )except OSError :sys.stdout.write( 'Line[%u]: OSError\n' % sys.exc_info()[2].tb_lineno )

1

2

3

4

5

def on_SIGCHLD ( signum, frame ) :

    #
    # Guarded SIGCHLD handler: tries to reap one child without blocking and,
    # if os.waitpid() raises OSError, prints the line number of the failure
    # instead of letting the exception propagate.
    #
    # signum, frame : standard signal-handler arguments; unused here
    #

    try :

        print os.waitpid( -1, os.WNOHANG )

    except OSError :

        sys.stdout.write( 'Line[%u]: OSError\n' % sys.exc_info()[2].tb_lineno )

前述觀點有些是動態調試得到,有些是靜態分析得到。有人可能問了,為什么不攔截Python進程的C級信號句柄,查看SIGCHLD信號源,以此確認10246號子進程可能被回收兩次?其實我最初也想這么干來著,但這是行不通的,因為Python的C級信號句柄signal_handler()是那種最原始的單形參信號句柄,不是高大上的三形參信號句柄。 用GDB調試Python解釋器:

PHP

# gdb -q -ex "b *signal_handler" -ex r --args /usr/bin/python2.7-dbg DebugPythonWithGDB_6.py '/usr/bin/python2.7-dbg -c "import time;time.sleep(3600)"'...Breakpoint 1 at 0x8216f2d: file ../Modules/signalmodule.c, line 185.Starting program: /usr/bin/python2.7-dbg DebugPythonWithGDB_6.py /usr/bin/python2.7-dbg\ -c\ \"import\ time\;time.sleep\(3600\)\"[Thread debugging using libthread_db enabled]Using host libthread_db library "/lib/i386-linux-gnu/i686/cmov/libthread_db.so.1".Parent = 10284Child = 10288[do_more() begin 0]Child = 10289Breakpoint 1, signal_handler (sig_num=17) at ../Modules/signalmodule.c:185185 {(gdb) py-bt#10 Frame 0xb7c20034, for file DebugPythonWithGDB_6.py, line 21, in do_more (count=0)os.system( r'printf "Child = %u\n" $$;/bin/sleep 1' )#13 Frame 0xb7cb37dc, for file DebugPythonWithGDB_6.py, line 76, in main (prog='DebugPythonWithGDB_6.py', args=['/usr/bin/python2.7-dbg -c "import time;time.sleep(3600)"'], count=0, child=)do_more( count )#16 Frame 0xb7cbe49c, for file DebugPythonWithGDB_6.py, line 81, in ()main( os.path.basename( sys.argv[0] ), sys.argv[1:] )(gdb) bt 7#0 signal_handler (sig_num=17) at ../Modules/signalmodule.c:185#1#2 0xb7fdcd3c in __kernel_vsyscall ()#3 0xb7db25eb in __sigprocmask (how=how@entry=2, set=0x0, set@entry=0xbffff0d4, oset=oset@entry=0x0) at ../sysdeps/unix/sysv/linux/sigprocmask.c:57#4 0xb7dc2084 in do_system (line=line@entry=0xb7cbf9e4 "printf \"Child = %u\\n\" $$;/bin/sleep 1") at ../sysdeps/posix/system.c:161#5 0xb7dc2380 in __libc_system (line=line@entry=0xb7cbf9e4 "printf \"Child = %u\\n\" $$;/bin/sleep 1") at ../sysdeps/posix/system.c:184#6 0xb7fa9bfb in system (line=0xb7cbf9e4 "printf \"Child = %u\\n\" $$;/bin/sleep 1") at pt-system.c:28(More stack frames follow...)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

# gdb -q -ex "b *signal_handler" -ex r --args /usr/bin/python2.7-dbg DebugPythonWithGDB_6.py '/usr/bin/python2.7-dbg -c "import time;time.sleep(3600)"'

...

Breakpoint 1 at 0x8216f2d: file ../Modules/signalmodule.c, line 185.

Starting program: /usr/bin/python2.7-dbg DebugPythonWithGDB_6.py /usr/bin/python2.7-dbg\ -c\ \"import\ time\;time.sleep\(3600\)\"

[Thread debugging using libthread_db enabled]

Using host libthread_db library "/lib/i386-linux-gnu/i686/cmov/libthread_db.so.1".

Parent  = 10284

Child   = 10288

[do_more() begin 0]

Child   = 10289

Breakpoint 1, signal_handler (sig_num=17) at ../Modules/signalmodule.c:185

185     {

(gdb) py-bt

#10 Frame 0xb7c20034, for file DebugPythonWithGDB_6.py, line 21, in do_more (count=0)

    os.system( r'printf "Child   = %u\n" $$;/bin/sleep 1' )

#13 Frame 0xb7cb37dc, for file DebugPythonWithGDB_6.py, line 76, in main (prog='DebugPythonWithGDB_6.py', args=['/usr/bin/python2.7-dbg -c "import time;time.sleep(3600)"'], count=0, child=)

    do_more( count )

#16 Frame 0xb7cbe49c, for file DebugPythonWithGDB_6.py, line 81, in <module> ()

    main( os.path.basename( sys.argv[0] ), sys.argv[1:] )

(gdb) bt 7

#0  signal_handler (sig_num=17) at ../Modules/signalmodule.c:185

#1  <signal handler called>

#2  0xb7fdcd3c in __kernel_vsyscall ()

#3  0xb7db25eb in __sigprocmask (how=how@entry=2, set=0x0, set@entry=0xbffff0d4, oset=oset@entry=0x0) at ../sysdeps/unix/sysv/linux/sigprocmask.c:57

#4  0xb7dc2084 in do_system (line=line@entry=0xb7cbf9e4 "printf \"Child   = %u\\n\" $$;/bin/sleep 1") at ../sysdeps/posix/system.c:161

#5  0xb7dc2380 in __libc_system (line=line@entry=0xb7cbf9e4 "printf \"Child   = %u\\n\" $$;/bin/sleep 1") at ../sysdeps/posix/system.c:184

#6  0xb7fa9bfb in system (line=0xb7cbf9e4 "printf \"Child   = %u\\n\" $$;/bin/sleep 1") at pt-system.c:28

(More stack frames follow...)

查看#4的system.c:161,這個位置已經在waitpid( pid, &status, 0 )之后: sigprocmask( SIG_SETMASK, &omask, (sigset_t *)NULL ) 其作用是取消對SIGCHLD的屏蔽(阻塞)。 此時內存布局如下:

PHP

內(nèi)存高址方向fpstate // ESP+0x2DC output/x *(struct _fpstate *)($esp+0x2dc)retcode // ESP+0x2D4 x/3i $esp+0x2d4extramask // ESP+0x2D0 x/1wx $esp+0x2d0fpstate_unused // ESP+0x60 output/x *(struct _fpstate *)($esp+0x60)sigcontext_ia32 // ESP+8 output/x *(struct sigcontext *)($esp+8)sig // ESP+4 信號(hào)值,信號(hào)句柄***形參pretcode // ESP RetAddr=__kernel_sigreturn// hexdump $esp 0x2dc內(nèi)存低址方向

1

2

3

4

5

6

7

8

9

10

11

12

內存高址方向

fpstate         // ESP+0x2DC output/x *(struct _fpstate *)($esp+0x2dc)

retcode         // ESP+0x2D4 x/3i $esp+0x2d4

extramask       // ESP+0x2D0 x/1wx $esp+0x2d0

fpstate_unused  // ESP+0x60 output/x *(struct _fpstate *)($esp+0x60)

sigcontext_ia32 // ESP+8 output/x *(struct sigcontext *)($esp+8)

sig             // ESP+4 信號值,信號句柄第一形參

pretcode        // ESP RetAddr=__kernel_sigreturn

                // hexdump $esp 0x2dc

內存低址方向

 

PHP

(gdb) x/2wa $esp0xbfffea6c: 0xb7fdcd18 0x11(gdb) x/3i $esp+0x2d40xbfffed40: pop eax0xbfffed41: mov eax,0x770xbfffed46: int 0x80(gdb) output/x *(struct sigcontext *)($esp+8){gs = 0x33,__gsh = 0x0,fs = 0x0,__fsh = 0x0,es = 0x7b,__esh = 0x0,ds = 0x7b,__dsh = 0x0,edi = 0xb7f2a000,esi = 0x8,ebp = 0x1,esp = 0xbfffeff0,ebx = 0x2,edx = 0x0,ecx = 0xbffff0d4,eax = 0x0,trapno = 0x1,err = 0x0,eip = 0xb7fdcd3c,cs = 0x73,__csh = 0x0,eflags = 0x246,esp_at_signal = 0xbfffeff0,ss = 0x7b,__ssh = 0x0,fpstate = 0xbfffed50,oldmask = 0x0,cr2 = 0x0}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

(gdb) x/2wa $esp

0xbfffea6c:     0xb7fdcd18  0x11

(gdb) x/3i $esp+0x2d4

   0xbfffed40:  pop    eax

   0xbfffed41:  mov    eax,0x77

   0xbfffed46:  int    0x80

(gdb) output/x *(struct sigcontext *)($esp+8)

{

  gs = 0x33,

  __gsh = 0x0,

  fs = 0x0,

  __fsh = 0x0,

  es = 0x7b,

  __esh = 0x0,

  ds = 0x7b,

  __dsh = 0x0,

  edi = 0xb7f2a000,

  esi = 0x8,

  ebp = 0x1,

  esp = 0xbfffeff0,

  ebx = 0x2,

  edx = 0x0,

  ecx = 0xbffff0d4,

  eax = 0x0,

  trapno = 0x1,

  err = 0x0,

  eip = 0xb7fdcd3c,

  cs = 0x73,

  __csh = 0x0,

  eflags = 0x246,

  esp_at_signal = 0xbfffeff0,

  ss = 0x7b,

  __ssh = 0x0,

  fpstate = 0xbfffed50,

  oldmask = 0x0,

  cr2 = 0x0

}

因為是單形參信號句柄,沒有siginfo,無法在用戶態獲知信號源。但我分析此時的信號源不是10289號子進程,而是10288號子進程。10288產生SIGCHLD時,SIGCHLD信號已被屏蔽(阻塞),只能保持在內核態的未決信號鏈上。之后待10289產生SIGCHLD時,sigpending.signal中相應位已經置位,10289產生的SIGCHLD被丟棄,不會進入內核態的未決信號鏈。SIGCHLD信號的屏蔽(阻塞)被取消后,從內核態的未決信號鏈上取出10288產生的SIGCHLD進行處理。于是斷點命中。 如果完全理解了前述實驗結果及分析,就會發現DebugPythonWithGDB_6.py存在競爭條件。subprocess.Popen()對應的子進程投遞SIGCHLD信號時,父進程有兩種可能:

PHP

1) os.system()調(diào)用sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )之前2) os.system()調(diào)用sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )之后

1

2

1) os.system()調用sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )之前

2) os.system()調用sigprocmask( SIG_BLOCK, &sa.sa_mask, &omask )之后

情況1)會觸發OSError異常,情況2)不會觸發OSError異常。執行: $ python DebugPythonWithGDB_6.py 'python -c "import time;time.sleep(3600)"' 有時會因OSError異常而終止,有時就一直循環執行下去。出現這種差異,正是競爭環境的表征。 小結一下: 假設針對SIGCHLD安裝了Python級信號句柄,其調用os.waitpid( -1, os.WNOHANG )回收子進程。如果別處會調用os.system(),則必須在os.waitpid()外側捕捉OSError異常。不建議這種方式的混用。 對waitpid()的分析到此就結束了,說點調試過程中出現的其他問題。 意外地發現Ctrl-C無法終止情況2),而我已經調用: signal.signal( signal.SIGINT, signal.SIG_DFL ) 這是因為do_system()中一上來就調用了:

PHP

sa.sa_handler = SIG_IGN;sigaction( SIGINT, &sa, &intr );

1

2

sa.sa_handler   = SIG_IGN;

sigaction( SIGINT, &sa, &intr );

導致Ctrl-C暫時失效,直至do_system()結束。假設DebugPythonWithGDB_6.py已經出現情況2),查看它的信號處理方式:

PHP

# ps auwx | grep pythonroot 10355 0.0 0.5 8116 5812 pts/0 S+ 15:57 0:00 python DebugPythonWithGDB_6.py python -c "import time;time.sleep(3600)"root 10389 0.0 0.0 0 0 pts/0 Z+ 15:57 0:00 [python]root 10393 0.0 0.0 2936 852 pts/1 R+ 15:57 0:00 grep python# stap -DMAXACTION=10000 -g /usr/share/doc/systemtap-doc/examples/process/psig.stp -x 1035510355: pythonHUP defaultINT ignored // 不是預(yù)期的defaultQUIT ignoredILL defaultTRAP defaultABRT defaultBUS defaultFPE defaultKILL defaultUSR1 defaultSEGV defaultUSR2 defaultPIPE ignoredALRM defaultTERM defaultSTKFLT defaultCHLD blocked,caught 0x818a480 0...

          1

          2

          3

          4

          5

          6

          7

          8

          9

          10

          11

          12

          13

          14

          15

          16

          17

          18

          19

          20

          21

          22

          23

          24

          # ps auwx | grep python

          root     10355  0.0  0.5   8116  5812 pts/0    S+   15:57   0:00 python DebugPythonWithGDB_6.py python -c "import time;time.sleep(3600)"

          root     10389  0.0  0.0      0     0 pts/0    Z+   15:57   0:00 [python]

新聞標題:用GDB排查Python程序故障
          當(dāng)前鏈接:http://www.5511xx.com/article/cdeosso.html