首页
社区
课程
招聘
编译器角度下学习c/c++
2021-3-26 14:42 3238

编译器角度下学习c/c++

2021-3-26 14:42
3238

借看雪宝地记录一下调试gcc/g++ parser的方法:
1.下载源代码:https://gcc.gnu.org/mirrors.html
2.配置编译参数:

1
./configure --enable-languages=c,c++ --program-suffix=-10.2 --program-prefix=x86_64-linux-gnu- --build=x86_64-linux-gnu --disable-multilib --enable-lto --enable-plugin --prefix=/usr

3.编译:

1
make  && make install

4.gdb 调试编译器
c++代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#include <iostream>
// Base class for the using-declaration demo: one virtual member f(int) and
// two non-virtual members g(char) and h(int), each printing its own name.
struct B {
    virtual void f(int) { std::cout << "B::f\n"; }
    void g(char)        { std::cout << "B::g\n"; }
    void h(int)         { std::cout << "B::h\n"; }
 protected:
    int m; // B::m is protected
    typedef int value_type; // B::value_type is protected as well
};
 
// Derived struct: re-exposes B's protected members through using-declarations
// and declares its own f(int), g(int) and h(int) next to the names pulled in
// from B.
struct D : B {
    using B::m; // D::m is public
    using B::value_type; // D::value_type is public
 
    using B::f;
    void f(int) { std::cout << "D::f\n"; } // D::f(int) overrides B::f(int)
    using B::g;
    void g(int) { std::cout << "D::g\n"; } // both g(int) and g(char) are visible
                                           // as members of D
    using B::h;
    void h(int) { std::cout << "D::h\n"; } // D::h(int) hides B::h(int)
};
 
// Calls each member once through a D object and once through a B& bound to
// the same object, so the effect of every using-declaration can be observed.
int main()
{
    D d;
    B& b = d;
 
//    b.m = 2; // error, B::m is protected
    d.m = 1; // protected B::m is accessible as public D::m
    b.f(1); // calls derived f()
    d.f(1); // calls derived f()
    d.g(1); // calls derived g(int)
    d.g('a'); // calls base g(char)
    b.h(1); // calls base h()
    d.h(1); // calls derived h()
}

启动调试器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# gdb g++
GNU gdb (GDB) 8.3.1
Copyright (C) 2019 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
    <http://www.gnu.org/software/gdb/documentation/>.
 
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from g++...
(gdb) set follow-fork-mode child
(gdb) set args -g using.cpp
(gdb) b cpp_push_buffer(cpp_reader*, unsigned char const*, unsigned long, int)
Breakpoint 1 at 0x447d60: file ../../libcpp/directives.c, line 2574.
(gdb) r
Starting program: /usr/local/bin/g++ -g using.cpp
[Attaching after process 2969 vfork to child process 2973]
[New inferior 2 (process 2973)]
[Detaching vfork parent process 2969 after child exec]
[Inferior 1 (process 2969) detached]
process 2973 is executing new program: /usr/libexec/gcc/x86_64-linux-gnu/10.1.0/cc1plus
[Switching to process 2973]
 
Thread 2.1 "cc1plus" hit Breakpoint 1, cpp_push_buffer (pfile=pfile@entry=0x21defb0,
    buffer=0x223a5c0 "#include <iostream>\r\nstruct B {\r\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:"..., len=1090, from_stage3=0) at ../../libcpp/directives.c:2574
2574      cpp_buffer *new_buffer = XOBNEW (&pfile->buffer_ob, cpp_buffer);
(gdb) finish
Run till exit from #0  cpp_push_buffer (pfile=pfile@entry=0x21defb0,
    buffer=0x223a5c0 "#include <iostream>\r\nstruct B {\r\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:"..., len=1090, from_stage3=0) at ../../libcpp/directives.c:2574
_cpp_stack_file (pfile=pfile@entry=0x21defb0, file=0x223a490, type=type@entry=IT_MAIN, loc=loc@entry=0) at ../../libcpp/files.c:935
935      buffer->file = file;
Value returned is $1 = (cpp_buffer *) 0x21df560
(gdb) p parse_in.buffer.cur
$2 = (const unsigned char *) 0x0
(gdb) p parse_in.buffer.buf
$3 = (const unsigned char *) 0x223a5c0 "#include <iostream>\r\nstruct B {\r\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:"...
(gdb)

gcc有个全局变量parse_in可以查看当前处理的文件。而cpp_push_buffer()函数是用来切换parse_in处理的buffer。
通过下面的操作就可以调试gcc读取源文件、生成token并进入parser的过程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
(gdb) p &parse_in.buffer.cur
$4 = (const unsigned char **) 0x21df560
(gdb) watch *(int *)0x21df560
Hardware watchpoint 2: *(int *)0x21df560
(gdb) commands
Type commands for breakpoint(s) 2, one per line.
End with a line saying just "end".
>p parse_in.buffer.cur
>end
(gdb) c
Thread 2.1 "cc1plus" hit Hardware watchpoint 2: *(int *)0x21df560
 
Old value = 35890646
New value = 35890651
lex_identifier (pfile=pfile@entry=0x21defb0,
    base=0x223a5d5 "struct B {\n\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:\r\n    int m; // B::m "..., starts_ucn=starts_ucn@entry=false, nst=nst@entry=0x7fffffffdf14, spelling=spelling@entry=0x2231ee0) at ../../libcpp/lex.c:1475
1475      if (starts_ucn || forms_identifier_p (pfile, false, nst))
$17 = (const unsigned char *) 0x223a5db " B {\n\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:\r\n    int m; // B::m is pro"...
(gdb) bt
#0  lex_identifier (pfile=pfile@entry=0x21defb0,
    base=0x223a5d5 "struct B {\n\n    virtual void f(int) { std::cout << \"B::f\\n\"; }\r\n    void g(char)        { std::cout << \"B::g\\n\"; }\r\n    void h(int)         { std::cout << \"B::h\\n\"; }\r\n protected:\r\n    int m; // B::m "..., starts_ucn=starts_ucn@entry=false, nst=nst@entry=0x7fffffffdf14, spelling=spelling@entry=0x2231ee0) at ../../libcpp/lex.c:1475
#1  0x000000000154a377 in _cpp_lex_direct (pfile=pfile@entry=0x21defb0) at ../../libcpp/lex.c:2848
#2  0x000000000154bb69 in _cpp_lex_token (pfile=pfile@entry=0x21defb0) at ../../libcpp/lex.c:2608
#3  0x0000000001551468 in cpp_get_token_1 (pfile=0x21defb0, location=location@entry=0x7fffffffe114) at ../../libcpp/macro.c:2808
#4  0x0000000001552d35 in cpp_get_token_with_location (pfile=<optimized out>, loc=loc@entry=0x7fffffffe114) at ../../libcpp/macro.c:2994
#5  0x00000000007f40df in c_lex_with_flags (value=value@entry=0x7fffffffe118, loc=loc@entry=0x7fffffffe114, cpp_flags=cpp_flags@entry=0x7fffffffe112 "@\360`j\240\020", lex_flags=lex_flags@entry=2)
    at ../../gcc/c-family/c-lex.c:458
#6  0x00000000006f462f in cp_lexer_get_preprocessor_token (lexer=lexer@entry=0x7ffff070c0a8, token=token@entry=0x7fffffffe110) at ../../gcc/cp/parser.c:808
#7  0x0000000000732bf4 in cp_lexer_new_main () at ../../gcc/cp/parser.c:656
#8  cp_parser_new () at ../../gcc/cp/parser.c:3981
#9  c_parse_file () at ../../gcc/cp/parser.c:43972
#10 0x00000000007fb25c in c_common_parse_file () at ../../gcc/c-family/c-opts.c:1190
#11 0x0000000000c4e004 in compile_file () at ../../gcc/toplev.c:458
#12 0x000000000062763c in do_compile () at ../../gcc/toplev.c:2274
#13 toplev::main (this=this@entry=0x7fffffffe21e, argc=<optimized out>, argc@entry=14, argv=<optimized out>, argv@entry=0x7fffffffe318) at ../../gcc/toplev.c:2413
#14 0x000000000062b09f in main (argc=14, argv=0x7fffffffe318) at ../../gcc/main.c:39

g++和gcc的parser在处理token的时候不一样:c语言是由parser来驱动读取token,而c++是先将所有token全部预处理完后,再继续调用parser。c语言用上面的方法就可以直接看到parser,c++还要用下面的方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
(gdb) frame 7
#7  0x0000000000732bf4 in cp_lexer_new_main () at ../../gcc/cp/parser.c:656
656          cp_lexer_get_preprocessor_token (lexer, &token);
(gdb) l
651      lexer->buffer->quick_push (token);
652   
653      /* Get the remaining tokens from the preprocessor.  */
654      while (token.type != CPP_EOF)
655        {
656          cp_lexer_get_preprocessor_token (lexer, &token);
657          vec_safe_push (lexer->buffer, token);
658        }
659   
660      lexer->next_token = lexer->buffer->address ();
(gdb) b 657
Breakpoint 3 at 0x732bf4: file ../../gcc/cp/parser.c, line 657.
(gdb) c
Continuing.
 
Thread 2.1 "cc1plus" hit Breakpoint 3, vec_safe_reserve<cp_token, va_gc> (exact=false, nelems=1, v=@0x7ffff070c0a8: 0x7ffff04a1000) at ../../gcc/cp/parser.c:657
657          vec_safe_push (lexer->buffer, token);
Thread 2.1 "cc1plus" hit Breakpoint 3, vec_safe_reserve<cp_token, va_gc> (exact=false, nelems=1, v=@0x7ffff070c0a8: 0x7ffff04a1000) at ../../gcc/cp/parser.c:657
657          vec_safe_push (lexer->buffer, token);
(gdb) n
656          cp_lexer_get_preprocessor_token (lexer, &token);
(gdb) p lexer->buffer.m_vecpfx
$22 = {m_alloc = 131071, m_using_auto_storage = 0, m_num = 124274}
(gdb) p lexer->buffer.m_vecdata[124273]
$23 = {type = CPP_NAME, keyword = RID_MAX, flags = 1 '\001', implicit_extern_c = false, error_reported = false, purged_p = false, tree_check_p = false, location = 278957888, u = {
    tree_check_value = 0x7ffff02d1540, value = 0x7ffff02d1540}}
(gdb) p lexer->buffer.m_vecdata[124273].u .value .identifier .id
$24 = {str = 0x7ffff02d2170 "B", len = 1, hash_value = 4294967250}

用上面的方法可以获取每个token所在的位置。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
(gdb) p &lexer->buffer.m_vecdata[124273]
$26 = (cp_token *) 0x7ffff0686718
(gdb) awatch *(int *)0x7ffff0686718
Hardware access (read/write) watchpoint 4: *(int *)0x7ffff0686718
(gdb) c
Continuing.
 
Thread 2.1 "cc1plus" hit Hardware access (read/write) watchpoint 4: *(int *)0x7ffff0686718
 
Value = -268313290
0x00000000007326f8 in cp_lexer_peek_nth_token (n=60, lexer=<optimized out>) at ../../gcc/cp/parser.c:1095
1095          if (!token->purged_p)
(gdb) bt
#0  0x00000000007326f8 in cp_lexer_peek_nth_token (n=60, lexer=<optimized out>) at ../../gcc/cp/parser.c:1095
#1  cp_parser_declaration (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:13356
#2  0x0000000000732dd5 in cp_parser_translation_unit (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:4734
#3  c_parse_file () at ../../gcc/cp/parser.c:43975
#4  0x00000000007fb25c in c_common_parse_file () at ../../gcc/c-family/c-opts.c:1190
#5  0x0000000000c4e004 in compile_file () at ../../gcc/toplev.c:458
#6  0x000000000062763c in do_compile () at ../../gcc/toplev.c:2274
#7  toplev::main (this=this@entry=0x7fffffffe21e, argc=<optimized out>, argc@entry=14, argv=<optimized out>, argv@entry=0x7fffffffe318) at ../../gcc/toplev.c:2413
#8  0x000000000062b09f in main (argc=14, argv=0x7fffffffe318) at ../../gcc/main.c:39

可以通过上面的方法来调试c++ parser,从而学习c++了。
二、也可以通过下如下断点来调试g++获取下一个token的过程:

1
2
3
4
5
6
7
8
(gdb) frame 2
#2  0x0000000000732dd5 in cp_parser_translation_unit (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:4734
4734        cp_parser_toplevel_declaration (parser);
(gdb) p parser->lexer->next_token
$27 = (cp_token_position) 0x7ffff0686708
(gdb) p &parser->lexer->next_token
$28 = (cp_token_position *) 0x7ffff070c0b8
(gdb) watch *(int *)0x7ffff070c0b8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
(gdb) c
Continuing.
 
Thread 2.1 "cc1plus" hit Hardware watchpoint 5: *(int *)0x7ffff070c0b8
 
Old value = -261593320
New value = -261593304
cp_lexer_consume_token (lexer=0x7ffff070c0a8) at ../../gcc/cp/parser.c:1123
1123      while (lexer->next_token->purged_p);
(gdb) bt
#0  cp_lexer_consume_token (lexer=0x7ffff070c0a8) at ../../gcc/cp/parser.c:1123
#1  0x00000000006f91da in cp_parser_require (matching_location=0, token_desc=RT_NAME, type=CPP_NAME, parser=<optimized out>) at ../../gcc/cp/parser.c:30695
#2  cp_parser_identifier (parser=<optimized out>) at ../../gcc/cp/parser.c:4105
#3  0x000000000070a790 in cp_parser_class_head (nested_name_specifier_p=<synthetic pointer>, parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:24347
#4  cp_parser_class_specifier_1 (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:23811
#5  0x000000000070af34 in cp_parser_class_specifier (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:24180
#6  cp_parser_type_specifier (parser=0x7ffff072fa18, flags=<optimized out>, decl_specs=0x7fffffffde90, is_declaration=<optimized out>, declares_class_or_enum=0x7fffffffdd40, is_cv_qualifier=<optimized out>)
    at ../../gcc/cp/parser.c:17711
#7  0x000000000070bea7 in cp_parser_decl_specifier_seq (parser=0x7ffff072fa18, flags=1, decl_specs=0x7fffffffde90, declares_class_or_enum=0x7fffffffde78) at ../../gcc/cp/parser.c:14358
#8  0x000000000070c882 in cp_parser_simple_declaration (parser=0x7ffff072fa18, function_definition_allowed_p=true, maybe_range_for_decl=0x0) at ../../gcc/cp/parser.c:13613
#9  0x00000000007326b3 in cp_parser_declaration (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:13433
#10 0x0000000000732dd5 in cp_parser_translation_unit (parser=0x7ffff072fa18) at ../../gcc/cp/parser.c:4734
#11 c_parse_file () at ../../gcc/cp/parser.c:43975
#12 0x00000000007fb25c in c_common_parse_file () at ../../gcc/c-family/c-opts.c:1190
#13 0x0000000000c4e004 in compile_file () at ../../gcc/toplev.c:458
#14 0x000000000062763c in do_compile () at ../../gcc/toplev.c:2274
#15 toplev::main (this=this@entry=0x7fffffffe21e, argc=<optimized out>, argc@entry=14, argv=<optimized out>, argv@entry=0x7fffffffe318) at ../../gcc/toplev.c:2413
#16 0x000000000062b09f in main (argc=14, argv=0x7fffffffe318) at ../../gcc/main.c:39

下面是调试生成汇编的方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
(gdb) b print_reg
Breakpoint 6 at 0xf03940: file ../../gcc/config/i386/i386.c, line 12327.
(gdb) c
Continuing.
 
Thread 2.1 "cc1plus" hit Breakpoint 6, print_reg (x=0x7ffff07003c0, code=0, file=0x224fa80) at ../../gcc/config/i386/i386.c:12327
12327      if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
(gdb) bt
#0  print_reg (x=0x7ffff07003c0, code=0, file=0x224fa80) at ../../gcc/config/i386/i386.c:12327
#1  0x000000000096a822 in output_operand (x=<optimized out>, code=<optimized out>) at ../../gcc/final.c:4051
#2  0x000000000096b35e in output_asm_insn (templ=templ@entry=0x1be255c "push{q}\t%1", operands=0x211efe0 <recog_data>) at ../../gcc/final.c:3963
#3  0x000000000096ea1a in final_scan_insn_1 (insn=0x7fffeeb87280, file=0x224fa80, seen=<optimized out>, nopeepholes=<optimized out>, optimize_p=<optimized out>) at ../../gcc/final.c:3106
#4  0x000000000096eccc in final_scan_insn (insn=<optimized out>, file=<optimized out>, optimize_p=<optimized out>, nopeepholes=<optimized out>, seen=<optimized out>) at ../../gcc/final.c:3152
#5  0x000000000096efb5 in final_1 (first=0x7fffeeb156c8, file=0x224fa80, seen=<optimized out>, optimize_p=0) at ../../gcc/final.c:2020
#6  0x000000000096f745 in rest_of_handle_final () at ../../gcc/final.c:4658
#7  (anonymous namespace)::pass_final::execute (this=<optimized out>) at ../../gcc/final.c:4736
#8  0x0000000000b924e8 in execute_one_pass (pass=0x2213200) at ../../gcc/passes.c:2502
#9  0x0000000000b92e30 in execute_pass_list_1 (pass=0x2213200) at ../../gcc/passes.c:2590
#10 0x0000000000b92e42 in execute_pass_list_1 (pass=0x2212d20) at ../../gcc/passes.c:2591
#11 0x0000000000b92e42 in execute_pass_list_1 (pass=0x2211080) at ../../gcc/passes.c:2591
#12 0x0000000000b92e69 in execute_pass_list (fn=0x7fffeeedea50, pass=<optimized out>) at ../../gcc/passes.c:2601
#13 0x000000000088d41e in cgraph_node::expand (this=0x7fffeef6f870) at ../../gcc/context.h:48
#14 0x000000000088e514 in output_in_order () at ../../gcc/cgraphunit.c:2667
#15 symbol_table::compile (this=0x7ffff0703100) at ../../gcc/cgraphunit.c:2908
#16 0x000000000089052d in symbol_table::compile (this=0x7ffff0703100) at ../../gcc/cgraphunit.c:3091
#17 symbol_table::finalize_compilation_unit (this=0x7ffff0703100) at ../../gcc/cgraphunit.c:3091
#18 0x0000000000c4e24f in compile_file () at ../../gcc/toplev.c:483
#19 0x000000000062763c in do_compile () at ../../gcc/toplev.c:2274
#20 toplev::main (this=this@entry=0x7fffffffe21e, argc=<optimized out>, argc@entry=14, argv=<optimized out>, argv@entry=0x7fffffffe318) at ../../gcc/toplev.c:2413
#21 0x000000000062b09f in main (argc=14, argv=0x7fffffffe318) at ../../gcc/main.c:39
(gdb) p hi_reg_name[x.u.reg.regno]
$29 = 0x16d2f65 "bp"

要调试寄存器分配后的结果,可以对下面这个函数下断点:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* Set up REG_RENUMBER and CALLER_SAVE_NEEDED (used by reload) from
   the allocation found by IRA.

   Walks every allocno, stores its hard register number (or -1 when
   that number is negative) in reg_renumber[], and sets
   caller_save_needed to 1 when ira_need_caller_save_p returns true
   for an allocno that has a non-negative hard register.  */
static void
setup_reg_renumber (void)
{
  int regno, hard_regno;
  ira_allocno_t a;
  ira_allocno_iterator ai;
 
  /* Reset the flag; it is raised again below if any allocno needs it.  */
  caller_save_needed = 0;
  FOR_EACH_ALLOCNO (a, ai)
    {
      /* When ira_use_lra_p is set, allocnos with a cap member are
	 skipped entirely.  */
      if (ira_use_lra_p && ALLOCNO_CAP_MEMBER (a) != NULL)
    continue;
      /* There are no caps at this point.  */
      ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
      if (! ALLOCNO_ASSIGNED_P (a))
    /* It can happen if A is not referenced but partially anticipated
       somewhere in a region.  */
    ALLOCNO_ASSIGNED_P (a) = true;
      ira_free_allocno_updated_costs (a);
      hard_regno = ALLOCNO_HARD_REGNO (a);
      regno = ALLOCNO_REGNO (a);
      /* Any negative hard register number is recorded as -1.  */
      reg_renumber[regno] = (hard_regno < 0 ? -1 : hard_regno);
      if (hard_regno >= 0)
    {
      int i, nwords;
      enum reg_class pclass;
      ira_object_t obj;
 
      pclass = ira_pressure_class_translate[REGNO_REG_CLASS (hard_regno)];
      nwords = ALLOCNO_NUM_OBJECTS (a);
      /* For each object of A, add every hard register that is not in
	 PCLASS to the object's total conflict set.  */
      for (i = 0; i < nwords; i++)
        {
          obj = ALLOCNO_OBJECT (a, i);
          OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)
        |= ~reg_class_contents[pclass];
        }
      /* Record that at least one allocno needs caller saves.  */
      if (ira_need_caller_save_p (a, hard_regno))
        {
          ira_assert (!optimize || flag_caller_saves
              || (ALLOCNO_CALLS_CROSSED_NUM (a)
                  == ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a))
              || regno >= ira_reg_equiv_len
              || ira_equiv_no_lvalue_p (regno));
          caller_save_needed = 1;
        }
    }
    }
}

reg_renumber变量联系了多个寄存器分配算法,可以查找它的引用来看各个寄存器分配算法之间的配合。


[培训]二进制漏洞攻防(第3期);满10人开班;模糊测试与工具使用二次开发;网络协议漏洞挖掘;Linux内核漏洞挖掘与利用;AOSP漏洞挖掘与利用;代码审计。

收藏
点赞0
打赏
分享
最新回复 (0)
游客
登录 | 注册 方可回帖
返回