首页
社区
课程
招聘
[原创] 看雪 2023 KCTF 年度赛 第八题 AI核心地带
2023-9-19 03:07 8696

[原创] 看雪 2023 KCTF 年度赛 第八题 AI核心地带

2023-9-19 03:07
8696

IDA打开,只有一个main函数,短小精悍无混淆:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
int __cdecl main(int argc, const char **argv, const char **envp)
{
  FILE *v3; // eax
  int buflen_; // kr00_4
  int buflen; // esi
  int i; // eax
  char v7; // bl
  bool v8; // cc
  signed int cc; // edx
  int should_be_0_; // edi
  int cc_; // edi
  char v12; // al
  char v13; // cl
  char v14; // dl
  bool v15; // sf
  int v16; // eax
  const char *v17; // eax
  int c; // [esp+Ch] [ebp-46Ch]
  int last_cc; // [esp+10h] [ebp-468h]
  int also_should_be_0; // [esp+14h] [ebp-464h]
  int v22; // [esp+18h] [ebp-460h]
  int is_last_round; // [esp+1Ch] [ebp-45Ch]
  unsigned int i_; // [esp+20h] [ebp-458h]
  int should_be_0; // [esp+24h] [ebp-454h]
  int v26; // [esp+28h] [ebp-450h]
  char Buffer[1004]; // [esp+2Ch] [ebp-44Ch] BYREF
  char const1[47]; // [esp+418h] [ebp-60h] BYREF
  __int64 const1_47; // [esp+447h] [ebp-31h]
  int v30; // [esp+44Fh] [ebp-29h]
  char v31; // [esp+453h] [ebp-25h]
  _BYTE const2[23]; // [esp+454h] [ebp-24h] BYREF
  int const2_23; // [esp+46Bh] [ebp-Dh]
  __int16 v34; // [esp+46Fh] [ebp-9h]
  char v35; // [esp+471h] [ebp-7h]
 
  strcpy(const1, "flag{BzcZDnfNIqmQCtkTGlwLyDYeiHIjxSXwkRKzpFPv}");
  v22 = 'lo^?';
  const1_47 = 0i64;
  strcpy(const2, "Can you crack me?^olo^");
  v30 = 0;
  v31 = 0;
  const2_23 = 0;
  v34 = 0;
  v35 = 0;
  last_cc = 0;
  is_last_round = 0;
  should_be_0 = 0;
  also_should_be_0 = 0;
  printf("Please input:");
  v3 = _iob_func();
  fgets(Buffer, 1001, v3);
  buflen_ = strlen(Buffer);
  buflen = buflen_;
  if ( buflen_ > 0 && (buflen = buflen_ - 1, Buffer[buflen_ - 1] == '\n') )
  {
    if ( (unsigned int)buflen >= 1001 )
    {
      __report_rangecheckfailure(&Buffer[1]);
      __debugbreak();
    }
    Buffer[buflen] = 0;
  }
  else
  {
    should_be_0 = 1;
  }
  i = 0;
  if ( buflen < 0 )
    buflen = 0;
  i_ = 0;
  v7 = const1[42];
  v8 = buflen > 0;
  do
  {
    if ( v8 )
    {
      c = Buffer[i];
      if ( c - (unsigned int)'0' > 9 )
      {
        should_be_0_ = 1;
        should_be_0 = 1;
        goto LABEL_32;
      }
      cc = (c + (0xFFFEC610 >> (i_ % 0x1F))) % 0xA;
      if ( (i_ & 1) != cc < 1 )                 // if i & 1, then cc < 1, means for i = 1, 3, 5, ..., cc == 0
        should_be_0 = 1;
    }
    else
    {
      cc = last_cc;
      is_last_round = 1;
    }
    cc_ = cc;
    v26 = *(_DWORD *)&const2[16] ^ *(_DWORD *)&const2[12] ^ *(_DWORD *)&const2[8] ^ *(_DWORD *)&const2[4] ^ *(_DWORD *)const2;
    if ( is_last_round )
    {
      if ( (((unsigned __int8)v26 ^ (unsigned __int8)(const2[18] ^ const2[14] ^ const2[10] ^ const2[6] ^ const2[2])) & 0x1F) != 0
        || ((HIBYTE(v26) ^ BYTE1(v26)) & 0x1F) != 0 )
      {
        cc_ = -1;                               // avoid
      }
      goto LABEL_26;
    }
    if ( cc >= 1 )
    {
      v12 = BYTE2(v26);
      if ( cc >= 6 )                            //  cc == 6,7,8,9
      {
        if ( (((unsigned __int8)v26 ^ BYTE2(v26)) & 0x1F) != 0 )
        {
LABEL_25:
          cc_ = 9;
          goto LABEL_26;
        }
        v12 = HIBYTE(v26);
        v13 = 13 - cc;                          // v13 == 7,6,5,4
        v14 = BYTE1(v26);
      }
      else                                      //  cc == 1,2,3,4,5
      {
        v13 = 8 - cc;                           // v13 == 7,6,5,4,3
        v14 = v26;
      }
      if ( (v14 << v13) + 0x80 == v12 << v13 )
        goto LABEL_26;
      goto LABEL_25;
    }
LABEL_26:
    last_cc = cc_;
    if ( is_last_round )
    {
      v15 = cc_ < 0;
      should_be_0_ = should_be_0;
      if ( v15 )
        also_should_be_0 = 1;
    }
    else
    {
      v16 = *(_DWORD *)&const1[4 * cc_ + 5];
      *(_DWORD *)const2 ^= v16;
      *(_DWORD *)&const2[4] ^= v16;
      *(_DWORD *)&const2[8] ^= v16;
      *(_DWORD *)&const2[12] ^= v16;
      v8 = cc_ <= 8;
      should_be_0_ = should_be_0;
      v22 ^= v16;
      *(_DWORD *)&const2[16] = v22;
      if ( !v8 && (v7 & 0x10000000) == 0 )
      {
        v7 = ~v7 & 0x7F;                        // v7 = v7 ^ 0x7F
        const1[42] = v7;                        // 42 == 4 * 9 + 5 + 1
      }
    }
LABEL_32:
    i = i_ + 1;
    i_ = i;
    v8 = i < buflen;
  }
  while ( i <= buflen );
  if ( should_be_0_ || (v17 = "OK", also_should_be_0) )
    v17 = "FAIL";
  printf(v17);
  return 0;
}

第一反应感觉用angr能很快得到结果;上手尝试下,速度确实很快,但却返回无解;后面把程序逻辑抄出来自己重新编译为ELF,仍然无解。搞不明白原因,有尝试过的朋友可以留言解答一下。

但是,这个程序理解起来并不特别直观,主要是校验逻辑不是传统的对输入做变换后与常量比较,而是类似于状态机,随着读取输入不断地做状态转移,如果直到输入读取完毕都没有调入失败状态,则认为校验通过。

输入处理部分,检查了每个字符 c 必须位于'0'到'9'的范围,然后按照 cc = (c + (0xFFFEC610 >> (i_ % 0x1F))) % 0xA 转换为 cc 供后面的使用。注意这里是无符号除法(汇编的div指令),所以cc的取值范围为0到9,后面的 (i_ & 1) != cc < 1 限制了索引i_为奇数时cc的取值必须为0。
此阶段的转换很容易逆推,可以等到计算出所有的 cc 值之后再反推回原始输入。

后面的逻辑,首先计算了 v26 = *(_DWORD *)&const2[16] ^ *(_DWORD *)&const2[12] ^ *(_DWORD *)&const2[8] ^ *(_DWORD *)&const2[4] ^ *(_DWORD *)const2 。根据初始化的情况,const2 字符串被用作了5个整数,v26是它们的异或。
然后,根据是否为最后一轮(所有输入字符是否都处理完毕)、cc的取值(当前输入数值),对构成 v26 的4个字节做不同的检查,如果检查不通过直接转入失败状态(将should_be_0 变量赋值为1,控制最后的输出为"FAIL")。
如果中间的检查都通过,会根据变换后 cc_ 变量的取值,从const1视为的9个整数常量取出一个异或到const2的5个整数上,以及是否对const1[42]取反。此处,42 == 4 * 9 + 5 + 1,对 const1[42] 取反等同于对 const1 的第 9 个整数异或上 0x7F00

简化一下,每轮循环中间的检查和变换过程实际只与 v26cc 有关;进入下一轮循环时,v26会异或上10个const1常量之一以及视情况额外异或一下0x7f00,cc被更新为从下一个用户输入字符转换得到的值。
由于两次异或同一个数值相当于没有做任何事情,因此这里一个重要的结论是:v26的取值数量是有限的(不超过2**(10+1)种,即10个const1常量加上0x7f00常量是否被异或在了初始值之上)。

所以,整个循环完全可以视为有限状态机状态转移的过程,v26是当前状态,cc是转移条件,状态机的下一个状态(v26的下一个取值)完全由当前状态(v26)和转移条件(cc)决定;从图论的角度看,v26的所有可能的取值对应图上的节点,状态机两个相邻状态的v26之间有一条有向边,边的属性为在这两个相邻状态之间转换的条件cc

对于每个v26,都可以判定出其能否成为终止状态(即假设此刻输入到达末尾,is_last_round为1,能否直接让程序输出"OK")。根据代码,这里的判定终止的条件应为 (BYTE0(v26) ^ BYTE2(v26)) & 0x1F == 0 并且 HIBYTE(v26) ^ BYTE1(v26)) & 0x1F) == 0

v26的初始状态已知(5个const2整数的异或),所以只要找到一个从初始状态到任意一个终止状态的转移过程即可通过程序的校验;或者,从图论的角度看,等同于找到一条从起点到任何一个终点之间的路径。


以上完成了对程序校验逻辑的数学建模。虽然仍未看出来这个逻辑对应到什么实际问题上,但是图上两点之间找路径有非常成熟的解法,对于做题完全够用。

接下来是实现细节。
一种直观的方法是延续程序的原始流程,随着路径的探索不断生成新的节点并判断是否满足终止条件。这非常适合dfs或bfs,下面的代码分别用这两种算法各找到了一个解,理论上bfs找到的解应该是最短的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
from collections import deque
 
 
def u32(s):
    assert len(s) == 4, s
    return int.from_bytes(s, 'little')
 
 
def BYTE0(n):
    return n & 0xff
 
def BYTE1(n):
    return (n>>8) & 0xff
 
def BYTE2(n):
    return (n>>16) & 0xff
 
def BYTE3(n):
    return (n>>24) & 0xff
 
 
const1 = b"BzcZDnfNIqmQCtkTGlwLyDYeiHIjxSXwkRKzpFPv"
assert len(const1) == 40
s1s = [u32(const1[i:i+4]) for i in range(0, 40, 4)]
 
const2 = b"Can you crack me?^olo^"
assert len(const2) == 22
s2s = [u32(const2[i:i+4]) for i in range(0, 20, 4)]
s2sxor = 0
for c in s2s:
    s2sxor ^= c
 
 
def convert(s):
    # assert s.count(0) % 2 == 1, s
    r = ""
    for i, cc in enumerate(s):
        assert (i & 1) == (cc < 1)
        tmp = 0xFFFEC610 >> (i % 0x1F)
        c = (cc - tmp - 48) % 10
        c += 48
        assert cc == (c + (0xFFFEC610 >> (i % 0x1F))) % 0xA
        r += chr(c)
    return r
 
 
class StateStruct:
    __slots__ = ["choose", "const1_42", "extra_xor", "index"]
    def __init__(self):
        self.choose = [0]*10
        self.extra_xor = 0
        self.const1_42 = 0
        self.index = 0
    def serialize(self):
        return bytes(self.choose)+bytes([self.const1_42, self.extra_xor, self.index & 1])
    def value(self):
        global s1s
        global s2sxor
        r = s2sxor
        for i, c in enumerate(s1s):
            if self.choose[i]:
                r ^= c
        if self.extra_xor:
            r ^= 0x7f00
        # print(hex(r))
        return r
    def is_finish(self):
        tmp2 = self.value()
        b3, b2, b1, b0 = BYTE3(tmp2), BYTE2(tmp2), BYTE1(tmp2), BYTE0(tmp2)
        aa = (b0 ^ b2) & 0x1f
        bb = (b1 ^ b3) & 0x1f
        if aa == 0 and bb == 0:
            return True
        return False
    def copy(self):
        new_state = StateStruct()
        new_state.choose = list(self.choose)
        new_state.const1_42 = self.const1_42
        new_state.index = self.index
        new_state.extra_xor = self.extra_xor
        return new_state
 
 
def get_next_state(state, cc):
    v26 = state.value()
    b3, b2, b1, b0 = BYTE3(v26), BYTE2(v26), BYTE1(v26), BYTE0(v26)
    if cc >= 1:
        v12 = b2
        if cc >= 6:
            if (b0 ^ b2) & 0x1f:
                cc = 9
            else:
                v12 = b3
                v13 = 13 - cc
                v14 = b1
        else:
            v13 = 8 - cc
            v14 = b0
        if cc != 9:
            if ((v14 << v13) + 0x80) & 0xff == (v12 << v13) & 0xff:
                pass
            else:
                cc = 9
    global s1s
    new_state = state.copy()
    new_state.index += 1
    new_state.choose[cc] ^= 1
    if cc == 9:
        if new_state.const1_42:
            new_state.extra_xor ^= 1
        new_state.const1_42 ^= 1
    return new_state
 
def dfs(state, path_seen_states, seen_states):
    # print(state.index*" ", state.index, state.serialize(), len(seen_states))
    state_serialize = state.serialize()
    # print(state.index, state.serialize())
    if state.is_finish():
        return []
    if state_serialize in seen_states:
        return seen_states[state_serialize]
    if state_serialize in path_seen_states:
        return None
    new_path_seen_states = set(path_seen_states)
    new_path_seen_states.add(state_serialize)
    valid_cc = [0] if state.index & 1 else list(range(1, 10))
    for cc in valid_cc:
        next_state = get_next_state(state, cc)
        path = dfs(next_state, new_path_seen_states, seen_states)
        # print(state.index*" ", state.index, path)
        seen_states[next_state.serialize()] = path
        if path is not None:
            return [cc]+path
    return None
 
def bfs(state, seen_states):
    q = deque()
    q.append(state)
    seen_states[state.serialize()] = []
    while len(q):
        state = q.popleft()
        state_serialize = state.serialize()
        current_path = seen_states[state_serialize]
        # print(state.index, state.serialize())
        if state.is_finish():
            return current_path
        valid_cc = [0] if state.index & 1 else list(range(1, 10))
        for cc in valid_cc:
            next_state = get_next_state(state, cc)
            next_state_serialize = next_state.serialize()
            if next_state_serialize not in seen_states:
                q.append(next_state)
                seen_states[next_state_serialize] = current_path + [cc]
 
 
initial_state = StateStruct()
r = dfs(initial_state, set(), {})
print(r)
rr = convert(r)
print(len(rr))  # 478
print(rr)  # 5816065811807463950185018511850490797420271653486937692769276978160658118084439501850135018504907984902716534889276927692779581606581110744395018511850185049099749027165358692769276967695816065821807443950105018501850400797490271683486927692779276958160678118074439511850185018574907974902726534869276947692769581616581180744325018501850195049079749047165348692779276927695856065811807453950185018521850490797400271653486957692769276968160658118094439501850195018504907924902716
 
br = bfs(initial_state, {})
print(br)
brr = convert(br)
print(len(brr))  # 55
print(brr)  # 5816166811207453952185118531851490997400277663487977792

这里找到的两个解:

1
5816065811807463950185018511850490797420271653486937692769276978160658118084439501850135018504907984902716534889276927692779581606581110744395018511850185049099749027165358692769276967695816065821807443950105018501850400797490271683486927692779276958160678118074439511850185018574907974902726534869276947692769581616581180744325018501850195049079749047165348692779276927695856065811807453950185018521850490797400271653486957692769276968160658118094439501850195018504907924902716
1
5816166811207453952185118531851490997400277663487977792

它们都能通过程序的验证,但都不能通过平台的验证。


虽然题目已经做完了,但还可以做一些其他的探索。

上面采用dfs和bfs查找路径从而不必预先生成完整的图;但是由于节点不超过几千个,所以我们可以生成完整的图然后做进一步的分析。

例如,借助networkx库一次找到所有的最短路径:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import networkx as nx
# import matplotlib
# import matplotlib.pyplot
 
'''
omit imports and definitions
'''
 
...
 
def unserialize_state(s):
    assert len(s) == 10+1+1+1
    r = StateStruct()
    for i, c in enumerate(s):
        assert c == 0 or c == 1
        if i < 10:
            r.choose[i] = c
        elif i == 10:
            r.const1_42 = c
        elif i == 11:
            r.extra_xor = c
        elif i == 12:
            r.index = c
        else:
            assert 0
    return r
 
 
def get_all_states():
    r = []
    for i in range(1 << (10+1+1+1)):
        tmp = unserialize_state([(i>>j) & 1 for j in range(10+1+1+1)])
        r.append(tmp)
    return r
 
 
initial_state = StateStruct()
finish_states = []
all_states = get_all_states()
 
g = nx.DiGraph()
for state in all_states:
    state_serialize = state.serialize()
    g.add_node(state_serialize)
    if state.is_finish():
        finish_states.append(state)
 
for state in all_states:
    for cc in range(10):
        if state.index & 1 and cc != 0:
            continue
        next_state = get_next_state(state, cc)
        if next_state is not None:
            g.add_edge(state.serialize(), next_state.serialize(), cc=cc)
 
# fig = matplotlib.pyplot.figure()
# nx.draw(g, ax=fig.add_subplot())
# fig.savefig("graph.png")
 
for finish_state in finish_states:
    try:
        # r = nx.all_simple_paths(g, initial_state.serialize(), finish_state.serialize())
        r = nx.all_shortest_paths(g, initial_state.serialize(), finish_state.serialize())
        paths = list(r)
    except nx.exception.NetworkXNoPath:
        continue
    for path in paths:
        cc_list = []
        for i in range(len(path)-1):
            cc = g[path[i]][path[i+1]]["cc"]
            cc_list.append(cc)
        ans = convert(cc_list)
        # print(len(path))
        # print(len(ans), ans)
        print(ans)

这里找到了20个最短解,每个解的长度为55,全部能通过程序的验证:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
5896866811207453952185118531851490997400277633284977490
3896066811207453952185118531851490997400277633284977490
5826863811207453952185118531851490997400277633284977490
5826063891207453952185118531851490997400277633284977490
5826069891607453952185118531851490997400277633284977490
5826069811605453952185118531851490997400277633284977490
5826069811905423952185118531851490997400277633284977490
5826069811907423752185118531851490997400277633284977490
5826069811907463758185118531851490997400277633284977490
5826069811907463958165118531851490997400277633284977490
5826069811907463951165818531851490997400277633284977490
5826069811907463951185816531851490997400277633284977490
5826069811907463951185316581851490997400277633284977490
5826069811907463951185318581651490997400277633284977490
5826069811907463951185318511658490997400277633284977490
5826069811907463951185318511858470997400277633284977490
5826069811907463951185318511852470597400277633284977490
5826069811907463951185318511852490595400277633284977490
5826069811907463951185318511852490895470277633284977490
5826069811907463951185318511852490897470077633284977490

(有个疑问,前面bfs找到的解长度也是55,但却没有在这里出现??而手动验证发现该解的路径在图中也是存在的,搞不清楚哪里出了问题……)

更进一步,还可以用交互式方式把图展示出来(例如使用neo4j)加深直观感受,留待有兴趣的人完成吧。


[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课

收藏
点赞3
打赏
分享
最新回复 (0)
游客
登录 | 注册 方可回帖
返回