0%

Yulong-Hids Process Monitor Principle Analysis

一、起源

之前针对yulong-hids的分析还剩下进程监控模块,因为此模块有点复杂且涉及内核,所以单独整一篇过来分析下

二、源码分析

go

agent/monitor/process_linux.go模块是execve hook lkm的调用者,从此模块开始分析:

StartProcessMonitor

首先起一个线程调c实现的CapturePrecess函数(感觉应该是编码的师傅写错了 应该是captureprocess才对),根据注释大致可以猜到是connect netlink的操作

1
2
3
4
5
6
7
8
9
10
// StartProcessMonitor starts the C netlink relay in a goroutine and then
// reads the process events it forwards over loopback UDP, sending one parsed
// record per event to resultChan.
func StartProcessMonitor(resultChan chan map[string]string) {
	log.Println("StartProcessMonitor")
	// Each datagram carries one whole event record. A datagram longer than
	// the buffer is truncated by the read, which drops trailing fields, so
	// the buffer is sized for the largest possible UDP payload.
	var buf [65535]byte
	// Start the goroutine that runs the blocking C capture loop.
	go func() {
		ok := C.CapturePrecess()
		if ok < 0 {
			log.Println("connect syshook netlink error")
		}
	}()

其次监听本地udp:65530端口,并不断从中读数据,这边有点好奇为啥要从本地端口读数据,按理说直接对接用户态netlink socket就能取到数据了,猜测是调的captureprecess中间做了一层转发,至于为什么要做这层转发目前还不清楚:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Listen on the loopback UDP port that the C relay sends event records to.
// The error from ResolveUDPAddr is discarded; the address is a fixed literal.
localaddress, _ := net.ResolveUDPAddr("udp", "127.0.0.1:65530")
udplistener, err := net.ListenUDP("udp", localaddress)
if err != nil {
log.Print(err.Error())
return
}
defer udplistener.Close()
// resultdata holds the parsed fields of one event.
var resultdata map[string]string
for {
// Block until the next datagram arrives; n is the number of bytes read into buf.
n, _, err := udplistener.ReadFromUDP(buf[0:])
if err != nil {
log.Println(err.Error())
return
}

在上层对从65530读出来的数据做切割和解析,并将解析出的数据填充进特定格式的进程事件变量resultdata,并将结果返回给更上层调用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
proList := strings.Split(string(buf[0:n-1]), string(0x01))
if len(proList) <= 5 {
log.Println(string(buf[0:n]))
continue
}
//不记录agent执行的命令 || s == os.Getppid()
if s, _ := strconv.Atoi(proList[4]); s == os.Getpid() {
continue
}
//白名单的不记录
if common.InArray(common.Config.Filter.Process, strings.ToLower(resultdata["name"]), true) ||
common.InArray(common.Config.Filter.Process, strings.ToLower(resultdata["command"]), true) {
continue
}
resultdata = make(map[string]string)
resultdata["source"] = "process"
//resultdata["type"] = proList[0]
resultdata["name"] = proList[0]
resultdata["command"] = proList[0] + " " + proList[1]
resultdata["pid"] = proList[2]
resultdata["parentname"] = proList[3]
resultdata["ppid"] = proList[4]
resultdata["info"] = ""
if len(proList) == 6 {
resultdata["info"] = proList[5]
}
// fmt.Println(resultdata)
resultChan <- resultdata
//fmt.Print(string(buf[0:n]))

c

贴下代码,主要关注下captureprecess函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <sys/socket.h>
#include <linux/netlink.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define NETLINK_USER 31

#define MAX_PAYLOAD 2048
struct sockaddr_nl src_addr, dest_addr;
struct nlmsghdr *nlh = NULL;
struct iovec iov;
struct msghdr msg;

#define PORT 65530
/*
 * CapturePrecess - relay process events from the syshook netlink socket to
 * the agent's local UDP listener.
 *
 * Binds a netlink socket (protocol NETLINK_USER, multicast group 1), then
 * loops: the payload of every received message is treated as a string and
 * forwarded as one UDP datagram to 127.0.0.1:PORT.
 *
 * Returns -1 when setup fails or the netlink socket reports an unrecoverable
 * error. While the relay is healthy the function does not return.
 */
int CapturePrecess()
{
    struct sockaddr_in sockaddrin;
    ssize_t received;
    size_t buf_len, avail, payload_len;
    char *payload;
    int sock = -1;     /* UDP socket used to forward events */
    int sock_fd = -1;  /* netlink socket fed by the kernel module */
    int payload_max_len = 0;

    payload_max_len = pathconf("/", _PC_PATH_MAX);
    if (payload_max_len < 0) {
        return -1;
    }
    payload_max_len += MAX_PAYLOAD;
    buf_len = NLMSG_SPACE(payload_max_len);

    /* udp sock: never bound; sockaddrin is only the sendto() destination */
    sock = socket(AF_INET, SOCK_DGRAM, 0);
    if (sock < 0) {
        goto fail;
    }
    memset(&sockaddrin, 0, sizeof(sockaddrin));
    sockaddrin.sin_family = AF_INET;
    sockaddrin.sin_port = htons(PORT);
    sockaddrin.sin_addr.s_addr = inet_addr("127.0.0.1");

    /* netlink sock */
    sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_USER);
    if (sock_fd < 0) {
        goto fail;
    }

    memset(&src_addr, 0, sizeof(src_addr));
    memset(&dest_addr, 0, sizeof(dest_addr));
    memset(&msg, 0, sizeof(msg));

    src_addr.nl_family = AF_NETLINK;
    src_addr.nl_pid = getpid();
    src_addr.nl_groups = 1;
    /* Without a successful bind no broadcast is ever delivered. */
    if (bind(sock_fd, (struct sockaddr *)&src_addr, sizeof(src_addr)) < 0) {
        goto fail;
    }

    nlh = (struct nlmsghdr *)malloc(buf_len);
    if (nlh == NULL) {
        goto fail;
    }
    memset(nlh, 0, buf_len);

    iov.iov_base = (void *)nlh;
    iov.iov_len = buf_len;
    msg.msg_name = (void *)&dest_addr;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    payload = (char *)NLMSG_DATA(nlh);
    for (;;) {
        /* recvmsg() rewrites msg_namelen, so reset it for every call. */
        msg.msg_namelen = sizeof(dest_addr);
        received = recvmsg(sock_fd, &msg, 0);
        if (received < 0) {
            /* Transient conditions: retry. ENOBUFS means events were dropped
             * because the receive queue overflowed; the socket stays usable. */
            if (errno == EINTR || errno == EAGAIN || errno == ENOBUFS) {
                continue;
            }
            break;
        }
        if ((size_t)received <= (size_t)NLMSG_HDRLEN) {
            continue; /* no payload */
        }

        /* Never read past what was actually received. */
        avail = (size_t)received - (size_t)NLMSG_HDRLEN;
        payload_len = strnlen(payload, avail);
        if (payload_len > 0) {
            /* Best effort: the listener may not be up yet, so a failed send
             * only loses this one event. */
            (void)sendto(sock, payload, payload_len, 0,
                         (struct sockaddr *)&sockaddrin, sizeof(sockaddrin));
        }
        memset(payload, 0, avail);
    }

fail:
    free(nlh);
    nlh = NULL;
    if (sock_fd >= 0) {
        close(sock_fd);
    }
    if (sock >= 0) {
        close(sock);
    }
    return -1;
}

captureprecess主要负责进程事件的获取,但本质上它还是一个caller;首先函数调用socket(AF_INET, SOCK_DGRAM, 0)创建udp的socket,返回的socket描述符(句柄)保存到sock,之后初始化发送目的地址结构sockaddrin(这个udp socket本身并没有bind,sockaddrin只是后面sendto用的目的地址),项目中设置的是本地127.0.0.1的65530端口,也就是go层监听的端口:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
int CapturePrecess()
{
//udp sock
int sock;
int payload_max_len = 0;

payload_max_len = pathconf("/", _PC_PATH_MAX);
if(payload_max_len < 0) {
return -1;
}

payload_max_len += MAX_PAYLOAD;

sock = socket(AF_INET, SOCK_DGRAM, 0);
if(sock < 0) {
return -1;
}
struct sockaddr_in sockaddrin;
memset(&sockaddrin, 0, sizeof(sockaddrin));
sockaddrin.sin_family = AF_INET;
sockaddrin.sin_port = htons(PORT);
sockaddrin.sin_addr.s_addr = inet_addr("127.0.0.1")

然后创建netlink socket,这边自定义了一个netlink协议用于数据传输,协议号为31,创建好socket后和agent进程进行绑定,之后定义数据传输格式,分配消息数据存储空间:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
//netlink sock
int sock_fd;
sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_USER);
if (sock_fd < 0) {
return -1;
}

memset(&src_addr, 0, sizeof(src_addr));
memset(&msg, 0, sizeof(msg));

src_addr.nl_family = AF_NETLINK;
src_addr.nl_pid = getpid();
src_addr.nl_groups = 1;
bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr));
memset(&dest_addr, 0, sizeof(dest_addr));
nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(payload_max_len));
memset(nlh, 0, NLMSG_SPACE(payload_max_len));

iov.iov_base = (void *)nlh;
iov.iov_len = NLMSG_SPACE(payload_max_len);
msg.msg_name = (void *)&dest_addr;
msg.msg_namelen = sizeof(dest_addr);
msg.msg_iov = &iov;
msg.msg_iovlen = 1;

最后在一个while循环里不断从netlink socket中读取事件信息放到msg中(数据实际落在nlh指向的缓冲区里),再通过sendto向前面sockaddrin中设置的ip和端口(127.0.0.1:65530)发送接收到的payload,发送长度为strlen(payload);每次发送完会用NLMSG_DATA()获取nlmsghdr之后的payload地址,并用memset将刚发送的这段数据清零:

1
2
3
4
5
6
7
8
9
while (1)
{
recvmsg(sock_fd, &msg, 0);
sendto(sock, (char *)NLMSG_DATA(nlh), strlen((char *)NLMSG_DATA(nlh)), 0, (struct sockaddr *)&sockaddrin, sizeof(sockaddrin));
memset((char *)NLMSG_DATA(nlh), 0, strlen((char *)NLMSG_DATA(nlh)));
}
close(sock_fd);
close(sock);
return 0;

kernel

这边是比较核心的一个点,以上两块本质上都是caller,lkm才是进程事件捕获的真正实现方,首先贴下代码:

syscall_hook.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/paravirt.h>
#include <asm/syscall.h>
#include <linux/sys.h>
#include <linux/slab.h>
#include <linux/kallsyms.h>
#include <linux/binfmts.h>
#include <linux/version.h>
#include <net/sock.h>
#include <net/netlink.h>

/* Address of the kernel's sys_call_table, set by monitor_execve_init(). */
unsigned long **sys_call_table_ptr;
/* CR0 value read before write protection is switched off. */
unsigned long original_cr0;
/* Copy of the syscall table taken at load time; the assembly stub jumps
 * through it to reach the original handler. */
void *orig_sys_call_table [NR_syscalls];

/* Kernel-side netlink socket used to broadcast execve events. */
struct sock *syshook_nl_sk = NULL;
/* Netlink protocol number; the user-space relay uses the same value
 * (NETLINK_USER 31). */
#define SYSHOOK_NL_NUM 31

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
/* Wrapper around a user-space argv/envp array pointer, in native or compat
 * layout. NOTE(review): presumably mirrors the kernel-internal struct of the
 * same name - confirm against the target kernel. */
struct user_arg_ptr {
#ifdef CONFIG_COMPAT
bool is_compat;
#endif
union {
const char __user *const __user *native;
#ifdef CONFIG_COMPAT
const compat_uptr_t __user *compat;
#endif
} ptr;
};
/* getname()/putname() are resolved at load time with kallsyms_lookup_name(). */
struct filename *(*tmp_getname)(const char __user * filename);
void (*tmp_putname)(struct filename *name);
/* Signature of the execve entry stored in the syscall table. */
typedef asmlinkage long (*func_execve)(const char __user *,
const char __user * const __user *,
const char __user *const __user *);
/* Assembly trampoline that is installed in place of execve. */
extern asmlinkage long monitor_stub_execve_hook (const char __user *,
const char __user *const __user *,
const char __user *const __user *);
#elif LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
/* On 2.6.32 the execve entry takes an extra struct pt_regs pointer. */
typedef asmlinkage long (*func_execve)(const char __user *,
const char __user * const __user *,
const char __user *const __user *,
struct pt_regs *);
extern asmlinkage long monitor_stub_execve_hook(const char __user *,
const char __user * const __user *,
const char __user *const __user *,
struct pt_regs *);
#endif

/* Original execve table entry, restored when the module unloads. */
func_execve orig_stub_execve;

/*
 * find_sys_call_table - locate the syscall table by scanning kernel memory.
 *
 * Walks pointer-sized steps from the address of sys_close up to the address
 * of loops_per_jiffy. Each address is treated as a candidate table base; the
 * first one whose __NR_close slot holds the address of sys_close is returned.
 *
 * Returns the table base, or NULL when nothing in the range matches.
 */
unsigned long **find_sys_call_table(void) {
    const unsigned long target = (unsigned long)sys_close;
    const unsigned long limit = (unsigned long)&loops_per_jiffy;
    unsigned long addr = target;

    pr_err("Start found sys_call_table.\n");

    while (addr < limit) {
        unsigned long *candidate = (unsigned long *)addr;

        if (candidate[__NR_close] != target) {
            addr += sizeof(void *);
            continue;
        }

        pr_err("Found the sys_call_table!!! __NR_close[%d] sys_close[%lx]\n"
               " __NR_execve[%d] sct[__NR_execve][0x%lx]\n",
               __NR_close,
               target,
               __NR_execve,
               candidate[__NR_execve]);
        return (unsigned long **)candidate;
    }

    return NULL;
}



#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
/*
 * tmp_count - count the entries of a NULL-terminated user-space string array.
 *
 * Returns the number of entries before the terminating NULL (0 for a NULL
 * array), -EFAULT if an entry cannot be read, -E2BIG once more than max
 * entries are seen, or -ERESTARTNOHAND if a fatal signal is pending.
 */
static int tmp_count(char __user * __user * argv, int max)
{
    int count = 0;
    char __user *entry;

    if (argv == NULL)
        return count;

    while (1) {
        if (get_user(entry, argv))
            return -EFAULT;
        if (entry == NULL)
            return count;

        argv++;
        if (count >= max)
            return -E2BIG;
        count++;

        if (fatal_signal_pending(current))
            return -ERESTARTNOHAND;
        cond_resched();
    }
}

/*
 * monitor_execve_hook - execve() hook body for 2.6.32 kernels.
 *
 * Called from monitor_stub_execve_hook with the original execve arguments.
 * Collects the executable path, argv[1..], the caller's tgid, the parent's
 * comm and tgid, and the environment entries containing PWD, LOGNAME or USER.
 * The fields are joined with 0x01 separators and broadcast to netlink
 * multicast group 1.
 *
 * Always returns 0. The assembly stub restores %rax and jumps to the original
 * execve afterwards, so a failure here only means the event is not reported.
 */
asmlinkage long monitor_execve_hook(char __user *name,
        char __user * __user *argv,
        char __user * __user *envp,
        struct pt_regs *regs)
{
    long error = 0;
    struct filename *path = NULL;
    char __user * native = NULL;
    int tmp_argc = 0, tmp_envpc = 0;
    int i = 0, len = 0, offset = 0, max_len = 0;
    int total_argc_len = 0, total_envpc_len = 0;
    char *total_argc_ptr = NULL, *total_envpc_ptr = NULL;
    char *per_envp = NULL;
    int nl_send_len = 0;
    struct sk_buff *skb = NULL;
    struct nlmsghdr *nlh = NULL;
    struct file *file = NULL;
    char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); /* scratch buffer for d_path() */
    char *path1 = NULL;                        /* resolved path inside tmp, or NULL */

    path = getname(name);
    error = PTR_ERR(path);
    if (IS_ERR(path)) {
        pr_err("get path failed.\n");
        goto err;
    }

    /* Resolve the absolute path of the executable when it can be opened. */
    file = open_exec(path->name);
    if (!IS_ERR(file)) {
        if (tmp) {
            memset(tmp, 0, PATH_MAX);
            path1 = d_path(&file->f_path, tmp, PATH_MAX);
            if (IS_ERR(path1)) {
                path1 = NULL;
            }
        }
        /* open_exec() denies write access to the file; undo that before
         * dropping the reference, even when tmp could not be allocated. */
        allow_write_access(file);
        fput(file);
    }

    error = 0;
    tmp_argc = tmp_count(argv, MAX_ARG_STRINGS);
    if(tmp_argc < 0) {
        error = tmp_argc;
        goto err;
    }

    /* Sizing pass: sum the lengths (including NUL) of all argv strings. */
    for(i = 0; i < tmp_argc; i ++) {
        if(get_user(native, argv + i)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        total_argc_len += len;
    }

    /* Syscall context may sleep, so GFP_KERNEL is sufficient. A zero-size
     * request yields ZERO_SIZE_PTR (0x10), which is tested for below. */
    total_argc_ptr = kzalloc(total_argc_len + 16 * tmp_argc, GFP_KERNEL);
    if(!total_argc_ptr) {
        error = -ENOMEM;
        goto err;
    }

    /* Copy pass: join argv[1..] with spaces; argv[0] is skipped. */
    for(i = 0; i < tmp_argc; i ++) {
        if(i == 0) {
            continue;
        }
        if(get_user(native, argv + i)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(offset + len > total_argc_len + 16 * tmp_argc) {
            break;
        }

        if (copy_from_user(total_argc_ptr + offset, native, len)) {
            error = -EFAULT;
            goto err;
        }

        /* Replace the copied NUL with a space separator. */
        offset += len - 1;
        *(total_argc_ptr + offset) = ' ';
        offset += 1;
    }

    /*--------envp--------------*/
    len = 0;
    offset = 0;
    tmp_envpc = tmp_count(envp, MAX_ARG_STRINGS);
    if(tmp_envpc < 0) {
        error = tmp_envpc;
        goto err;
    }

    /* Sizing pass: total length and the longest single entry. */
    for(i = 0; i < tmp_envpc; i ++) {
        if(get_user(native, envp + i)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(len > max_len) {
            max_len = len;
        }
        total_envpc_len += len;
    }

    per_envp = kmalloc(max_len + 16, GFP_KERNEL);
    if(!per_envp) {
        error = -ENOMEM;
        goto err;
    }

    total_envpc_ptr = kzalloc(total_envpc_len + 16 * tmp_envpc, GFP_KERNEL);
    if(!total_envpc_ptr) {
        error = -ENOMEM;
        goto err;
    }

    /* Copy pass: keep only the entries containing PWD, LOGNAME or USER. */
    for(i = 0; i < tmp_envpc; i ++) {
        if(get_user(native, envp + i)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(offset + len > total_envpc_len + 16 * tmp_envpc) {
            break;
        }

        /* The string lives in user memory and may have grown since the
         * sizing pass; never copy more than per_envp was sized for. */
        if(len > max_len) {
            error = -EFAULT;
            goto err;
        }

        memset(per_envp, 0, max_len);
        if(copy_from_user(per_envp, native, len)) {
            error = -EFAULT;
            goto err;
        }
        /* Guarantee termination before strstr() scans the buffer. */
        per_envp[len - 1] = '\0';

        if(!strstr(per_envp, "PWD") && !strstr(per_envp, "LOGNAME") && !strstr(per_envp, "USER")) {
            continue;
        }

        /* Reuse the kernel copy that was just filtered instead of reading
         * user memory a second time. */
        memcpy(total_envpc_ptr + offset, per_envp, len);

        offset += len - 1;
        *(total_envpc_ptr + offset) = ' ';
        offset += 1;
    }

    /* 128 bytes of slack cover the separators and the two numeric fields. */
    nl_send_len = (path1 != NULL ? strlen(path1) : strlen(path->name)) + strlen(current->parent->comm) + 128;
    if(!ZERO_OR_NULL_PTR(total_envpc_ptr)) {
        nl_send_len += strlen(total_envpc_ptr);
    }
    if(!ZERO_OR_NULL_PTR(total_argc_ptr)) {
        nl_send_len += strlen(total_argc_ptr);
    }

    /* Cap the record; snprintf() below truncates to this size. */
    nl_send_len = nl_send_len < PATH_MAX + 2048 ? nl_send_len : PATH_MAX + 2048;
    skb = alloc_skb(NLMSG_SPACE(nl_send_len), GFP_ATOMIC);
    if(!skb) {
        error = -ENOMEM;
        goto err;
    }

    nlh = (struct nlmsghdr *)skb->data;
    nlh->nlmsg_len = NLMSG_SPACE(nl_send_len);
    nlh->nlmsg_pid = 0;
    nlh->nlmsg_flags = 0;
    nlh = nlmsg_put(skb, 0, 0, 0, NLMSG_SPACE(nl_send_len) - sizeof (struct nlmsghdr), 0);
    if(!nlh) {
        kfree_skb(skb);
        pr_err("nlh get failed.\n");
        goto err;
    }

    /* Record layout: path 0x01 args 0x01 tgid 0x01 parent comm 0x01 parent tgid 0x01 env. */
    snprintf(NLMSG_DATA(nlh), nl_send_len, "%s%c%s%c%u%c%s%c%d%c%s", path1 != NULL ? path1 : path->name, 0x1, (uint64_t)total_argc_ptr == 0x10 ? "N/A" : total_argc_ptr, 0x1, current->tgid, 0x1, current->parent->comm, 0x1, current->parent->tgid, 0x1, (uint64_t)total_envpc_ptr == 0x10 ? "N/A" : total_envpc_ptr);
    NETLINK_CB(skb).pid = 0;
    NETLINK_CB(skb).dst_group = 1;
    error = netlink_broadcast(syshook_nl_sk, skb, 0, 1, GFP_KERNEL);
    /* -ESRCH is tolerated here, as the original -3 literal was. */
    if(error != 0 && error != -ESRCH) {
        pr_err("send nl broadcast failed.\n");
        goto err;
    }

err:
    /* kfree() accepts NULL and ZERO_SIZE_PTR. */
    kfree(tmp);
    kfree(total_envpc_ptr);
    kfree(per_envp);
    kfree(total_argc_ptr);
    /* path is an ERR_PTR when getname() failed; putname() must not see it. */
    if(path && !IS_ERR(path)) {
        putname(path);
    }
    return 0;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
/*
 * get_user_arg_ptr - read entry nr of a user-space argv/envp array.
 *
 * Returns the user pointer stored in that slot (NULL marks the end of the
 * array) or ERR_PTR(-EFAULT) when the slot cannot be read. With
 * CONFIG_COMPAT the compat layout is used when argv.is_compat is set.
 */
static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
{
    const char __user *entry;

#ifdef CONFIG_COMPAT
    if (unlikely(argv.is_compat)) {
        compat_uptr_t entry32;

        if (get_user(entry32, argv.ptr.compat + nr) != 0)
            return ERR_PTR(-EFAULT);

        return compat_ptr(entry32);
    }
#endif

    if (get_user(entry, argv.ptr.native + nr) != 0)
        return ERR_PTR(-EFAULT);

    return entry;
}

static int tmp_count(struct user_arg_ptr argv, int max)
{
int i = 0;

if (argv.ptr.native != NULL) {
for (;;) {
const char __user *p = get_user_arg_ptr(argv, i);

if (!p)
break;

if (IS_ERR(p))
return -EFAULT;

if (i >= max)
return -E2BIG;
++i;

if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
cond_resched();
}
}
return i;
}

/*
 * monitor_execve_hook - execve() hook body for kernels >= 3.10.
 *
 * Called from monitor_stub_execve_hook with the original execve arguments.
 * Collects the executable path, argv[1..], the caller's tgid, the parent's
 * comm and tgid, and the environment entries containing PWD, LOGNAME or USER.
 * The fields are joined with 0x01 separators and broadcast to netlink
 * multicast group 1.
 *
 * Always returns 0. The assembly stub restores %rax and jumps to the original
 * execve afterwards, so a failure here only means the event is not reported.
 */
asmlinkage long monitor_execve_hook(const char __user *filename,
        const char __user *const __user *argv,
        const char __user *const __user *envp)
{
    int error = 0, i = 0, len = 0, offset = 0, max_len = 0;
    struct filename *path = NULL;
    const char __user * native = NULL;
    char *total_argc_ptr = NULL;
    char *total_envpc_ptr = NULL;
    char *per_envp = NULL;
    int tmp_argc = 0, total_argc_len = 0;
    int tmp_envpc = 0, total_envpc_len = 0;
    struct user_arg_ptr argvx = { .ptr.native = argv };
    struct user_arg_ptr envpx = { .ptr.native = envp };
    int nl_send_len = 0;
    struct sk_buff *skb = NULL;
    struct nlmsghdr *nlh = NULL;
    struct file *file = NULL;
    char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); /* scratch buffer for d_path() */
    char *path1 = NULL;                        /* resolved path inside tmp, or NULL */

    /* filename points into user space; getname() copies it into the kernel. */
    path = tmp_getname(filename);
    error = PTR_ERR(path);
    if (IS_ERR(path)) {
        goto err;
    }

    /* Resolve the absolute path of the executable when it can be opened. */
    file = open_exec(path->name);
    if (!IS_ERR(file)) {
        if (tmp) {
            memset(tmp, 0, PATH_MAX);
            path1 = d_path(&file->f_path, tmp, PATH_MAX);
            if (IS_ERR(path1)) {
                path1 = NULL;
            }
        }
        /* open_exec() denies write access to the file; undo that before
         * dropping the reference, even when tmp could not be allocated. */
        allow_write_access(file);
        fput(file);
    }

    error = 0;

    tmp_argc = tmp_count(argvx, MAX_ARG_STRINGS);
    if(tmp_argc < 0) {
        error = tmp_argc;
        goto err;
    }

    /* Sizing pass: sum the lengths (including NUL) of all argv strings. */
    for(i = 0; i < tmp_argc; i ++) {
        native = get_user_arg_ptr(argvx, i);
        if(IS_ERR(native)) {
            error = -EFAULT;
            goto err;
        }
        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        total_argc_len += len;
        /* Debug traces; printed on every execve. */
        printk("len:%d\n",len);
        printk("total_len:%d\n",total_argc_len);
    }

    /* Syscall context may sleep, so GFP_KERNEL is sufficient. A zero-size
     * request yields ZERO_SIZE_PTR (0x10), which is tested for below. */
    total_argc_ptr = kzalloc(total_argc_len + 16 * tmp_argc, GFP_KERNEL);
    if(!total_argc_ptr) {
        error = -ENOMEM;
        goto err;
    }

    /* Copy pass: join argv[1..] with spaces; argv[0] is skipped. */
    for(i = 0; i < tmp_argc; i ++) {
        if(i == 0) {
            continue;
        }
        native = get_user_arg_ptr(argvx, i);
        if(IS_ERR(native)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(offset + len > total_argc_len + 16 * tmp_argc) {
            break;
        }

        if (copy_from_user(total_argc_ptr + offset, native, len)) {
            error = -EFAULT;
            goto err;
        }
        /* Replace the copied NUL with a space separator. */
        offset += len - 1;
        *(total_argc_ptr + offset) = ' ';
        offset += 1;
        printk("total_argc_ptr_f:%s\n",total_argc_ptr);
    }
    printk("total_argc_ptr:%s\n",total_argc_ptr);
    /*--------envpx--------------*/
    len = 0;
    offset = 0;
    tmp_envpc = tmp_count(envpx, MAX_ARG_STRINGS);
    if(tmp_envpc < 0) {
        error = tmp_envpc;
        goto err;
    }

    /* Sizing pass: total length and the longest single entry. */
    for(i = 0; i < tmp_envpc; i ++) {
        native = get_user_arg_ptr(envpx, i);
        if(IS_ERR(native)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(len > max_len) {
            max_len = len;
        }

        total_envpc_len += len;
    }

    per_envp = kmalloc(max_len + 16, GFP_KERNEL);
    if(!per_envp) {
        error = -ENOMEM;
        goto err;
    }

    total_envpc_ptr = kzalloc(total_envpc_len + 16 * tmp_envpc, GFP_KERNEL);
    if(!total_envpc_ptr) {
        error = -ENOMEM;
        goto err;
    }

    /* Copy pass: keep only the entries containing PWD, LOGNAME or USER. */
    for(i = 0; i < tmp_envpc; i ++) {
        native = get_user_arg_ptr(envpx, i);
        if(IS_ERR(native)) {
            error = -EFAULT;
            goto err;
        }

        len = strnlen_user(native, MAX_ARG_STRLEN);
        if(!len) {
            error = -EFAULT;
            goto err;
        }

        if(offset + len > total_envpc_len + 16 * tmp_envpc) {
            break;
        }

        /* The string lives in user memory and may have grown since the
         * sizing pass; never copy more than per_envp was sized for. */
        if(len > max_len) {
            error = -EFAULT;
            goto err;
        }

        memset(per_envp, 0, max_len);
        if(copy_from_user(per_envp, native, len)) {
            error = -EFAULT;
            goto err;
        }
        /* Guarantee termination before strstr() scans the buffer. */
        per_envp[len - 1] = '\0';

        if(!strstr(per_envp, "PWD") && !strstr(per_envp, "LOGNAME") && !strstr(per_envp, "USER")) {
            continue;
        }

        /* Reuse the kernel copy that was just filtered instead of reading
         * user memory a second time. */
        memcpy(total_envpc_ptr + offset, per_envp, len);
        offset += len - 1;
        *(total_envpc_ptr + offset) = ' ';
        offset += 1;
    }

    /* 128 bytes of slack cover the separators and the two numeric fields. */
    nl_send_len = (path1 != NULL ? strlen(path1) : strlen(path->name)) + strlen(current->parent->comm) + 128;
    if(!ZERO_OR_NULL_PTR(total_envpc_ptr)) {
        nl_send_len += strlen(total_envpc_ptr);
    }
    printk("total_envpc_ptr1:%s\n",total_envpc_ptr);
    if(!ZERO_OR_NULL_PTR(total_argc_ptr)) {
        nl_send_len += strlen(total_argc_ptr);
    }
    /* Cap the record; snprintf() below truncates to this size. */
    nl_send_len = nl_send_len < PATH_MAX + 2048 ? nl_send_len : PATH_MAX + 2048;
    skb = alloc_skb(NLMSG_SPACE(nl_send_len), GFP_ATOMIC);
    if(!skb) {
        error = -ENOMEM;
        goto err;
    }

    nlh = (struct nlmsghdr *)skb->data;
    nlh->nlmsg_len = NLMSG_SPACE(nl_send_len);
    nlh->nlmsg_pid = 0;
    nlh->nlmsg_flags = 0;
    nlh = nlmsg_put(skb, 0, 0, 0, NLMSG_SPACE(nl_send_len) - sizeof (struct nlmsghdr), 0);
    if(!nlh) {
        kfree_skb(skb);
        pr_err("nlh get failed.\n");
        goto err;
    }

    /* Record layout: path 0x01 args 0x01 tgid 0x01 parent comm 0x01 parent tgid 0x01 env. */
    snprintf(NLMSG_DATA(nlh), nl_send_len, "%s%c%s%c%u%c%s%c%d%c%s", path1 != NULL ? path1 : path->name, 0x1, (uint64_t)total_argc_ptr == 0x10 ? "N/A" : total_argc_ptr, 0x1, current->tgid, 0x1, current->parent->comm, 0x1, current->parent->tgid, 0x1, (uint64_t)total_envpc_ptr == 0x10 ? "N/A" : total_envpc_ptr);
    printk("%s|%s|%u|%s|%d|%s\n",path->name,total_argc_ptr,current->tgid,current->parent->comm,current->parent->tgid,total_envpc_ptr);
    NETLINK_CB(skb).portid = 0;
    NETLINK_CB(skb).dst_group = 1;
    error = netlink_broadcast(syshook_nl_sk, skb, 0, 1, 0);
    /* -ESRCH is tolerated here, as the original -3 literal was. */
    if(error != 0 && error != -ESRCH) {
        pr_err("send nl broadcast failed.\n");
        goto err;
    }

err:
    /* kfree() accepts NULL and ZERO_SIZE_PTR. */
    kfree(tmp);
    kfree(per_envp);
    kfree(total_argc_ptr);
    kfree(total_envpc_ptr);

    /* path is an ERR_PTR when getname() failed; putname() must not see it. */
    if(path && !IS_ERR(path)) {
        tmp_putname(path);
    }
    return 0;
}
#else
/*
 * Fallback for kernel versions matched by neither branch above: the hook
 * collects nothing and returns 0.
 */
asmlinkage long monitor_execve_hook(void)
{
return 0;
}
#endif

/*
 * monitor_execve_init - module entry point.
 *
 * Locates sys_call_table, creates the netlink socket used to publish events,
 * resolves getname/putname on >= 3.10 kernels, snapshots the syscall table
 * and finally replaces the execve entry with monitor_stub_execve_hook.
 *
 * Returns 0 on success, -EIO when the netlink socket cannot be created and
 * -1 for the other failures. Nothing stays allocated on a failed load.
 */
static int __init monitor_execve_init(void)
{
    int i = 0;

    if (!(sys_call_table_ptr = find_sys_call_table())){
        pr_err("Get sys_call_table failed.\n");
        return -1;
    }

#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
    /*NetLink do not recv from userSpace*/
    syshook_nl_sk = netlink_kernel_create(&init_net, SYSHOOK_NL_NUM, 0, NULL, NULL, THIS_MODULE);
    if(!syshook_nl_sk) {
        pr_err("syshook: can not create netlink socket.\n");
        return -EIO;
    }
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
    syshook_nl_sk = netlink_kernel_create(&init_net, SYSHOOK_NL_NUM, NULL);
    if(!syshook_nl_sk) {
        pr_err("syshook: can not create netlink socket.\n");
        return -EIO;
    }
#endif
    pr_err("syshook: create netlink success.\n");


#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
    tmp_getname = (void *)kallsyms_lookup_name("getname");
    if(!tmp_getname) {
        pr_err("unknow Symbol getname\n");
        /* The exit handler does not run after a failed init, so release
         * the socket here. */
        netlink_kernel_release(syshook_nl_sk);
        syshook_nl_sk = NULL;
        return -1;
    }

    tmp_putname = (void *)kallsyms_lookup_name("putname");
    if(!tmp_putname) {
        pr_err("unknow Symbol putname\n");
        netlink_kernel_release(syshook_nl_sk);
        syshook_nl_sk = NULL;
        return -1;
    }
#endif

    pr_err("Loading module monitor_execve, sys_call_table at %p\n", sys_call_table_ptr);

    /* Snapshot every entry before anything is patched; the assembly stub
     * jumps through this copy. Reading needs no CR0 change. */
    for(i = 0; i < NR_syscalls; i ++) {
        orig_sys_call_table[i] = sys_call_table_ptr[i];
    }
    orig_stub_execve = (void *)(sys_call_table_ptr[__NR_execve]);

    /* Clear CR0.WP (bit 16) only around the single write to the table. */
    original_cr0 = read_cr0();
    write_cr0(original_cr0 & ~0x00010000);
    sys_call_table_ptr[__NR_execve]= (void *)monitor_stub_execve_hook;
    write_cr0(original_cr0);
    return 0;
}

/*
 * monitor_execve_exit - module exit point.
 *
 * Restores the original execve entry first and only then releases the
 * netlink socket, so the hook is no longer reachable through the syscall
 * table while the socket it broadcasts on is being torn down.
 *
 * NOTE(review): tasks already executing inside the hook when the module is
 * unloaded are not waited for - confirm whether that needs synchronisation.
 */
static void __exit monitor_execve_exit(void)
{
    int hooked = (sys_call_table_ptr != NULL);

    if (hooked) {
        /* Clear CR0.WP (bit 16) around the write to the table. */
        write_cr0(original_cr0 & ~0x00010000);
        sys_call_table_ptr[__NR_execve] = (void *)orig_stub_execve;
        write_cr0(original_cr0);

        sys_call_table_ptr = NULL;
    }

    if (syshook_nl_sk) {
        netlink_kernel_release(syshook_nl_sk);
        syshook_nl_sk = NULL;
    }

    if (!hooked){
        return;
    }
    pr_err("unload syshook_execve succ.\n");
}

module_init(monitor_execve_init);
module_exit(monitor_execve_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("mlsm <454667707@qq.com>");
MODULE_DESCRIPTION("Monitor Syscall sys_execve");

syscall_stub_hook.S

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
 * monitor_stub_execve_hook - trampoline installed in the execve slot of the
 * syscall table. It saves ten registers, calls the C hook, restores the
 * registers and then jumps to the original handler.
 */
.text
.global monitor_stub_execve_hook

monitor_stub_execve_hook:
/* Save the registers; they still hold the values from syscall entry. */
pushq %rbx
pushq %rdi
pushq %rsi
pushq %rdx
pushq %rcx
pushq %rax
pushq %r8
pushq %r9
pushq %r10
pushq %r11

/* rdi, rsi and rdx are untouched at this point and are passed through. */
call monitor_execve_hook

/* Restore in reverse order. Popping rax overwrites the C hook's return
 * value, so that value is discarded. */
pop %r11
pop %r10
pop %r9
pop %r8
pop %rax
pop %rcx
pop %rdx
pop %rsi
pop %rdi

pop %rbx
/* Tail-jump to entry number rax (8-byte entries) of the saved table copy. */
jmp *orig_sys_call_table(, %rax, 8)

Makefile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Kbuild input: syshook_execve.ko is linked from the C hook and the assembly stub.
obj-m := syshook_execve.o

syshook_execve-objs := syscall_hook.o syscall_stub_hook.o

# Build tree of the running kernel.
KERNEL := /lib/modules/$(shell uname -r)/build

# None of these targets produce a file with the same name.
.PHONY: all install clean

# $(MAKE) keeps -j, -n and the jobserver working across the recursive call.
all:
	$(MAKE) -C $(KERNEL) M=$(CURDIR) modules

install:
	$(MAKE) -C $(KERNEL) M=$(CURDIR) modules_install
	depmod -A

clean:
	$(MAKE) -C $(KERNEL) M=$(CURDIR) clean

从makefile可以看到最终的syshook_execve.ko是由syscall_hook和syscall_stub_hook共同编译的

先从module_init宏开始看起,可以看到内核模块初始化时调用了monitor_execve_init,跟进:

1
2
3
4
5
6
module_init(monitor_execve_init);
module_exit(monitor_execve_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("mlsm <454667707@qq.com>");
MODULE_DESCRIPTION("Monitor Syscall sys_execve");

monitor_execve_init

跟进函数首先可以看到有个find_sys_call_table的操作;一般hook系统调用都会首先拿到系统调用表syscall_table地址,然后才能进行系统调用表覆写:

1
2
3
4
5
6
7
8
static int __init monitor_execve_init(void)
{
int i = 0;

if (!(sys_call_table_ptr = find_sys_call_table())){
pr_err("Get sys_call_table failed.\n");
return -1;
}

find_sys_call_table是通过暴力搜索的方式定位系统调用表的:从sys_close的地址出发,以指针大小为步长向后扫描,直到loops_per_jiffy的地址为止;把扫描到的每个地址都当作候选的表首地址p,如果p[__NR_close]中存放的值等于sys_close的地址,就认为p是sys_call_table的起始地址;这边为什么是从sys_close地址出发的,不是很理解,后续需要调研下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
unsigned long **find_sys_call_table(void) {
unsigned long ptr;
unsigned long *p;

pr_err("Start found sys_call_table.\n");

for (ptr = (unsigned long)sys_close;
ptr < (unsigned long)&loops_per_jiffy;
ptr += sizeof(void *)) {

p = (unsigned long *)ptr;

if (p[__NR_close] == (unsigned long)sys_close) {
pr_err("Found the sys_call_table!!! __NR_close[%d] sys_close[%lx]\n"
" __NR_execve[%d] sct[__NR_execve][0x%lx]\n",
__NR_close,
(unsigned long)sys_close,
__NR_execve,
p[__NR_execve]);
return (unsigned long **)p;
}
}

return NULL;
}

内核版本不同,相关内核函数也会有一些差异,主要是参数这块,参数传递出问题可能导致内核hang死;代码中有对2.6.32和3.10.0内核版本做区分,这边以3.10.0为准;netlink_kernel_create是内核创建netlink的api,SYSHOOK_NL_NUM宏为31,结合用户态netlink socket那块可知是自定义的netlink协议类型,netlink_kernel_create创建时也需要指定:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
/*NetLink do not recv from userSpace*/
syshook_nl_sk = netlink_kernel_create(&init_net, SYSHOOK_NL_NUM, 0, NULL, NULL, THIS_MODULE);
if(!syshook_nl_sk) {
pr_err("syshook: can not create netlink socket.\n");
return -EIO;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
syshook_nl_sk = netlink_kernel_create(&init_net, SYSHOOK_NL_NUM, NULL);
if(!syshook_nl_sk) {
pr_err("syshook: can not create netlink socket.\n");
return -EIO;
}
#endif
pr_err("syshook: create netlink success.\n");

调kallsyms_lookup_name获取getname和putname的内核函数地址:

1
2
3
4
5
6
7
8
9
10
11
12
13
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
tmp_getname = (void *)kallsyms_lookup_name("getname");
if(!tmp_getname) {
pr_err("unknow Symbol getname\n");
return -1;
}

tmp_putname = (void *)kallsyms_lookup_name("putname");
if(!tmp_putname) {
pr_err("unknow Symbol putname\n");
return -1;
}
#endif

然后就是函数尾部的syscall_table的覆写了,系统调用表在内核中是只读的,直接写会导致内核oops,所以需要先清除cr0寄存器中的写保护位WP(bit 16,即代码中的0x00010000);之后将64位系统下全量的系统调用地址和execve系统调用地址分别进行保存,方便hook后的恢复,这边为啥要全量一开始没搞懂,按理说保存hook的那个就好了(从后面的汇编stub可以看到,它是通过orig_sys_call_table按系统调用号索引跳回原函数的,所以这里保存了整张表);最终,覆写系统调用表中execve地址为monitor_stub_execve_hook函数地址;覆写完毕后恢复cr0:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
    original_cr0 = read_cr0();
write_cr0(original_cr0 & ~0x00010000);
pr_err("Loading module monitor_execve, sys_call_table at %p\n", sys_call_table_ptr);

for(i = 0; i < NR_syscalls - 1; i ++) {
orig_sys_call_table[i] = sys_call_table_ptr[i];
}

orig_stub_execve = (void *)(sys_call_table_ptr[__NR_execve]);
sys_call_table_ptr[__NR_execve]= (void *)monitor_stub_execve_hook;

write_cr0(original_cr0);
return 0;

monitor_stub_execve_hook

monitor_stub_execve_hook函数在syscall_stub_hook.S中,函数逻辑是这样的,首先把rbx、rdi、rsi、rdx、rcx、rax、r8~r11这些寄存器压栈保存环境,防止真正execve执行时候出问题;调用monitor_execve_hook函数,按相反顺序出栈恢复寄存器环境(rax也被恢复,所以monitor_execve_hook的返回值会被丢弃),最后跳到orig_sys_call_table表中index为rax(系统调用号)的地址执行,即原生的execve系统调用:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
.text
.global monitor_stub_execve_hook

monitor_stub_execve_hook:
pushq %rbx
pushq %rdi
pushq %rsi
pushq %rdx
pushq %rcx
pushq %rax
pushq %r8
pushq %r9
pushq %r10
pushq %r11

call monitor_execve_hook

pop %r11
pop %r10
pop %r9
pop %r8
pop %rax
pop %rcx
pop %rdx
pop %rsi
pop %rdi

pop %rbx
jmp *orig_sys_call_table(, %rax, 8)

monitor_execve_hook

首先是一些初始化动作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
asmlinkage long monitor_execve_hook(const char __user *filename, 
const char __user *const __user *argv,
const char __user *const __user *envp)
{
int error = 0, i = 0, len = 0, offset = 0, max_len = 0;
struct filename *path = NULL;
const char __user * native = NULL;
char *total_argc_ptr = NULL;
char *total_envpc_ptr = NULL;
char *per_envp = NULL;
int tmp_argc = 0, total_argc_len = 0;
int tmp_envpc = 0, total_envpc_len = 0;
struct user_arg_ptr argvx = { .ptr.native = argv };
struct user_arg_ptr envpx = { .ptr.native = envp };
int nl_send_len = 0;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh = NULL;
struct file *file = NULL;
char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
char *path1 = NULL;

tmp_getname保存了getname内核函数的地址,用其获取filename指针指向的路径名,之所以无法直接使用filename是因为本质上filename指针指向的是用户空间地址,得先用getname拷贝到内核空间中;利用open_exec获取文件对应的file结构指针,调d_path获取文件绝对路径,最后用fput减少文件对象的引用计数并判断是否进行释放:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
path = tmp_getname(filename);
// printk("path:%s\n",path->name);
error = PTR_ERR(path);
if (IS_ERR(path)) {
goto err;
}

file = open_exec(path->name);
if (!IS_ERR(file) && tmp) {
memset(tmp, 0, PATH_MAX);
path1 = d_path(&file->f_path, tmp, PATH_MAX);
if (IS_ERR(path1)) {
path1 = NULL;
}
fput(file);
}

error = 0;

首先调tmp_count获取execve执行进程对应的参数数量,tmp_count内部逻辑是这样的,遍历argv索引,调get_user_arg_ptr从用户态拿对应索引处的字符串指针,一旦检测到指针为null,则返回i,即程序执行的参数数量;返回值赋值给tmp_argc;之后根据tmp_argc值循环获取每一个参数长度并累加到total_argc_len;最后按total_argc_len加上每个参数16字节的余量分配内核空间,并用memset清零:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* Count the argv entries; a negative value is an error code from tmp_count(). */
tmp_argc = tmp_count(argvx, MAX_ARG_STRINGS);
if(tmp_argc < 0) {
    error = tmp_argc;
    goto err;
}

/* Sizing pass: sum the lengths (including NUL) of all argv strings. */
for(i = 0; i < tmp_argc; i ++) {
    native = get_user_arg_ptr(argvx, i);
    if(IS_ERR(native)) {
        error = -EFAULT;
        goto err;
    }
    len = strnlen_user(native, MAX_ARG_STRLEN);
    if(!len) {
        error = -EFAULT;
        goto err;
    }

    total_argc_len += len;
    /* total_argc_len is an int value to print, so the conversion is %d. */
    printk("total_len:%d\n",total_argc_len);
}
/* Buffer for the joined arguments, with 16 spare bytes per argument. */
total_argc_ptr = kmalloc(total_argc_len + 16 * tmp_argc, GFP_ATOMIC);
if(!total_argc_ptr) {
    error = -ENOMEM;
    goto err;
}
memset(total_argc_ptr, 0, total_argc_len + 16 * tmp_argc);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
static int tmp_count(struct user_arg_ptr argv, int max)
{
int i = 0;

if (argv.ptr.native != NULL) {
for (;;) {
const char __user *p = get_user_arg_ptr(argv, i);

if (!p)
break;

if (IS_ERR(p))
return -EFAULT;

if (i >= max)
return -E2BIG;
++i;

if (fatal_signal_pending(current))
return -ERESTARTNOHAND;
cond_resched();
}
}
return i;
}

根据参数数量获取每一个参数字符串指针(跳过argv[0]),再用copy_from_user把字符串从用户态内存拷贝到内核态缓冲区total_argc_ptr中,各参数之间用空格分隔:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
	for(i = 0; i < tmp_argc; i ++) {
if(i == 0) {
continue;
}
native = get_user_arg_ptr(argvx, i);
if(IS_ERR(native)) {
error = -EFAULT;
goto err;
}

len = strnlen_user(native, MAX_ARG_STRLEN);
if(!len) {
error = -EFAULT;
goto err;
}

if(offset + len > total_argc_len + 16 * tmp_argc) {
break;
}

if (copy_from_user(total_argc_ptr + offset, native, len)) {
error = -EFAULT;
goto err;
}
offset += len - 1;
*(total_argc_ptr + offset) = ' ';
offset += 1;
}

下面的envpx环境变量和argv的获取是基本一致的,不再赘述

计算需要发送的数据长度,检查对应指针是否正确,分配socket buffer(skb),设置netlink msg格式,包括msg length、pid、flags等;调用nlmsg_put把一个新的netlink message放到skb中;根据task_struct获取pexe、参数、pid、pname、ppid、环境变量并将其格式化(0x1组合)放到netlink header后,同时设置skb相关参数,其中,portid表示源,dst_group代表skb传输的目标组id:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
nl_send_len = (path1 != NULL ? strlen(path1) : strlen(path->name)) + strlen(current->parent->comm) + 128;
if(!ZERO_OR_NULL_PTR(total_envpc_ptr)) {
nl_send_len += strlen(total_envpc_ptr);
}
if(!ZERO_OR_NULL_PTR(total_argc_ptr)) {
nl_send_len += strlen(total_argc_ptr);
}
nl_send_len = nl_send_len < PATH_MAX + 2048 ? nl_send_len : PATH_MAX + 2048;
skb = alloc_skb(NLMSG_SPACE(nl_send_len), GFP_ATOMIC);
if(!skb) {
error = -ENOMEM;
goto err;
}

nlh = (struct nlmsghdr *)skb->data;
nlh->nlmsg_len = NLMSG_SPACE(nl_send_len);
nlh->nlmsg_pid = 0;
nlh->nlmsg_flags = 0;
nlh = nlmsg_put(skb, 0, 0, 0, NLMSG_SPACE(nl_send_len) - sizeof (struct nlmsghdr), 0);
if(!nlh) {
kfree_skb(skb);
pr_err("nlh get failed.\n");
goto err;
}
snprintf(NLMSG_DATA(nlh), nl_send_len, "%s%c%s%c%u%c%s%c%d%c%s", path1 != NULL ? path1 : path->name, 0x1, (uint64_t)total_argc_ptr == 0x10 ? "N/A" : total_argc_ptr, 0x1, current->tgid, 0x1, current->parent->comm, 0x1, current->parent->tgid, 0x1, (uint64_t)total_envpc_ptr == 0x10 ? "N/A" : total_envpc_ptr);
NETLINK_CB(skb).portid = 0;
NETLINK_CB(skb).dst_group = 1;

netlink_broadcast

调用netlink_broadcast向连接到netlink的多播组1进行广播(多播),传输的消息体存放在skb中,skb的data段保存了要发送的netlink消息结构:

1
error = netlink_broadcast(syshook_nl_sk, skb, 0, 1, 0);

debug

Dmesg

image-20201023183519589

三、总结

分析到这边就结束了, 总结一下:

1、go调用层监听本地的udp:65530端口并循环从socket中读取lkm捕获的进程事件,上层做事件解析和过滤;

2、c调用层创建自定义协议的netlink socket并不断从中获取来自内核的事件;同时创建本地udp socket,将从内核获取的事件通过udp socket传输发送给另一方

3、kernel实现层通过hook execve捕获进程创建事件并将消息放入skb中,之后通过netlink_broadcast进行广播,将消息发送给指定的多播组。

关于netlink这块的一些细节还不是特别清楚,之后会继续学习源码,期待后面的netlink通信原理分析