0%

Netstat Source Code Debug

前言

之前就对linux上附加的一些小工具像netstat、ps、top等的功能比较感兴趣,虽然知道本质上还是去解析vfs中的信息,但对其细节比较好奇,这边做下源码分析,同时也为后续对其它相对于netstat更复杂的程序/项目的分析做下铺垫,如sysdig、osquery、linux kernel等等;这块spoock江城子师傅已经写过类似的文章,个人觉得写的很好,要点都get到了,作为读者受益匪浅,贴下链接致敬下引路人:https://blog.spoock.com/2019/05/26/netstat-learn/。

下面所做分析可能无法面面俱到,非核心的函数和功能分析将被drop。

调试环境

此次调试是在vscode下使用gdbserver launch远程进行的,调试端的launch.json配置如下,被调端gdbserver挂住就好了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
// Remote-debug configuration: the cppdbg adapter drives gdb, which connects
// to a gdbserver that is already running on the machine being debugged.
"name": "(gdb) Launch",
"type": "cppdbg",
"request": "launch",
// Binary whose symbols gdb loads; must match the one started under gdbserver.
"program": "${workspaceFolder}/netstat",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"linux": {
"MIMode": "gdb",
"miDebuggerPath": "/usr/bin/gdb",
// host:port on which the target's gdbserver is listening
"miDebuggerServerAddress": "10.211.55.4:7777"
},
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
}
]
}

源码分析

2286行之前大量操作用在判断程序启动参数上,并给当作参数标志位使用的变量置位,以及其它一些初始化操作

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#if I18N
setlocale (LC_ALL, "");
bindtextdomain("net-tools", "/usr/share/locale");
textdomain("net-tools");
#endif
getroute_init(); /* Set up AF routing support */

afname[0] = '\0';
while ((i = getopt_long(argc, argv, "A:CFMacdeghilnNoprsStuUvVWw2fx64?Z", longopts, &lop)) != EOF)
switch (i) {
case -1:
break;
case 1:
if (lop < 0 || lop >= AFTRANS_CNT) {
EINTERN("netstat.c", "longopts 1 range");
break;
}
if (aftrans_opt(longopts[lop].name))
exit(1);
break;
case 'A':
if (aftrans_opt(optarg))
exit(1);
break;
case 'M':
flag_mas++;
break;
case 'a':
flag_all++;
break;
case 'l':
flag_lst++;
break;
case 'c':
flag_cnt++;
break;

case 'd':
flag_deb++;
break;
case 'g':
flag_igmp++;
break;
case 'e':
flag_exp++;
break;
case 'p':
flag_prg++;
break;
case 'i':
flag_int++;
break;
case 'W':
flag_wide++;
break;
case 'n':
flag_not |= FLAG_NUM;
break;
case '!':
flag_not |= FLAG_NUM_HOST;
break;
case '@':
flag_not |= FLAG_NUM_PORT;
break;
case '#':
flag_not |= FLAG_NUM_USER;
break;
case 'N':
flag_not |= FLAG_SYM;
break;
case 'C':
flag_cf |= FLAG_CACHE;
break;
case 'F':
flag_cf |= FLAG_FIB;
break;
case 'o':
flag_opt++;
break;
case '6':
if (aftrans_opt("inet6"))
exit(1);
break;
case '4':
if (aftrans_opt("inet"))
exit(1);
break;
case 'V':
version();
/*NOTREACHED */
case 'v':
flag_ver |= FLAG_VERBOSE;
break;
case 'r':
flag_rou++;
break;
case 't':
flag_tcp++;
break;
case 'S':
flag_sctp++;
break;
case 'u':
flag_udp++;
break;
case 'U':
flag_udplite++;
break;
case 'w':
flag_raw++;
break;
case '2':
flag_l2cap++;
break;
case 'f':
flag_rfcomm++;
break;
case 'x':
if (aftrans_opt("unix"))
exit(1);
break;
case 'Z':
#if HAVE_SELINUX
if (is_selinux_enabled() <= 0) {
fprintf(stderr, _("SELinux is not enabled on this machine.\n"));
exit(1);
}
flag_prg++;
flag_selinux++;
#else
fprintf(stderr, _("SELinux is not enabled for this application.\n"));
exit(1);
#endif

break;
case '?':
usage(E_OPTERR);
case 'h':
usage(E_USAGE);
case 's':
flag_sta++;
}

if (flag_int + flag_rou + flag_mas + flag_sta > 1)
usage(E_OPTERR);

if ((flag_inet || flag_inet6 || flag_sta) &&
!(flag_tcp || flag_sctp || flag_udp || flag_udplite || flag_raw))
flag_noprot = flag_tcp = flag_sctp = flag_udp = flag_udplite = flag_raw = 1;

if ((flag_tcp || flag_sctp || flag_udp || flag_udplite || flag_raw || flag_igmp) &&
!(flag_inet || flag_inet6))
flag_inet = flag_inet6 = 1;

if (flag_bluetooth && !(flag_l2cap || flag_rfcomm))
flag_l2cap = flag_rfcomm = 1;

flag_arg = flag_tcp + flag_sctp + flag_udplite + flag_udp + flag_raw + flag_unx
+ flag_ipx + flag_ax25 + flag_netrom + flag_igmp + flag_x25 + flag_rose
+ flag_l2cap + flag_rfcomm;

if (flag_mas) {
#if HAVE_FW_MASQUERADE && HAVE_AFINET
#if MORE_THAN_ONE_MASQ_AF
if (!afname[0])
safe_strncpy(afname, DFLT_AF, sizeof(afname));
#endif
for (;;) {
i = ip_masq_info(flag_not & FLAG_NUM_HOST,
flag_not & FLAG_NUM_PORT, flag_exp);
if (i || !flag_cnt)
break;
wait_continous();
}
#else
ENOSUPP("netstat", "FW_MASQUERADE");
i = -1;
#endif
return (i);
}

if (flag_sta) {
if (!afname[0])
safe_strncpy(afname, DFLT_AF, sizeof(afname));

if (!strcmp(afname, "inet")) {
#if HAVE_AFINET
parsesnmp(flag_raw, flag_tcp, flag_udp, flag_sctp);
#else
ENOSUPP("netstat", "AF INET");
#endif
} else if(!strcmp(afname, "inet6")) {
#if HAVE_AFINET6
parsesnmp6(flag_raw, flag_tcp, flag_udp);
#else
ENOSUPP("netstat", "AF INET6");
#endif
} else {
printf(_("netstat: No statistics support for specified address family: %s\n"), afname);
exit(1);
}
exit(0);
}

if (flag_rou) {
int options = 0;

if (!afname[0])
safe_strncpy(afname, DFLT_AF, sizeof(afname));

if (flag_exp == 2)
flag_exp = 1;
else if (flag_exp == 1)
flag_exp = 2;

options = (flag_exp & FLAG_EXT) | flag_not | flag_cf | flag_ver;
if (!flag_cf)
options |= FLAG_FIB;

for (;;) {
i = route_info(afname, options);
if (i || !flag_cnt)
break;
wait_continous();
}
return (i);
}
if (flag_int) {
for (;;) {
i = iface_info();
if (!flag_cnt || i)
break;
wait_continous();
}
return (i);
}

prg_cache_load

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#if HAVE_AFINET
prg_cache_load();
printf(_("Active Internet connections ")); /* xxx */

if (flag_all)
printf(_("(servers and established)"));
else {
if (flag_lst)
printf(_("(only servers)"));
else
printf(_("(w/o servers)"));
}
printf(_("\nProto Recv-Q Send-Q Local Address Foreign Address State ")); /* xxx */
if (flag_exp > 1)
printf(_(" User Inode "));
print_progname_banner();
print_selinux_banner();
if (flag_opt)
printf(_(" Timer")); /* xxx */
printf("\n");
#else
if (flag_arg) {
i = 1;
ENOSUPP("netstat", "AF INET");
}
#endif

​ 2286行之后,首先需要引起注意的是#if HAVE_AFINET这句判断,需要了解的是,socket套接字协议类型是种类繁多的,官方将不同种类和功能的socket根据协议分类到不同的socket协议族中,如AF_BLUETOOTH、AF_NETLINK、AF_INET、AF_INET6等,这些不同协议族中的socket有的是用于进程间通信,有的是用于和硬件通信,有的是用于网络通信等等,不同类型功能各不相同,所以需要分类去显示,#if HAVE_xxxx也就是用于netstat去分类显示用的。

​ 其次比较重要的是prg_cache_load这个函数,先贴下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
 * Build the inode -> "pid/program" cache used for the -p column.
 *
 * Walks PATH_PROC, and for every entry whose name is all digits opens that
 * entry's fd directory, readlink()s each numeric fd entry, tries to extract a
 * socket inode from the link target, and registers it with prg_cache_add()
 * together with "<dirname>/<program name>" built from the cmdline file.
 *
 * Does nothing when the cache was already loaded or flag_prg is not set.
 * Prints a hint on stderr when PATH_PROC cannot be opened, or when at least
 * one fd directory could not be opened because of EACCES.
 */
static void prg_cache_load(void)
{
char line[LINE_MAX], eacces=0;
int procfdlen, fd, cmdllen, lnamelen;
char lname[30], cmdlbuf[512], finbuf[PROGNAME_WIDTH];
unsigned long inode;
const char *cs, *cmdlp;
DIR *dirproc = NULL, *dirfd = NULL;
struct dirent *direproc, *direfd;
#if HAVE_SELINUX
security_context_t scon = NULL;
#endif

/* Load at most once, and only when the program column was requested. */
if (prg_cache_loaded || !flag_prg) return;
/* 1 = "load attempted"; prg_cache_add() raises it to 2 once an entry is added. */
prg_cache_loaded = 1;
/* Pre-terminate so that a read filling the whole buffer is still a C string. */
cmdlbuf[sizeof(cmdlbuf) - 1] = '\0';
if (!(dirproc=opendir(PATH_PROC))) goto fail;
/* errno is reset before every readdir() call via the comma operator. */
while (errno = 0, direproc = readdir(dirproc)) {
/* Keep only entries whose name consists solely of digits. */
for (cs = direproc->d_name; *cs; cs++)
if (!isdigit(*cs))
break;
if (*cs)
continue;
/* line = fd directory path of this entry; procfdlen = its length. */
procfdlen = snprintf(line,sizeof(line),PATH_PROC_X_FD,direproc->d_name);
/* NOTE(review): the "- 5" margin is not explained in this function — presumably headroom for the appended "/<fd>" component; confirm. */
if (procfdlen <= 0 || procfdlen >= sizeof(line) - 5)
continue;
errno = 0;
dirfd = opendir(line);
if (! dirfd) {
/* Remember permission failures so a hint can be printed at the end. */
if (errno == EACCES)
eacces = 1;
continue;
}
line[procfdlen] = '/';
/* Program name is resolved lazily, once per directory, on the first socket fd found. */
cmdlp = NULL;
while ((direfd = readdir(dirfd))) {
/* Skip . and .. */
if (!isdigit(direfd->d_name[0]))
continue;
if (procfdlen + 1 + strlen(direfd->d_name) + 1 > sizeof(line))
continue;
/* Re-write the fd suffix plus '/': the cmdline branch below overwrites this
   part of line with PATH_CMDLINE, so it has to be restored every iteration. */
memcpy(line + procfdlen - PATH_FD_SUFFl, PATH_FD_SUFF "/",
PATH_FD_SUFFl + 1);
safe_strncpy(line + procfdlen + 1, direfd->d_name,
sizeof(line) - procfdlen - 1);
lnamelen = readlink(line, lname, sizeof(lname) - 1);
if (lnamelen == -1)
continue;
lname[lnamelen] = '\0'; /*make it a null-terminated string*/

/* Two link-target formats are tried in turn; entries matching neither are skipped. */
if (extract_type_1_socket_inode(lname, &inode) < 0)
if (extract_type_2_socket_inode(lname, &inode) < 0)
continue;

if (!cmdlp) {
if (procfdlen - PATH_FD_SUFFl + PATH_CMDLINEl >=
sizeof(line) - 5)
continue;
/* Replace the fd suffix in line with PATH_CMDLINE to get the cmdline path. */
safe_strncpy(line + procfdlen - PATH_FD_SUFFl, PATH_CMDLINE,
sizeof(line) - procfdlen + PATH_FD_SUFFl);
fd = open(line, O_RDONLY);
if (fd < 0)
continue;
cmdllen = read(fd, cmdlbuf, sizeof(cmdlbuf) - 1);
if (close(fd))
continue;
if (cmdllen == -1)
continue;
if (cmdllen < sizeof(cmdlbuf) - 1)
cmdlbuf[cmdllen]='\0';
/* For an absolute path keep only the part after the last '/'. cmdlbuf is
   handled as a C string, so anything after its first NUL byte is ignored. */
if (cmdlbuf[0] == '/' && (cmdlp = strrchr(cmdlbuf, '/')))
cmdlp++;
else
cmdlp = cmdlbuf;
}

/* Cached value has the form "<dirname>/<program>". */
snprintf(finbuf, sizeof(finbuf), "%s/%s", direproc->d_name, cmdlp);
#if HAVE_SELINUX
if (getpidcon(atoi(direproc->d_name), &scon) == -1) {
scon=xstrdup("-");
}
prg_cache_add(inode, finbuf, scon);
freecon(scon);
#else
prg_cache_add(inode, finbuf, "-");
#endif
}
closedir(dirfd);
dirfd = NULL;
}
if (dirproc)
closedir(dirproc);
if (dirfd)
closedir(dirfd);
/* No permission problem was seen: nothing to report. */
if (!eacces)
return;
/* Still 1 means prg_cache_add() never stored anything. The fail label sits
   inside this branch so that a failed opendir(PATH_PROC) prints the same text. */
if (prg_cache_loaded == 1) {
fail:
fprintf(stderr,_("(No info could be read for \"-p\": geteuid()=%d but you should be root.)\n"),
geteuid());
}
else
fprintf(stderr, _("(Not all processes could be identified, non-owned process info\n"
" will not be shown, you would have to be root to see it all.)\n"));
}

​ 这部分代码主要用来初始化保存pid/cmdline-inode之间对应关系的缓存(按inode哈希分桶的链表),看上去很长,其实逻辑很清晰:首先if (prg_cache_loaded || !flag_prg) return;一句通过判断prg_cache是否已经加载以及netstat -p参数是否置位,来决定是否继续执行;一旦继续,则利用readdir进行两层循环,外层拿到/proc下的pid,内层解析出/proc/pid/fd下各socket的inode,并读取cmdline得到程序名;最后,利用prg_cache_add(inode, finbuf, scon)将解析出的inode及finbuf放入prg_node链表中,此处的finbuf是pid和程序名以"pid/程序名"形式的拼接,第三个参数scon是SELinux安全上下文,未启用HAVE_SELINUX(或getpidcon失败)时传入"-"。需要注意的是,PID/Program name一栏中的"-"并不是来自这个参数:由于proc下的pid具有瞬时特性(或权限不足),某些socket的inode可能不在缓存中,此时后文的prg_cache_get查不到对应节点,就会返回"-"作为占位。

​ 看下prg_cache_add,如下,与之联动对链表进行增、查、删操作的还有prg_cache_get和prg_cache_get_con以及prg_cache_clear,一并贴下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/*
 * Insert an (inode -> name, scon) entry into the prg_hash table.
 *
 * The bucket chain selected by PRG_HASHIT(inode) is walked to its end; if an
 * entry for the same inode already exists the call is a no-op, otherwise a
 * new node is appended. Marks the cache as populated (prg_cache_loaded = 2)
 * before doing anything else. Allocation failure is silently ignored.
 */
static void prg_cache_add(unsigned long inode, char *name, const char *scon)
{
    unsigned bucket = PRG_HASHIT(inode);
    struct prg_node **link = prg_hash + bucket;
    struct prg_node *node;
    int excess;

    prg_cache_loaded = 2;

    /* Advance link to the chain's terminating NULL slot. */
    while ((node = *link) != NULL) {
        if (node->inode == inode) {
            /* Several processes can share one inode; only the first is kept. */
            return;
        }
        link = &node->next;
    }

    node = malloc(sizeof(*node));
    *link = node;
    if (node == NULL)
        return;

    node->next = NULL;
    node->inode = inode;
    safe_strncpy(node->name, name, sizeof(node->name));

    /* When scon does not fit, the copy starts at offset excess + 1 so that
       the end of the string is what gets stored. */
    excess = (strlen(scon) - sizeof(node->scon)) + 1;
    if (excess > 0)
        safe_strncpy(node->scon, &scon[excess + 1], sizeof(node->scon));
    else
        safe_strncpy(node->scon, scon, sizeof(node->scon));
}
/*
 * Look up the cached name string for a socket inode.
 * Returns the stored name, or "-" when the inode is not in the cache.
 */
static const char *prg_cache_get(unsigned long inode)
{
    unsigned bucket = PRG_HASHIT(inode);
    struct prg_node *node = prg_hash[bucket];

    while (node != NULL) {
        if (node->inode == inode)
            return (node->name);
        node = node->next;
    }
    return ("-");
}


/*
 * Look up the cached scon string for a socket inode.
 * Returns the stored scon, or "-" when the inode is not in the cache.
 */
static const char *prg_cache_get_con(unsigned long inode)
{
    unsigned bucket = PRG_HASHIT(inode);
    struct prg_node *node = prg_hash[bucket];

    while (node != NULL) {
        if (node->inode == inode)
            return (node->scon);
        node = node->next;
    }
    return ("-");
}

/*
 * Free every node in prg_hash and reset the cache state.
 * The table is only walked when entries were actually added
 * (prg_cache_loaded == 2); the state is reset to 0 in all cases.
 */
static void prg_cache_clear(void)
{
    if (prg_cache_loaded == 2) {
        struct prg_node **slot;

        for (slot = prg_hash; slot < prg_hash + PRG_HASH_SIZE; slot++) {
            /* Pop nodes off the head of the chain until it is empty. */
            while (*slot != NULL) {
                struct prg_node *victim = *slot;

                *slot = victim->next;
                free(victim);
            }
        }
    }
    prg_cache_loaded = 0;
}

tcp_info

prg_cache_load之后正式进入解析流程,首先依旧是判断socket协议族类型,不同协议族进入不同分支进行处理,这边主要关注AF_INET协议族(由HAVE_AFINET宏控制是否编译),该协议族内部又包含不同的socket类型,具体包括tcp、sctp、udp、udplite、raw等,分别由tcp_info、sctp_info、udp_info、udplite_info、raw_info处理;此处主要分析tcp,因为流程都类似。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#if HAVE_AFINET
if (!flag_arg || flag_tcp) {
i = tcp_info();
if (i)
return (i);
}

if (!flag_arg || flag_sctp) {
i = sctp_info();
if (i)
return (i);
}

if (!flag_arg || flag_udp) {
i = udp_info();
if (i)
return (i);
}

if (!flag_arg || flag_udplite) {
i = udplite_info();
if (i)
return (i);
}

if (!flag_arg || flag_raw) {
i = raw_info();
if (i)
return (i);
}
1
2
3
4
5
/*
 * Print all TCP sockets.
 *
 * The whole function body is the expansion of INFO_GUTS6: the IPv4 table
 * (_PATH_PROCNET_TCP, labelled "tcp") and the IPv6 table (_PATH_PROCNET_TCP6,
 * labelled "tcp6") are read line by line and every line is handed to
 * tcp_do_one(). The return statement also comes from the macro expansion.
 */
static int tcp_info(void)
{
INFO_GUTS6(_PATH_PROCNET_TCP, _PATH_PROCNET_TCP6, "AF INET (tcp)",
tcp_do_one, "tcp", "tcp6");
}

tcp_info调用了INFO_GUTS6,INFO_GUTS6有六个参数,前两个是lib/pathnames.h中定义的宏,分别代表procfs下保存tcp(IPv4)和tcp6(IPv6)连接信息的文件:

1
2
#define _PATH_PROCNET_TCP		"/proc/net/tcp"
#define _PATH_PROCNET_TCP6 "/proc/net/tcp6"

剩下的tcp_do_one看上去像函数指针,猜测是在INFO_GUTS6中进行了调用,跟进INFO_GUTS6

1
2
3
4
5
6
7
8
9
10
11
/*
 * Function-body template for the "<proto>_info" routines that cover both
 * IPv4 and IPv6.
 *
 * Declares the locals buffer, rc and lnr, so it must be expanded directly
 * inside a function body. Runs INFO_GUTS1 on `file` when no protocol argument
 * was given or flag_inet is set, runs INFO_GUTS2 on `file6` when no protocol
 * argument was given or flag_inet6 is set, and finishes with INFO_GUTS3.
 * INFO_GUTS2 and INFO_GUTS3 are defined elsewhere in the file.
 *
 *   file, file6  - paths of the IPv4 / IPv6 tables to read
 *   name         - label passed on to INFO_GUTS1
 *   proc         - callback invoked once per line
 *   prot4, prot6 - protocol labels passed to proc for the respective table
 */
#define INFO_GUTS6(file,file6,name,proc,prot4,prot6)	\
char buffer[8192]; \
int rc = 0; \
int lnr = 0; \
if (!flag_arg || flag_inet) { \
INFO_GUTS1(file,name,proc,prot4) \
} \
if (!flag_arg || flag_inet6) { \
INFO_GUTS2(file6,proc,prot6) \
} \
INFO_GUTS3

这边是用的宏去定义函数,可以看到其根据数据包ipv4/ipv6协议类型分设了两条分支,这边只看ipv4,进入INFO_GUTS1:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 * Read one table file and feed every line to `proc`.
 *
 * Expects buffer, rc, lnr and procinfo to be in scope at the expansion site
 * (buffer, rc and lnr are declared by INFO_GUTS6; procinfo is not declared
 * in this macro).
 *
 * If proc_fopen() fails:
 *   - any errno other than ENOENT / EACCES is reported with perror() and the
 *     ENCLOSING function returns -1;
 *   - otherwise ESYSNOT(name) is emitted when a protocol was explicitly
 *     requested or flag_ver is set, and rc is set to 1 when a protocol was
 *     explicitly requested (both suppressed when flag_noprot is set).
 * On success each line obtained by fgets() is passed to
 * proc(lnr++, buffer, prot) and the stream is closed afterwards.
 *
 * NOTE(review): the loop exits only on feof(); a read error that does not set
 * the EOF indicator would keep it spinning — confirm whether that can happen
 * for the files this is used on.
 */
#define INFO_GUTS1(file,name,proc,prot)			\
procinfo = proc_fopen((file)); \
if (procinfo == NULL) { \
if (errno != ENOENT && errno != EACCES) { \
perror((file)); \
return -1; \
} \
if (!flag_noprot && (flag_arg || flag_ver)) \
ESYSNOT("netstat", (name)); \
if (!flag_noprot && flag_arg) \
rc = 1; \
} else { \
do { \
if (fgets(buffer, sizeof(buffer), procinfo)) \
(proc)(lnr++, buffer,prot); \
} while (!feof(procinfo)); \
fclose(procinfo); \
}

同样是一个宏,首先利用proc_fopen获取/proc/net/tcp文件句柄,然后用fgets将文件中的每一行读取到8192字节的buffer中,(proc)(lnr++, buffer,prot);调用了INFO_GUTS6传入的tcp_do_one函数指针,将/proc/net/tcp中的每一行连同行号lnr一起作为参数传给tcp_do_one进行循环处理,猜测是对buffer中的内容进行解析。

tcp_do_one

首先贴下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
 * Parse one line of the kernel TCP socket table and print it as a netstat row.
 *
 * lnr  - zero-based line number inside the table; line 0 is skipped.
 * line - the raw text line.
 * prot - protocol label printed in the first column (callers pass "tcp"/"tcp6").
 *
 * Lines with fewer than 11 converted fields are reported on stderr and
 * dropped. Rows are filtered according to flag_all / flag_lst, then the
 * address columns are formatted with addr_do_one() and the trailing optional
 * columns are emitted by finish_this_one().
 */
static void tcp_do_one(int lnr, const char *line, const char *prot)
{
    unsigned long rxq, txq, time_len, retr;
    /*
     * uid, timeout and inode are conversions 12-14 of the sscanf below, but a
     * line is accepted as soon as 11 conversions succeeded. Give them defined
     * values so a short line never makes us read indeterminate variables.
     */
    unsigned long inode = 0;
    int uid = 0, timeout = 0;
    int num, local_port, rem_port, d, state, timer_run;
    char rem_addr[128], local_addr[128], timers[64];
    const struct aftype *ap;
    /*
     * Zero-initialised so that ss_family is well defined even when a long
     * (IPv6-sized) address shows up in a build without HAVE_AFINET6, where the
     * branch below fills in nothing.
     */
    struct sockaddr_storage localsas = {0}, remsas = {0};
    struct sockaddr_in *localaddr = (struct sockaddr_in *)&localsas;
    struct sockaddr_in *remaddr = (struct sockaddr_in *)&remsas;
#if HAVE_AFINET6
    char addr6[INET6_ADDRSTRLEN];
    struct in6_addr in6;
    extern struct aftype inet6_aftype;
#endif
    long clk_tck = ticks_per_second();

    /* The first line is not a data row. */
    if (lnr == 0)
        return;

    num = sscanf(line,
    "%d: %64[0-9A-Fa-f]:%X %64[0-9A-Fa-f]:%X %X %lX:%lX %X:%lX %lX %d %d %lu %*s\n",
                 &d, local_addr, &local_port, rem_addr, &rem_port, &state,
                 &txq, &rxq, &timer_run, &time_len, &retr, &uid, &timeout, &inode);

    if (num < 11) {
        fprintf(stderr, _("warning, got bogus tcp line.\n"));
        return;
    }

    /*
     * Without -a: with -l keep only rows whose remote port is 0,
     * otherwise keep only rows whose remote port is non-zero.
     */
    if (!flag_all && ((flag_lst && rem_port) || (!flag_lst && !rem_port)))
        return;

    /* More than 8 hex digits cannot be an IPv4 address; treat it as IPv6. */
    if (strlen(local_addr) > 8) {
#if HAVE_AFINET6
        /* Demangle what the kernel gives us */
        sscanf(local_addr, "%08X%08X%08X%08X",
               &in6.s6_addr32[0], &in6.s6_addr32[1],
               &in6.s6_addr32[2], &in6.s6_addr32[3]);
        inet_ntop(AF_INET6, &in6, addr6, sizeof(addr6));
        inet6_aftype.input(1, addr6, &localsas);
        sscanf(rem_addr, "%08X%08X%08X%08X",
               &in6.s6_addr32[0], &in6.s6_addr32[1],
               &in6.s6_addr32[2], &in6.s6_addr32[3]);
        inet_ntop(AF_INET6, &in6, addr6, sizeof(addr6));
        inet6_aftype.input(1, addr6, &remsas);
        localsas.ss_family = AF_INET6;
        remsas.ss_family = AF_INET6;
#endif
    } else {
        sscanf(local_addr, "%X", &localaddr->sin_addr.s_addr);
        sscanf(rem_addr, "%X", &remaddr->sin_addr.s_addr);
        localsas.ss_family = AF_INET;
        remsas.ss_family = AF_INET;
    }

    if ((ap = get_afntype(localsas.ss_family)) == NULL) {
        fprintf(stderr, _("netstat: unsupported address family %d !\n"),
                localsas.ss_family);
        return;
    }

    /* The hex text in local_addr / rem_addr is overwritten with "addr:port". */
    addr_do_one(local_addr, sizeof(local_addr), 22, ap, &localsas, local_port, "tcp");
    addr_do_one(rem_addr, sizeof(rem_addr), 22, ap, &remsas, rem_port, "tcp");

    /* Timer column (-o): the text depends on the timer_run code from the table. */
    timers[0] = '\0';
    if (flag_opt)
        switch (timer_run) {
        case 0:
            snprintf(timers, sizeof(timers), _("off (0.00/%ld/%d)"), retr, timeout);
            break;

        case 1:
            snprintf(timers, sizeof(timers), _("on (%2.2f/%ld/%d)"),
                     (double) time_len / clk_tck, retr, timeout);
            break;

        case 2:
            snprintf(timers, sizeof(timers), _("keepalive (%2.2f/%ld/%d)"),
                     (double) time_len / clk_tck, retr, timeout);
            break;

        case 3:
            snprintf(timers, sizeof(timers), _("timewait (%2.2f/%ld/%d)"),
                     (double) time_len / clk_tck, retr, timeout);
            break;

        case 4:
            snprintf(timers, sizeof(timers), _("probe (%2.2f/%ld/%d)"),
                     (double) time_len / clk_tck, retr, timeout);
            break;

        default:
            snprintf(timers, sizeof(timers), _("unkn-%d (%2.2f/%ld/%d)"),
                     timer_run, (double) time_len / clk_tck, retr, timeout);
            break;
        }

    /* NOTE(review): state indexes tcp_state[] without a range check — confirm
       the table covers every value the kernel can report. */
    printf("%-4s %6ld %6ld %-*s %-*s %-11s",
           prot, rxq, txq, (int)netmax(23,strlen(local_addr)), local_addr, (int)netmax(23,strlen(rem_addr)), rem_addr, _(tcp_state[state]));

    finish_this_one(uid,inode,timers);
}

几个点,首先解析/proc/net/tcp文件中的每一行数据,将每个字段的数据赋值到不同变量:

1
2
3
num = sscanf(line,"%d: %64[0-9A-Fa-f]:%X %64[0-9A-Fa-f]:%X %X %lX:%lX %X:%lX %lX %d %d %lu %*s\n",
&d, local_addr, &local_port, rem_addr, &rem_port, &state,
&txq, &rxq, &timer_run, &time_len, &retr, &uid, &timeout, &inode);

其次针对local/remote address进行解析,将sscanf解析出的十六进制字符串转换为整型的地址数据(下面是IPv4分支,结果直接写入sockaddr_in的sin_addr.s_addr):

1
2
sscanf(local_addr, "%X", &localaddr->sin_addr.s_addr);
sscanf(rem_addr, "%X", &remaddr->sin_addr.s_addr);

再之后,调用addr_do_one函数,对addr及port进行操作:

1
2
addr_do_one(local_addr, sizeof(local_addr), 22, ap, &localsas, local_port, "tcp");
addr_do_one(rem_addr, sizeof(rem_addr), 22, ap, &remsas, rem_port, "tcp");

跟进:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/*
 * Format "address:port" into buf.
 *
 * buf, buf_len - destination buffer and its size; the result is always
 *                NUL-terminated and never exceeds buf_len bytes.
 * short_len    - maximum combined length of the address and port text when
 *                wide output (flag_wide) is off.
 * ap           - address family whose sprint() renders the address.
 * addr         - socket address to render.
 * port, proto  - port number and protocol name handed to get_sname().
 *
 * With flag_wide set, or when the two parts fit into short_len, both are
 * printed in full. Otherwise the port text is limited to short_len - 4
 * characters and the address gets whatever remains of short_len.
 */
static void addr_do_one(char *buf, size_t buf_len, size_t short_len, const struct aftype *ap,
                        const struct sockaddr_storage *addr,
                        int port, const char *proto
                        )
{
    const char *sport, *saddr;
    size_t port_len, addr_len;

    saddr = ap->sprint(addr, flag_not & FLAG_NUM_HOST);
    sport = get_sname(htons(port), proto, flag_not & FLAG_NUM_PORT);
    addr_len = strlen(saddr);
    port_len = strlen(sport);
    if (!flag_wide && (addr_len + port_len > short_len)) {
        /* Avoid the unsigned wrap-around of short_len - 4 for tiny limits. */
        size_t port_cap = short_len > 4 ? short_len - 4 : 0;

        /* Assume port name is short */
        port_len = netmin(port_len, port_cap);
        addr_len = short_len - port_len;
        /* Precision-limited %s yields the same text as the former
           strncpy/strcat/strncat sequence, but honours buf_len. */
        snprintf(buf, buf_len, "%.*s:%.*s",
                 (int)addr_len, saddr, (int)port_len, sport);
    } else
        snprintf(buf, buf_len, "%s:%s", saddr, sport);
}

发现其对addr和port进行拼接,比较有意思的一点是利用short_len对addr和port拼接后的长度进行限制:若两者长度之和超过short_len(调用处传入的是22),且未指定-W参数(即flag_wide未置位),则进行截断显示——端口名最多保留short_len-4个字符,addr占用剩余的长度;若指定了-W,则不做截断,完整输出。

最后对addr:port拼接后的数据及前面在/proc/net/tcp中解析出的字段进行输出:

1
printf("%-4s  %6ld %6ld %-*s %-*s %-11s",prot, rxq, txq, (int)netmax(23,strlen(local_addr)), local_addr, (int)netmax(23,strlen(rem_addr)), rem_addr, _(tcp_state[state]));

剩下的最后一个 finish_this_one函数,跟进分析后发现其主要是在以上的基础输出外检测变量是否置位,即参数使用情况来输出指定类型的数据栏目:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Print the optional trailing columns of a socket row and end the line.
 *
 * uid    - owner of the socket; shown by name unless numeric users were
 *          requested or the lookup fails.
 * inode  - socket inode; also the key for the program / context lookups.
 * timers - preformatted timer text, printed only when flag_opt is set.
 */
static void finish_this_one(int uid, unsigned long inode, const char *timers)
{
    if (flag_exp > 1) {
        struct passwd *entry = NULL;

        /* Only consult the passwd database when names are wanted. */
        if (!(flag_not & FLAG_NUM_USER))
            entry = getpwuid(uid);

        if (entry != NULL)
            printf(" %-10s ", entry->pw_name);
        else
            printf(" %-10d ", uid);

        printf("%-10lu",inode);
    }

    if (flag_prg) {
        const char *program = prg_cache_get(inode);

        printf(" %-" PROGNAME_WIDTHs "s", program);
    }

    if (flag_selinux) {
        const char *context = prg_cache_get_con(inode);

        printf(" %-" SELINUX_WIDTHs "s", context);
    }

    if (flag_opt) {
        printf(" %s", timers);
    }

    putchar('\n');
}

其中flag_exp、flag_prg、flag_selinux、flag_opt分别对应-e、-p、-Z、-o:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
	case 'A':
if (aftrans_opt(optarg))
exit(1);
break;
case 'M':
flag_mas++;
break;
case 'a':
flag_all++;
break;
case 'l':
flag_lst++;
break;
case 'c':
flag_cnt++;
break;

case 'd':
flag_deb++;
break;
case 'g':
flag_igmp++;
break;
case 'e':
flag_exp++;
break;
case 'p':
flag_prg++;
break;
case 'i':
flag_int++;
break;
case 'W':
flag_wide++;
break;
case 'n':
flag_not |= FLAG_NUM;
break;
case '!':
flag_not |= FLAG_NUM_HOST;
break;
case '@':
flag_not |= FLAG_NUM_PORT;
break;
case '#':
flag_not |= FLAG_NUM_USER;
break;
case 'N':
flag_not |= FLAG_SYM;
break;
case 'C':
flag_cf |= FLAG_CACHE;
break;
case 'F':
flag_cf |= FLAG_FIB;
break;
case 'o':
flag_opt++;
break;
case '6':
if (aftrans_opt("inet6"))
exit(1);
break;
case '4':
if (aftrans_opt("inet"))
exit(1);
break;
case 'V':
version();
/*NOTREACHED */
case 'v':
flag_ver |= FLAG_VERBOSE;
break;
case 'r':
flag_rou++;
break;
case 't':
flag_tcp++;
break;
case 'S':
flag_sctp++;
break;
case 'u':
flag_udp++;
break;
case 'U':
flag_udplite++;
break;
case 'w':
flag_raw++;
break;
case '2':
flag_l2cap++;
break;
case 'f':
flag_rfcomm++;
break;
case 'x':
if (aftrans_opt("unix"))
exit(1);
break;
case 'Z':
#if HAVE_SELINUX
if (is_selinux_enabled() <= 0) {
fprintf(stderr, _("SELinux is not enabled on this machine.\n"));
exit(1);
}
flag_prg++;
flag_selinux++;

具体参数对应含义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
 * Print the command-line help and terminate the process.
 *
 * rc - exit status passed to exit(); it also selects the output stream:
 *      a non-zero rc writes the help to stderr, zero writes it to stdout.
 *
 * Does not return.
 */
static void usage(int rc)
{
FILE *fp = rc ? stderr : stdout;
/* Synopsis lines. */
fprintf(fp, _("usage: netstat [-vWeenNcCF] [<Af>] -r netstat {-V|--version|-h|--help}\n"));
fprintf(fp, _(" netstat [-vWnNcaeol] [<Socket> ...]\n"));
fprintf(fp, _(" netstat { [-vWeenNac] -i | [-cnNe] -M | -s [-6tuw] }\n\n"));

/* Mode-selecting options. */
fprintf(fp, _(" -r, --route display routing table\n"));
fprintf(fp, _(" -i, --interfaces display interface table\n"));
fprintf(fp, _(" -g, --groups display multicast group memberships\n"));
fprintf(fp, _(" -s, --statistics display networking statistics (like SNMP)\n"));
#if HAVE_FW_MASQUERADE
fprintf(fp, _(" -M, --masquerade display masqueraded connections\n\n"));
#endif

/* Output modifiers. */
fprintf(fp, _(" -v, --verbose be verbose\n"));
fprintf(fp, _(" -W, --wide don't truncate IP addresses\n"));
fprintf(fp, _(" -n, --numeric don't resolve names\n"));
fprintf(fp, _(" --numeric-hosts don't resolve host names\n"));
fprintf(fp, _(" --numeric-ports don't resolve port names\n"));
fprintf(fp, _(" --numeric-users don't resolve user names\n"));
fprintf(fp, _(" -N, --symbolic resolve hardware names\n"));
fprintf(fp, _(" -e, --extend display other/more information\n"));
fprintf(fp, _(" -p, --programs display PID/Program name for sockets\n"));
fprintf(fp, _(" -o, --timers display timers\n"));
fprintf(fp, _(" -c, --continuous continuous listing\n\n"));
fprintf(fp, _(" -l, --listening display listening server sockets\n"));
fprintf(fp, _(" -a, --all display all sockets (default: connected)\n"));
fprintf(fp, _(" -F, --fib display Forwarding Information Base (default)\n"));
fprintf(fp, _(" -C, --cache display routing cache instead of FIB\n"));
#if HAVE_SELINUX
fprintf(fp, _(" -Z, --context display SELinux security context for sockets\n"));
#endif

/* Socket-type and address-family selectors. */
fprintf(fp, _("\n <Socket>={-t|--tcp} {-u|--udp} {-U|--udplite} {-S|--sctp} {-w|--raw}\n"));
fprintf(fp, _(" {-x|--unix} --ax25 --ipx --netrom\n"));
fprintf(fp, _(" <AF>=Use '-6|-4' or '-A <af>' or '--<af>'; default: %s\n"), DFLT_AF);
fprintf(fp, _(" List of possible address families (which support routing):\n"));
print_aflist(1); /* 1 = routeable */
exit(rc);
}

else

剩下的就是af_inet协议族下其它类型的socket解析及输出以及其它协议族的解析输出,类似tcp_info,尾部使用prg_cache_clear清理缓存链表,不再赘述。

总结

​ 代码逻辑是非常清晰的,但是调试过程中发现mac下的ide的debug功能实在不是太友好,无法查看/修改整块的内存,数据的显示也和其本身格式不一致,以及单步过程中遇到了可视化界面中的光标位置和真实elf执行流程不匹配的问题,总体下来,感觉不是很灵活的样子,不止是vscode,clion也有这种问题;

​ 正如spoock师傅所说的,netstat工具在网络进程频繁开关socket通道的环境中性能压力是很大的,联想到之前写的网络日志采集demo中有模块是复用了部分netstat源码,但是总体效果不是非常好,原因无他,遍历proc这块的io性能压力比较大,若放到实际业务环境中,如openstack这种大流量和进程频繁操作的云环境,业务影响之大可想而知;