acrret = alloc_chrdev_region(&dev, 0, num_cpus + 1, PROBE_DEVICE_NAME);
if (acrret < 0) {
    pr_err("could not allocate major number for %s\n", PROBE_DEVICE_NAME);
    ret = -ENOMEM;
    goto init_module_err;
}
g_ppm_class = class_create(THIS_MODULE, PROBE_DEVICE_NAME);
if (IS_ERR(g_ppm_class)) {
    pr_err("can't allocate device class\n");
    ret = -EFAULT;
    goto init_module_err;
}
if (cdev_add(&g_ppm_devs[j].cdev, g_ppm_devs[j].dev, 1) < 0) {
    pr_err("could not allocate chrdev for %s\n", PROBE_DEVICE_NAME);
    ret = -EFAULT;
    goto init_module_err;
}
if (IS_ERR(device)) {
    pr_err("error creating the device for %s\n", PROBE_DEVICE_NAME);
    cdev_del(&g_ppm_devs[j].cdev);
    ret = -EFAULT;
    goto init_module_err;
}
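The `device` checked here comes from a device_create() call against the class allocated above; a minimal sketch of what that call plausibly looks like (the per-CPU "%d" name suffix is an assumption for illustration, not the exact source):

/* Sketch: create the device node for ring j under g_ppm_class.
 * The name format string is assumed for illustration. */
device = device_create(g_ppm_class, NULL, g_ppm_devs[j].dev, NULL,
                       PROBE_DEVICE_NAME "%d", j);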
/**
 * cdev_init() - initialize a cdev structure
 * @cdev: the structure to initialize
 * @fops: the file_operations for this device
 *
 * Initializes @cdev, remembering @fops, making it ready to add to the
 * system with cdev_add().
 */
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
    memset(cdev, 0, sizeof *cdev);
    INIT_LIST_HEAD(&cdev->list);
    kobject_init(&cdev->kobj, &ktype_cdev_default);
    cdev->ops = fops;
}
/**
 * cdev_add() - add a char device to the system
 * @p: the cdev structure for the device
 * @dev: the first device number for which this device is responsible
 * @count: the number of consecutive minor numbers corresponding to this
 *         device
 *
 * cdev_add() adds the device represented by @p to the system, making it
 * live immediately. A negative error code is returned on failure.
 */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
    int error;
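Putting the two together, the canonical registration sequence looks like this (a minimal sketch with hypothetical names, not code from the driver):

static struct cdev my_cdev;

static const struct file_operations my_fops = {
    .owner = THIS_MODULE,
    /* .open, .read, .release, ... */
};

/* first_dev is the dev_t previously returned by alloc_chrdev_region() */
cdev_init(&my_cdev, &my_fops);
my_cdev.owner = THIS_MODULE;

if (cdev_add(&my_cdev, first_dev, 1) < 0) {
    /* the device never went live; only the region needs tearing down */
    unregister_chrdev_region(first_dev, 1);
}

Note the "live immediately" warning in the kerneldoc: because cdev_add() can trigger an open() before it even returns, everything the file_operations touch must be initialized before this call.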
if (dpi_lookahead_init() != PPM_SUCCESS) {
    pr_err("initializing lookahead-based snaplen failed\n");
    ret = -EFAULT;
    goto init_module_err;
}
/*
 * Set up our callback in case we get a hotplug event while we are
 * initializing the cpu structures
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
hp_ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
                                   "sysdig/probe:online",
                                   sysdig_cpu_online,
                                   sysdig_cpu_offline);
if (hp_ret <= 0) {
    pr_err("error registering cpu hotplug callback\n");
    ret = hp_ret;
    goto init_module_err;
}
hp_state = hp_ret;
#else
register_cpu_notifier(&cpu_notifier);
#endif
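With CPUHP_AP_ONLINE_DYN, cpuhp_setup_state_nocalls() dynamically allocates a state and returns its (positive) number on success, which is why the code treats anything <= 0 as failure and stashes the value in hp_state for the eventual cpuhp_remove_state_nocalls(hp_state). Both callbacks follow the standard hotplug signature; a minimal sketch (the bodies are placeholders, not the driver's actual logic):

/* Sketch: hotplug callbacks, invoked as CPUs come online / go offline.
 * Each returns 0 on success or a negative errno. */
static int sysdig_cpu_online(unsigned int cpu)
{
    /* e.g. allocate/enable the per-CPU ring buffer for @cpu */
    return 0;
}

static int sysdig_cpu_offline(unsigned int cpu)
{
    /* e.g. quiesce the per-CPU state for @cpu */
    return 0;
}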
/*
 * All ok. Final initializations.
 */
g_tracepoint_registered = false;
TRACEPOINT_PROBE(syscall_exit_probe, struct pt_regs *regs, long ret)
{
    int id;
    long table_index;
    const struct syscall_evt_pair *cur_g_syscall_table = g_syscall_table;
    const enum ppm_syscall_code *cur_g_syscall_code_routing_table = g_syscall_code_routing_table;
    bool compat = false;
#ifdef __NR_socketcall
    int socketcall_syscall = __NR_socketcall;
#else
    int socketcall_syscall = -1;
#endif
id = syscall_get_nr(current, regs);
#if defined(CONFIG_X86_64) && defined(CONFIG_IA32_EMULATION)
    /*
     * When a process does execve from 64bit to 32bit, TS_COMPAT is marked true
     * but the id of the syscall is __NR_execve, so to correctly parse it we need to
     * use the 64bit syscall table. On 32bit __NR_execve is equal to __NR_ia32_oldolduname,
     * which is a very old syscall, not used anymore by most applications
     */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
    if (in_ia32_syscall() && id != __NR_execve) {
#else
    if (unlikely((task_thread_info(current)->status & TS_COMPAT) && id != __NR_execve)) {
#endif
        cur_g_syscall_table = g_syscall_ia32_table;
        cur_g_syscall_code_routing_table = g_syscall_ia32_code_routing_table;
        socketcall_syscall = __NR_ia32_socketcall;
        compat = true;
    }
#endif
g_n_tracepoint_hit_inc();
    table_index = id - SYSCALL_TABLE_ID0;
    if (likely(table_index >= 0 && table_index < SYSCALL_TABLE_SIZE)) {
        struct event_data_t event_data;
        int used = cur_g_syscall_table[table_index].flags & UF_USED;
        enum syscall_flags drop_flags = cur_g_syscall_table[table_index].flags;
        enum ppm_event_type type;
        /*
         * Simple mode event filtering
         */
        if (g_simple_mode_enabled) {
            if ((drop_flags & UF_SIMPLEDRIVER_KEEP) == 0) {
                return;
            }
        }
#ifdef _HAS_SOCKETCALL
        if (id == socketcall_syscall) {
            used = true;
            drop_flags = UF_NEVER_DROP;
            type = PPME_GENERIC_X;
        } else
            type = cur_g_syscall_table[table_index].exit_event_type;
#else
        type = cur_g_syscall_table[table_index].exit_event_type;
#endif
        if (used)
            record_event_all_consumers(type, drop_flags, &event_data);
        else
            record_event_all_consumers(PPME_GENERIC_X, UF_ALWAYS_DROP, &event_data);
    }
/**
 * syscall_get_nr - find what system call a task is executing
 * @task: task of interest, must be blocked
 * @regs: task_pt_regs() of @task
 *
 * If @task is executing a system call or is at system call
 * tracing about to attempt one, returns the system call number.
 * If @task is not executing a system call, i.e. it's blocked
 * inside the kernel for a fault or signal, returns -1.
 *
 * Note this returns int even on 64-bit machines. Only 32 bits of
 * system call number can be meaningful. If the actual arch value
 * is 64 bits, this truncates to 32 bits so 0xffffffff means -1.
 *
 * It's only valid to call this when @task is known to be blocked.
 */
int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
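Because the helper returns -1 when the task is not actually inside a system call, a probe can use it as an early-out guard; a sketch (not the driver's code):

int nr = syscall_get_nr(current, regs);

if (nr < 0)
    return;  /* blocked on a fault or signal, not executing a syscall */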
/*
 * Make sure we have enough space for the event header.
 * We need at least space for the header plus 16 bits per parameter for the lengths.
 */
if (likely(freespace >= sizeof(struct ppm_evt_hdr) + args.arg_data_offset)) {
    /*
     * Populate the header
     */
    struct ppm_evt_hdr *hdr = (struct ppm_evt_hdr *)(ring->buffer + head);
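The layout this check implies is: a fixed header, then one 16-bit length slot per parameter, then the parameter payloads themselves. A sketch of the offsets under that assumption (not verbatim driver code):

/*
 * Assumed event layout in the ring buffer:
 *
 *   | struct ppm_evt_hdr | u16 lens[nargs] | param 0 data | param 1 data | ...
 *
 * so the argument data region starts just past the length array, and
 * arg_data_offset (relative to the end of the header) advances as each
 * parameter's payload is written:
 */
args.arg_data_offset = args.nargs * sizeof(u16);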
res = val_to_ring(args, val, 0, false, 0);
if (unlikely(res != PPM_SUCCESS))
    return res;
/*
 * name
 */
syscall_get_arguments_deprecated(current, args->regs, 1, 1, &val);
res = val_to_ring(args, val, 0, true, 0);
if (unlikely(res != PPM_SUCCESS))
    return res;
/*
 * Flags
 * Note that we convert them into the ppm portable representation before pushing them to the ring
 */
syscall_get_arguments_deprecated(current, args->regs, 2, 1, &flags);
res = val_to_ring(args, open_flags_to_scap(flags), 0, false, 0);
if (unlikely(res != PPM_SUCCESS))
    return res;
switch (param_info->type) {
case PT_CHARBUF:
case PT_FSPATH:
    if (likely(val != 0)) {
        if (fromuser) {
            len = ppm_strncpy_from_user(args->buffer + args->arg_data_offset,
                                        (const char __user *)(unsigned long)val,
                                        max_arg_size);

            if (unlikely(len < 0))
                return PPM_FAILURE_INVALID_USER_MEMORY;
        } else {
            len = strlcpy(args->buffer + args->arg_data_offset,
                          (const char *)(unsigned long)val,
                          max_arg_size);

            if (++len > max_arg_size)
                len = max_arg_size;
        }

        /*
         * Make sure the string is null-terminated
         */
        *(char *)(args->buffer + args->arg_data_offset + len) = 0;
    } else {
        /*
         * Handle NULL pointers
         */
        len = strlcpy(args->buffer + args->arg_data_offset,
                      "(NULL)",
                      max_arg_size);

        if (++len > max_arg_size)
            len = max_arg_size;
    }
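Note the clamp after strlcpy(): in the kernel, strlcpy() returns strlen(src), not the number of bytes copied, so on truncation the return value can exceed the destination size. A tiny illustration of that semantic:

/* Sketch: strlcpy() reports the full source length, even when truncating */
char dst[8];
size_t n = strlcpy(dst, "a very long string", sizeof(dst));
/* n == 18 although only 7 chars + NUL landed in dst, which is why the
 * code above does "if (++len > max_arg_size) len = max_arg_size" */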
    break;
case PT_BYTEBUF:
    if (likely(val != 0)) {
        if (fromuser) {
            /*
             * Copy the lookahead portion of the buffer that we will use for
             * DPI-based snaplen calculation
             */
            u32 dpi_lookahead_size = DPI_LOOKAHEAD_SIZE;

            if (dpi_lookahead_size > val_len)
                dpi_lookahead_size = val_len;

            if (unlikely(dpi_lookahead_size >= max_arg_size))
                return PPM_FAILURE_BUFFER_FULL;

            len = (int)ppm_copy_from_user(args->buffer + args->arg_data_offset,
                                          (const void __user *)(unsigned long)val,
                                          dpi_lookahead_size);

            if (unlikely(len != 0))
                return PPM_FAILURE_INVALID_USER_MEMORY;

            /*
             * Check if there's more to copy
             */
            if (likely((dpi_lookahead_size != val_len))) {
                /*
                 * Calculate the snaplen
                 */
                if (likely(args->enforce_snaplen)) {
                    u32 sl = args->consumer->snaplen;

                    /* ... snaplen adjustment and remainder copy elided ... */
                }
            }

            len = val_len;
        }
    } else {
        /*
         * Handle NULL pointers
         */
        len = 0;
    }
    break;
case PT_SOCKADDR:
case PT_SOCKTUPLE:
case PT_FDLIST:
    if (likely(val != 0)) {
        if (unlikely(val_len >= max_arg_size))
            return PPM_FAILURE_BUFFER_FULL;

        if (fromuser) {
            len = (int)ppm_copy_from_user(args->buffer + args->arg_data_offset,
                                          (const void __user *)(unsigned long)val,
                                          val_len);

            if (unlikely(len != 0))
                return PPM_FAILURE_INVALID_USER_MEMORY;

            len = val_len;
        }
    } else {
        /*
         * Handle NULL pointers
         */
        len = 0;
    }
    break;
case PT_FLAGS8:
case PT_UINT8:
case PT_SIGTYPE:
    if (likely(max_arg_size >= sizeof(u8))) {
        *(u8 *)(args->buffer + args->arg_data_offset) = (u8)val;
        len = sizeof(u8);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_FLAGS16:
case PT_UINT16:
case PT_SYSCALLID:
    if (likely(max_arg_size >= sizeof(u16))) {
        *(u16 *)(args->buffer + args->arg_data_offset) = (u16)val;
        len = sizeof(u16);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_FLAGS32:
case PT_UINT32:
case PT_MODE:
case PT_UID:
case PT_GID:
case PT_SIGSET:
    if (likely(max_arg_size >= sizeof(u32))) {
        *(u32 *)(args->buffer + args->arg_data_offset) = (u32)val;
        len = sizeof(u32);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_RELTIME:
case PT_ABSTIME:
case PT_UINT64:
    if (likely(max_arg_size >= sizeof(u64))) {
        *(u64 *)(args->buffer + args->arg_data_offset) = (u64)val;
        len = sizeof(u64);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_INT8:
    if (likely(max_arg_size >= sizeof(s8))) {
        *(s8 *)(args->buffer + args->arg_data_offset) = (s8)(long)val;
        len = sizeof(s8);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_INT16:
    if (likely(max_arg_size >= sizeof(s16))) {
        *(s16 *)(args->buffer + args->arg_data_offset) = (s16)(long)val;
        len = sizeof(s16);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_INT32:
    if (likely(max_arg_size >= sizeof(s32))) {
        *(s32 *)(args->buffer + args->arg_data_offset) = (s32)(long)val;
        len = sizeof(s32);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
case PT_INT64:
case PT_ERRNO:
case PT_FD:
case PT_PID:
    if (likely(max_arg_size >= sizeof(s64))) {
        *(s64 *)(args->buffer + args->arg_data_offset) = (s64)(long)val;
        len = sizeof(s64);
    } else {
        return PPM_FAILURE_BUFFER_FULL;
    }

    break;
default:
    ASSERT(0);
#ifndef UDIG
    pr_err("val_to_ring: invalid argument type %d. Event %u (%s) might have less parameters than what has been declared in nparams\n",
           (int)g_event_info[args->event_type].params[args->curarg].type,
           (u32)args->event_type,
           g_event_info[args->event_type].name);
#endif
    return PPM_FAILURE_BUG;
}
if (likely(cbres == PPM_SUCCESS)) {
    /*
     * Validate that the filler added the right number of parameters
     */
    if (likely(args.curarg == args.nargs)) {
        /*
         * The event was successfully inserted in the buffer
         */
        event_size = sizeof(struct ppm_evt_hdr) + args.arg_data_offset;
        hdr->len = event_size;
        drop = 0;
    } else {
        pr_err("corrupted filler for event type %d (added %u args, should have added %u)\n",
               event_type,
               args.curarg,
               args.nargs);
        ASSERT(0);
    }
}
}
if (unlikely(next >= RING_BUF_SIZE)) {
    /*
     * If something has been written in the cushion space at the end of
     * the buffer, copy it to the beginning and wrap the head around.
     * Note, we don't check that the copy fits because we assume that
     * filler_callback failed if the space was not enough.
     */
    if (next > RING_BUF_SIZE) {
        memcpy(ring->buffer,
               ring->buffer + RING_BUF_SIZE,
               next - RING_BUF_SIZE);
    }

    next -= RING_BUF_SIZE;
}
/*
 * Make sure all the memory has been written in real memory before
 * we update the head and the user space process (on another CPU)
 * can access the buffer.
 */
smp_wmb();
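The smp_wmb() pairs with a read barrier on the consumer side: whoever reads the head must observe the event data before trusting the new head value. A sketch of the matching consumer-side protocol (an assumed mirror of this producer, not code from the driver):

/* Sketch: consumer side of the head/tail protocol */
u32 head = READ_ONCE(info->head);

smp_rmb();  /* pairs with the producer's smp_wmb(): see data before head */

while (tail != head) {
    /* ... consume the event at ring->buffer + tail, advance tail ... */
}

smp_mb();   /* finish reading the data before releasing the space */
WRITE_ONCE(info->tail, tail);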