staticlongsyscall_trace_enter(struct pt_regs *regs, long syscall, unsignedlong work) { long ret = 0;
/* * Handle Syscall User Dispatch. This must comes first, since * the ABI here can be something that doesn't make sense for * other syscall_work features. */ if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) { if (syscall_user_dispatch(regs)) return-1L; }
/* Handle ptrace */ if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { ret = ptrace_report_syscall_entry(regs); if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) return-1L; }
/* Do seccomp after ptrace, to catch any tracer changes. */ if (work & SYSCALL_WORK_SECCOMP) { ret = __secure_computing(NULL); if (ret == -1L) return ret; }
/* Either of the above might have changed the syscall number */ syscall = syscall_get_nr(current, regs);
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall);
static __always_inline long __syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsignedlong work = READ_ONCE(current_thread_info()->syscall_work);
if (work & SYSCALL_WORK_ENTER) syscall = syscall_trace_enter(regs, syscall, work);
/* * this isn't the same as continuing with a signal, but it will do * for normal use. strace only continues with a signal if the * stopping signal is not SIGTRAP. -brl */ if (signr) send_sig(signr, current, 1);
return fatal_signal_pending(current); }
/** * ptrace_report_syscall_entry - task is about to attempt a system call * @regs: user register state of current task * * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just * entered the kernel for a system call. Full user register state is * available here. Changing the values in @regs can affect the system * call number and arguments to be tried. It is safe to block here, * preventing the system call from beginning. * * Returns zero normally, or nonzero if the calling arch code should abort * the system call. That must prevent normal entry so no system call is * made. If @task ever returns to user mode after this, its register state * is unspecified, but should be something harmless like an %ENOSYS error * return. It should preserve enough information so that syscall_rollback() * can work (see asm-generic/syscall.h). * * Called without locks, just after entering kernel mode. */ staticinline __must_check intptrace_report_syscall_entry( struct pt_regs *regs) { return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); }
正如注释所说，通过 ptrace 拦截系统调用后，对寄存器的修改都是在这个时间点发生的。
This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
%SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
entered the kernel for a system call. Full user register state is
available here. Changing the values in @regs can affect the system
call number and arguments to be tried. It is safe to block here,
preventing the system call from beginning.
/*
 * Phase 1 of syscall-entry tracing: the fast path that tries to finish
 * all pending entry work without needing phase 2.
 *
 * We can return 0 to resume the syscall or anything else to go to phase
 * 2.  If we resume the syscall, we need to put something appropriate in
 * regs->orig_ax.
 *
 * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
 * are fully functional.
 *
 * For phase 2's benefit, our return value is:
 *  0:			resume the syscall
 *  1:			go to phase 2; no seccomp phase 2 needed
 *  anything else:	go to phase 2; pass return value to seccomp
 *
 * @regs: register state of the current task at syscall entry
 * @arch: audit architecture identifier, forwarded to
 *        do_audit_syscall_entry()
 */
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
	struct thread_info *ti = pt_regs_to_thread_info(regs);
	unsigned long ret = 0;
	u32 work;

	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
		BUG_ON(regs != task_pt_regs(current));

	/* Snapshot the flags once; only the syscall-entry work bits matter. */
	work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;

#ifdef CONFIG_SECCOMP
	/*
	 * Do seccomp first -- it should minimize exposure of other
	 * code, and keeping seccomp fast is probably more valuable
	 * than the rest of this.
	 */
	if (work & _TIF_SECCOMP) {
		struct seccomp_data sd;

		/*
		 * NOTE(review): no field of sd is assigned in this excerpt
		 * before it is handed to seccomp_phase1(); presumably the
		 * code that fills it in was elided -- confirm against the
		 * full source before relying on this snippet.
		 */
		ret = seccomp_phase1(&sd);
		if (ret == SECCOMP_PHASE1_SKIP) {
			/* Skip the syscall: invalidate the number, resume. */
			regs->orig_ax = -1;
			ret = 0;
		} else if (ret != SECCOMP_PHASE1_OK) {
			return ret;	/* Go directly to phase 2 */
		}

		/* seccomp is fully handled; drop it from the pending work. */
		work &= ~_TIF_SECCOMP;
	}
#endif

	/* Do our best to finish without phase 2. */
	if (work == 0)
		return ret;	/* seccomp and/or nohz only (ret == 0 here) */

#ifdef CONFIG_AUDITSYSCALL
	if (work == _TIF_SYSCALL_AUDIT) {
		/*
		 * If there is no more work to be done except auditing,
		 * then audit in phase 1.  Phase 2 always audits, so, if
		 * we audit here, then we can't go on to phase 2.
		 */
		do_audit_syscall_entry(regs, arch);
		return 0;
	}
#endif

	return 1;	/* Something is enabled that we can't handle in phase 1 */
}
/* Returns the syscall nr to run (which should match regs->orig_ax). */ longsyscall_trace_enter_phase2(struct pt_regs *regs, u32 arch, unsignedlong phase1_result) { structthread_info *ti = pt_regs_to_thread_info(regs); long ret = 0; u32 work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current));
#ifdef CONFIG_SECCOMP /* * Call seccomp_phase2 before running the other hooks so that * they can see any changes made by a seccomp tracer. */ if (phase1_result > 1 && seccomp_phase2(phase1_result)) { /* seccomp failures shouldn't expose any additional code. */ return-1; } #endif
if (unlikely(work & _TIF_SYSCALL_EMU)) ret = -1L;
if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && tracehook_report_syscall_entry(regs)) ret = -1L;
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax);