aboutsummaryrefslogtreecommitdiffstats
path: root/exec
diff options
context:
space:
mode:
authorPo Lu2024-07-01 18:11:58 +0800
committerPo Lu2024-07-01 18:11:58 +0800
commitebf5bcb9f0b6adeb97a3918b8f3845844c9091b0 (patch)
treef7660b9380f715d48dfe4ff30a57e12965b6166d /exec
parent7c8d4e96ba6db19bdca20a87bafed024a84eb517 (diff)
downloademacs-ebf5bcb9f0b6adeb97a3918b8f3845844c9091b0.tar.gz
emacs-ebf5bcb9f0b6adeb97a3918b8f3845844c9091b0.zip
Optimize process execution on Android
* exec/configure.ac (REENTRANT): Remove option for reentrancy. (PROGRAM_COUNTER, HAVE_SECCOMP): Define register providing the program counter and enable seccomp if its headers are available. * exec/exec.c (write_load_command): Avoid defining unused variable. (exec_0): Remove code specific to REENTRANT configurations. * exec/exec.h (struct exec_tracee) <exec_data, data_size>: New fields for loader instructions and their size. * exec/exec1.c (main): Call exec_init before forking. * exec/mipsel-user.h (ELF_NGREG): Delete definition. (struct mipsel_regs): Reduce number of gregs to 32, but introduce separate fields for special registers. * exec/trace.c (use_seccomp_p): New variable; define to false if !HAVE_SECCOMP. (remove_tracee): Cease providing for non-reentrant configurations. Release executable data if present. (handle_clone_prepare): Likewise. Resume process with PTRACE_CONT if seccomp-based interception is enabled. (handle_clone, check_signal): Resume processes as above. (handle_exec): Divide into two functions, with only rewriting the system call and generating instructions for the loader remaining in the first, and copying such instructions into the loader's stack removed into a new function, `finish_exec'. (finish_exec): New function. (handle_readlinkat, handle_openat): Abolish non-REENTRANT configurations. (process_system_call): Divide exec system calls into two phases, disambiguated by the value of tracee->waiting_for_syscall. Typo fixes. Accommodate syscall-exit-stops where the signal was initially intercepted by `seccomp_system_call'. (interesting_syscalls): New array. (ARRAYELTS): New macro. (seccomp_system_call, establish_seccomp_filter): New functions. (tracing_execve) [HAVE_SECCOMP]: Establish a seccomp filter if this is to be enabled. (after_fork): Provide PTRACE_O_TRACESECCOMP. Resume process with PTRACE_CONT if seccomp-based interception is enabled. (exec_waitpid): Resume process with PTRACE_CONT if seccomp-based interception is enabled. 
Dispatch stops identifying as PTRACE_EVENT_SECCOMP to `seccomp_system_call'. (exec_init): Establish whether it is possible to enable seccomp.
Diffstat (limited to 'exec')
-rw-r--r--exec/configure.ac26
-rw-r--r--exec/exec.c17
-rw-r--r--exec/exec.h15
-rw-r--r--exec/exec1.c6
-rw-r--r--exec/mipsel-user.h14
-rw-r--r--exec/trace.c807
6 files changed, 666 insertions, 219 deletions
diff --git a/exec/configure.ac b/exec/configure.ac
index 5be8a983718..c3e895740be 100644
--- a/exec/configure.ac
+++ b/exec/configure.ac
@@ -42,11 +42,6 @@ General Public License for more details.
42You should have received a copy of the GNU General Public License 42You should have received a copy of the GNU General Public License
43along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */]) 43along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */])
44 44
45AC_ARG_WITH([reentrancy],
46 [AS_HELP_STRING([--with-reentrancy],
47 [Generate library which can be used within a signal handler.])],
48 [AC_DEFINE([REENTRANT], [1])])
49
50AC_USE_SYSTEM_EXTENSIONS 45AC_USE_SYSTEM_EXTENSIONS
51AC_PROG_CC 46AC_PROG_CC
52AC_PROG_CPP 47AC_PROG_CPP
@@ -74,9 +69,9 @@ AC_CHECK_FUNC([process_vm_readv],
74 ]])])]) 69 ]])])])
75AC_CHECK_HEADERS([sys/param.h sys/uio.h]) 70AC_CHECK_HEADERS([sys/param.h sys/uio.h])
76AC_CHECK_MEMBERS([siginfo_t.si_syscall], [], [], 71AC_CHECK_MEMBERS([siginfo_t.si_syscall], [], [],
77 [[ 72[[
78#include <signal.h> 73#include <signal.h>
79 ]]) 74]])
80 75
81AH_BOTTOM([ 76AH_BOTTOM([
82#ifdef HAVE_STDBOOL_H 77#ifdef HAVE_STDBOOL_H
@@ -120,6 +115,7 @@ AH_TEMPLATE([SYSCALL_ARG2_REG], [Define to register holding arg2 to system calls
120AH_TEMPLATE([SYSCALL_ARG3_REG], [Define to register holding arg3 to system calls.]) 115AH_TEMPLATE([SYSCALL_ARG3_REG], [Define to register holding arg3 to system calls.])
121AH_TEMPLATE([SYSCALL_RET_REG], [Define to register holding value of system calls.]) 116AH_TEMPLATE([SYSCALL_RET_REG], [Define to register holding value of system calls.])
122AH_TEMPLATE([STACK_POINTER], [Define to register holding the stack pointer.]) 117AH_TEMPLATE([STACK_POINTER], [Define to register holding the stack pointer.])
118AH_TEMPLATE([PROGRAM_COUNTER], [Define to register holding the program counter.])
123AH_TEMPLATE([EXEC_SYSCALL], [Define to number of the `exec' system call.]) 119AH_TEMPLATE([EXEC_SYSCALL], [Define to number of the `exec' system call.])
124AH_TEMPLATE([USER_WORD], [Define to word type used by tracees.]) 120AH_TEMPLATE([USER_WORD], [Define to word type used by tracees.])
125AH_TEMPLATE([USER_SWORD], [Define to signed word type used by tracees.]) 121AH_TEMPLATE([USER_SWORD], [Define to signed word type used by tracees.])
@@ -134,7 +130,8 @@ AH_TEMPLATE([READLINK_SYSCALL], [Define to number of the `readlink' system call.
134AH_TEMPLATE([READLINKAT_SYSCALL], [Define to number of the `readlinkat' system call.]) 130AH_TEMPLATE([READLINKAT_SYSCALL], [Define to number of the `readlinkat' system call.])
135AH_TEMPLATE([OPEN_SYSCALL], [Define to number of the `open' system call.]) 131AH_TEMPLATE([OPEN_SYSCALL], [Define to number of the `open' system call.])
136AH_TEMPLATE([OPENAT_SYSCALL], [Define to number of the `openat' system call.]) 132AH_TEMPLATE([OPENAT_SYSCALL], [Define to number of the `openat' system call.])
137AH_TEMPLATE([REENTRANT], [Define to 1 if the library is used within a signal handler.]) 133AH_TEMPLATE([HAVE_SECCOMP], [Define to 1 if secure computing filters are available
134to accelerate interception of system calls.])
138 135
139AC_CANONICAL_HOST 136AC_CANONICAL_HOST
140 137
@@ -250,6 +247,7 @@ AS_CASE([$host], [x86_64-*linux*],
250 AC_DEFINE([SYSCALL_ARG2_REG], [rdx]) 247 AC_DEFINE([SYSCALL_ARG2_REG], [rdx])
251 AC_DEFINE([SYSCALL_ARG3_REG], [r10]) 248 AC_DEFINE([SYSCALL_ARG3_REG], [r10])
252 AC_DEFINE([STACK_POINTER], [rsp]) 249 AC_DEFINE([STACK_POINTER], [rsp])
250 AC_DEFINE([PROGRAM_COUNTER], [rip])
253 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 251 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
254 AC_DEFINE([USER_WORD], [uintptr_t]) 252 AC_DEFINE([USER_WORD], [uintptr_t])
255 AC_DEFINE([USER_SWORD], [intptr_t]) 253 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -283,6 +281,7 @@ AS_CASE([$host], [x86_64-*linux*],
283 AC_DEFINE([SYSCALL_ARG2_REG], [edx]) 281 AC_DEFINE([SYSCALL_ARG2_REG], [edx])
284 AC_DEFINE([SYSCALL_ARG3_REG], [esi]) 282 AC_DEFINE([SYSCALL_ARG3_REG], [esi])
285 AC_DEFINE([STACK_POINTER], [esp]) 283 AC_DEFINE([STACK_POINTER], [esp])
284 AC_DEFINE([PROGRAM_COUNTER], [eip])
286 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 285 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
287 AC_DEFINE([USER_WORD], [uintptr_t]) 286 AC_DEFINE([USER_WORD], [uintptr_t])
288 AC_DEFINE([USER_SWORD], [intptr_t]) 287 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -314,6 +313,7 @@ AS_CASE([$host], [x86_64-*linux*],
314 AC_DEFINE([SYSCALL_ARG2_REG], [[regs[2]]]) 313 AC_DEFINE([SYSCALL_ARG2_REG], [[regs[2]]])
315 AC_DEFINE([SYSCALL_ARG3_REG], [[regs[3]]]) 314 AC_DEFINE([SYSCALL_ARG3_REG], [[regs[3]]])
316 AC_DEFINE([STACK_POINTER], [sp]) 315 AC_DEFINE([STACK_POINTER], [sp])
316 AC_DEFINE([PROGRAM_COUNTER], [pc])
317 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 317 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
318 AC_DEFINE([USER_WORD], [uintptr_t]) 318 AC_DEFINE([USER_WORD], [uintptr_t])
319 AC_DEFINE([USER_SWORD], [intptr_t]) 319 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -346,6 +346,7 @@ AS_CASE([$host], [x86_64-*linux*],
346 AC_DEFINE([SYSCALL_ARG2_REG], [[uregs[2]]]) 346 AC_DEFINE([SYSCALL_ARG2_REG], [[uregs[2]]])
347 AC_DEFINE([SYSCALL_ARG3_REG], [[uregs[3]]]) 347 AC_DEFINE([SYSCALL_ARG3_REG], [[uregs[3]]])
348 AC_DEFINE([STACK_POINTER], [[uregs[13]]]) 348 AC_DEFINE([STACK_POINTER], [[uregs[13]]])
349 AC_DEFINE([PROGRAM_COUNTER], [[uregs[15]]])
349 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 350 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
350 AC_DEFINE([USER_WORD], [uintptr_t]) 351 AC_DEFINE([USER_WORD], [uintptr_t])
351 AC_DEFINE([USER_SWORD], [intptr_t]) 352 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -371,6 +372,7 @@ AS_CASE([$host], [x86_64-*linux*],
371 AC_DEFINE([SYSCALL_ARG2_REG], [[uregs[2]]]) 372 AC_DEFINE([SYSCALL_ARG2_REG], [[uregs[2]]])
372 AC_DEFINE([SYSCALL_ARG3_REG], [[uregs[3]]]) 373 AC_DEFINE([SYSCALL_ARG3_REG], [[uregs[3]]])
373 AC_DEFINE([STACK_POINTER], [[uregs[13]]]) 374 AC_DEFINE([STACK_POINTER], [[uregs[13]]])
375 AC_DEFINE([PROGRAM_COUNTER], [[uregs[15]]])
374 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 376 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
375 AC_DEFINE([USER_WORD], [uintptr_t]) 377 AC_DEFINE([USER_WORD], [uintptr_t])
376 AC_DEFINE([USER_SWORD], [intptr_t]) 378 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -402,6 +404,7 @@ AS_CASE([$host], [x86_64-*linux*],
402 AC_DEFINE([SYSCALL_ARG2_REG], [[gregs[4]]]) # a2 404 AC_DEFINE([SYSCALL_ARG2_REG], [[gregs[4]]]) # a2
403 AC_DEFINE([SYSCALL_ARG3_REG], [[gregs[5]]]) # a3 405 AC_DEFINE([SYSCALL_ARG3_REG], [[gregs[5]]]) # a3
404 AC_DEFINE([STACK_POINTER], [[gregs[29]]]) # sp 406 AC_DEFINE([STACK_POINTER], [[gregs[29]]]) # sp
407 AC_DEFINE([PROGRAM_COUNTER], [[cp0_epc]]) # pc
405 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 408 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
406 AC_DEFINE([USER_WORD], [uintptr_t]) 409 AC_DEFINE([USER_WORD], [uintptr_t])
407 AC_DEFINE([USER_SWORD], [intptr_t]) 410 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -432,6 +435,7 @@ AS_CASE([$host], [x86_64-*linux*],
432 AC_DEFINE([SYSCALL_ARG2_REG], [[gregs[4]]]) # a2 435 AC_DEFINE([SYSCALL_ARG2_REG], [[gregs[4]]]) # a2
433 AC_DEFINE([SYSCALL_ARG3_REG], [[gregs[5]]]) # a3 436 AC_DEFINE([SYSCALL_ARG3_REG], [[gregs[5]]]) # a3
434 AC_DEFINE([STACK_POINTER], [[gregs[29]]]) # sp 437 AC_DEFINE([STACK_POINTER], [[gregs[29]]]) # sp
438 AC_DEFINE([PROGRAM_COUNTER], [[cp0_epc]]) # pc
435 AC_DEFINE([EXEC_SYSCALL], [__NR_execve]) 439 AC_DEFINE([EXEC_SYSCALL], [__NR_execve])
436 AC_DEFINE([USER_WORD], [uintptr_t]) 440 AC_DEFINE([USER_WORD], [uintptr_t])
437 AC_DEFINE([USER_SWORD], [intptr_t]) 441 AC_DEFINE([USER_SWORD], [intptr_t])
@@ -480,6 +484,12 @@ AC_ARG_VAR([LOADERFLAGS], [Flags used to link the loader.])
480AC_ARG_VAR([ARFLAGS], [Flags for the archiver.]) 484AC_ARG_VAR([ARFLAGS], [Flags for the archiver.])
481AC_ARG_VAR([ASFLAGS], [Flags for the assembler.]) 485AC_ARG_VAR([ASFLAGS], [Flags for the assembler.])
482 486
487# Search for seccomp headers and declarations.
488AC_CHECK_HEADERS([linux/seccomp.h linux/filter.h],
489 [AC_CHECK_DECLS([SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_RET_TRACE],
490 [AC_DEFINE([HAVE_SECCOMP], [1])], [],
491 [[#include <linux/seccomp.h>]])])
492
483# Make the assembler optimize for code size. Don't do this on MIPS, 493# Make the assembler optimize for code size. Don't do this on MIPS,
484# as the assembler code manages branch delays manually. 494# as the assembler code manages branch delays manually.
485 495
diff --git a/exec/exec.c b/exec/exec.c
index cbe22d4f18c..fad345c532c 100644
--- a/exec/exec.c
+++ b/exec/exec.c
@@ -292,7 +292,9 @@ write_load_command (program_header *header, bool use_alternate,
292 struct exec_map_command command1; 292 struct exec_map_command command1;
293 USER_WORD start, end; 293 USER_WORD start, end;
294 bool need_command1; 294 bool need_command1;
295#ifndef PAGE_MASK
295 static long pagesize; 296 static long pagesize;
297#endif /* !PAGE_MASK */
296 298
297 /* First, write the commands necessary to map the specified segment 299 /* First, write the commands necessary to map the specified segment
298 itself. 300 itself.
@@ -306,14 +308,14 @@ write_load_command (program_header *header, bool use_alternate,
306#ifdef HAVE_GETPAGESIZE 308#ifdef HAVE_GETPAGESIZE
307 if (!pagesize) 309 if (!pagesize)
308 pagesize = getpagesize (); 310 pagesize = getpagesize ();
309#else /* HAVE_GETPAGESIZE */ 311#else /* !HAVE_GETPAGESIZE */
310 if (!pagesize) 312 if (!pagesize)
311 pagesize = sysconf (_SC_PAGESIZE); 313 pagesize = sysconf (_SC_PAGESIZE);
312#endif /* HAVE_GETPAGESIZE */ 314#endif /* !HAVE_GETPAGESIZE */
313 315
314#define PAGE_MASK (~(pagesize - 1)) 316#define PAGE_MASK (~(pagesize - 1))
315#define PAGE_SIZE (pagesize) 317#define PAGE_SIZE (pagesize)
316#endif /* PAGE_MASK */ 318#endif /* !PAGE_MASK */
317 319
318 start = header->p_vaddr & PAGE_MASK; 320 start = header->p_vaddr & PAGE_MASK;
319 end = ((header->p_vaddr + header->p_filesz 321 end = ((header->p_vaddr + header->p_filesz
@@ -895,10 +897,6 @@ format_pid (char *in, unsigned int pid)
895 with #!; in that case, find the program to open and use that 897 with #!; in that case, find the program to open and use that
896 instead. 898 instead.
897 899
898 If REENTRANT is not defined, NAME is actually a buffer of size
899 PATH_MAX + 80. In that case, copy over the file name actually
900 opened.
901
902 Next, read the executable header, and add the necessary memory 900 Next, read the executable header, and add the necessary memory
903 mappings for each file. Finally, return the action data and its 901 mappings for each file. Finally, return the action data and its
904 size in *SIZE. 902 size in *SIZE.
@@ -976,11 +974,6 @@ exec_0 (char *name, struct exec_tracee *tracee,
976 rewrite = buffer1 + link_size; 974 rewrite = buffer1 + link_size;
977 remaining = buffer1 + sizeof buffer1 - rewrite - 1; 975 remaining = buffer1 + sizeof buffer1 - rewrite - 1;
978 memcpy (rewrite, name, strnlen (name, remaining)); 976 memcpy (rewrite, name, strnlen (name, remaining));
979
980 /* Replace name with buffer1. */
981#ifndef REENTRANT
982 strcpy (name, buffer1);
983#endif /* REENTRANT */
984 } 977 }
985 } 978 }
986 979
diff --git a/exec/exec.h b/exec/exec.h
index 3ce06c35311..59963587573 100644
--- a/exec/exec.h
+++ b/exec/exec.h
@@ -152,6 +152,16 @@ struct exec_tracee
152 completion. */ 152 completion. */
153 USER_WORD sp; 153 USER_WORD sp;
154 154
155 /* Name of the executable being run. */
156 char *exec_file;
157
158 /* Pointer to a storage area holding instructions for loading an
159 executable if an `exec' system call is outstanding, or NULL. */
160 char *exec_data;
161
162 /* Number of bytes in exec_data. */
163 size_t data_size;
164
155 /* The thread ID of this process. */ 165 /* The thread ID of this process. */
156 pid_t pid; 166 pid_t pid;
157 167
@@ -162,11 +172,6 @@ struct exec_tracee
162 /* Whether or not the tracee has been created but is not yet 172 /* Whether or not the tracee has been created but is not yet
163 processed by `handle_clone'. */ 173 processed by `handle_clone'. */
164 bool new_child : 1; 174 bool new_child : 1;
165
166#ifndef REENTRANT
167 /* Name of the executable being run. */
168 char *exec_file;
169#endif /* !REENTRANT */
170}; 175};
171 176
172 177
diff --git a/exec/exec1.c b/exec/exec1.c
index aaff9a94c62..cbd756d3d5c 100644
--- a/exec/exec1.c
+++ b/exec/exec1.c
@@ -42,6 +42,9 @@ main (int argc, char **argv)
42 extern char **environ; 42 extern char **environ;
43 int wstatus; 43 int wstatus;
44 44
45 /* Provide the file name of the loader. */
46 exec_init (argv[1]);
47
45 pid1 = getpid (); 48 pid1 = getpid ();
46 pid = fork (); 49 pid = fork ();
47 50
@@ -58,9 +61,6 @@ main (int argc, char **argv)
58 } 61 }
59 else 62 else
60 { 63 {
61 /* Provide the file name of the loader. */
62 exec_init (argv[1]);
63
64 if (after_fork (pid)) 64 if (after_fork (pid))
65 exit (127); 65 exit (127);
66 66
diff --git a/exec/mipsel-user.h b/exec/mipsel-user.h
index 04f4a2a5089..14d8f6d0d5e 100644
--- a/exec/mipsel-user.h
+++ b/exec/mipsel-user.h
@@ -24,10 +24,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
24 24
25#include <sys/user.h> 25#include <sys/user.h>
26 26
27#ifndef ELF_NGREG
28#define ELF_NGREG 45
29#endif /* ELF_NGREG */
30
31 27
32 28
33/* This file defines a structure containing user mode general purpose 29/* This file defines a structure containing user mode general purpose
@@ -36,7 +32,15 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
36struct mipsel_regs 32struct mipsel_regs
37{ 33{
38 /* General purpose registers. */ 34 /* General purpose registers. */
39 uint64_t gregs[ELF_NGREG]; 35 uint64_t gregs[32];
36
37 /* Saved special registers. */
38 uint64_t lo;
39 uint64_t hi;
40 uint64_t cp0_epc;
41 uint64_t cp0_badvaddr;
42 uint64_t cp0_status;
43 uint64_t cp0_cause;
40}; 44};
41 45
42#endif /* _MIPSEL_USER_H_ */ 46#endif /* _MIPSEL_USER_H_ */
diff --git a/exec/trace.c b/exec/trace.c
index 05d862f5b9f..7cf95ed5733 100644
--- a/exec/trace.c
+++ b/exec/trace.c
@@ -49,11 +49,21 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
49 49
50#ifndef SYS_SECCOMP 50#ifndef SYS_SECCOMP
51#define SYS_SECCOMP 1 51#define SYS_SECCOMP 1
52#endif /* SYS_SECCOMP */ 52#endif /* !defined SYS_SECCOMP */
53 53
54#ifndef PTRACE_GETEVENTMSG 54#ifndef PTRACE_GETEVENTMSG
55#define PTRACE_GETEVENTMSG 0x4201 55#define PTRACE_GETEVENTMSG 0x4201
56#endif /* PTRACE_GETEVENTMSG */ 56#endif /* !defined PTRACE_GETEVENTMSG */
57
58#ifdef HAVE_SECCOMP
59#include <linux/seccomp.h>
60#include <linux/filter.h>
61
62#include <sys/utsname.h>
63#include <sys/prctl.h>
64
65#include <stdio.h>
65#endif /* defined HAVE_SECCOMP */
57 67
58 68
59 69
@@ -70,6 +80,15 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
70/* Number of tracees children are allowed to create. */ 80/* Number of tracees children are allowed to create. */
71#define MAX_TRACEES 4096 81#define MAX_TRACEES 4096
72 82
83#ifdef HAVE_SECCOMP
84
85/* Whether to enable seccomp acceleration. */
86static bool use_seccomp_p;
87
88#else /* !HAVE_SECCOMP */
89#define use_seccomp_p (false)
90#endif /* HAVE_SECCOMP */
91
73#ifdef __aarch64__ 92#ifdef __aarch64__
74 93
75/* Place PID's registers into *REGS. Return 1 upon failure, else 94/* Place PID's registers into *REGS. Return 1 upon failure, else
@@ -105,8 +124,7 @@ aarch64_set_regs (pid_t pid, USER_REGS_STRUCT *regs,
105 iov.iov_base = regs; 124 iov.iov_base = regs;
106 iov.iov_len = sizeof *regs; 125 iov.iov_len = sizeof *regs;
107 126
108 rc = ptrace (PTRACE_SETREGSET, pid, NT_PRSTATUS, 127 rc = ptrace (PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
109 &iov);
110 if (rc < 0) 128 if (rc < 0)
111 return 1; 129 return 1;
112 130
@@ -367,14 +385,17 @@ remove_tracee (struct exec_tracee *tracee)
367 /* Link the tracee onto the list of free tracees. */ 385 /* Link the tracee onto the list of free tracees. */
368 tracee->next = free_tracees; 386 tracee->next = free_tracees;
369 387
370#ifndef REENTRANT
371 /* Free the exec file, if any. */ 388 /* Free the exec file, if any. */
372 free (tracee->exec_file); 389 free (tracee->exec_file);
373 tracee->exec_file = NULL; 390 tracee->exec_file = NULL;
374#endif /* REENTRANT */
375 391
376 free_tracees = tracee; 392 /* Likewise with any loader instructions that might be
393 present. */
394 free (tracee->exec_data);
395 tracee->exec_data = NULL;
377 396
397 /* Return this tracee to the list of free ones. */
398 free_tracees = tracee;
378 return; 399 return;
379 } 400 }
380 else 401 else
@@ -419,7 +440,6 @@ find_tracee (pid_t process)
419static void 440static void
420handle_clone_prepare (struct exec_tracee *parent) 441handle_clone_prepare (struct exec_tracee *parent)
421{ 442{
422#ifndef REENTRANT
423 long rc; 443 long rc;
424 unsigned long pid; 444 unsigned long pid;
425 struct exec_tracee *tracee; 445 struct exec_tracee *tracee;
@@ -440,7 +460,8 @@ handle_clone_prepare (struct exec_tracee *parent)
440 assert (tracee->new_child); 460 assert (tracee->new_child);
441 tracee->new_child = false; 461 tracee->new_child = false;
442 tracee->exec_file = NULL; 462 tracee->exec_file = NULL;
443 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0); 463 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
464 tracee->pid, 0, 0);
444 465
445 if (parent->exec_file) 466 if (parent->exec_file)
446 tracee->exec_file = strdup (parent->exec_file); 467 tracee->exec_file = strdup (parent->exec_file);
@@ -457,12 +478,9 @@ handle_clone_prepare (struct exec_tracee *parent)
457 tracee = &static_tracees[tracees]; 478 tracee = &static_tracees[tracees];
458 tracees++; 479 tracees++;
459 } 480 }
460#ifndef REENTRANT 481 /* Try to allocate a tracee using `malloc'. */
461 /* Try to allocate a tracee using `malloc' if this library is
462 not being built to run inside a signal handler. */
463 else if ((tracee = malloc (sizeof *tracee))) 482 else if ((tracee = malloc (sizeof *tracee)))
464 ; 483 ;
465#endif /* REENTRANT */
466 else 484 else
467 return; 485 return;
468 486
@@ -477,7 +495,6 @@ handle_clone_prepare (struct exec_tracee *parent)
477 495
478 if (parent->exec_file) 496 if (parent->exec_file)
479 tracee->exec_file = strdup (parent->exec_file); 497 tracee->exec_file = strdup (parent->exec_file);
480#endif /* REENTRANT */
481} 498}
482 499
483/* Handle the completion of a `clone' or `clone3' system call, 500/* Handle the completion of a `clone' or `clone3' system call,
@@ -513,21 +530,18 @@ handle_clone (struct exec_tracee *tracee, pid_t pid)
513 tracee = &static_tracees[tracees]; 530 tracee = &static_tracees[tracees];
514 tracees++; 531 tracees++;
515 } 532 }
516#ifndef REENTRANT 533 /* Try to allocate a tracee using `malloc'. */
517 /* Try to allocate a tracee using `malloc' if this library is
518 not being built to run inside a signal handler. */
519 else if ((tracee = malloc (sizeof *tracee))) 534 else if ((tracee = malloc (sizeof *tracee)))
520 ; 535 ;
521#endif /* REENTRANT */
522 else 536 else
523 return 1; 537 return 1;
524 538
525 tracee->pid = pid; 539 tracee->pid = pid;
526 tracee->next = tracing_processes; 540 tracee->next = tracing_processes;
527 tracee->waiting_for_syscall = false; 541 tracee->waiting_for_syscall = false;
528#ifndef REENTRANT
529 tracee->exec_file = NULL; 542 tracee->exec_file = NULL;
530#endif /* REENTRANT */ 543 tracee->exec_data = NULL;
544 tracee->data_size = 0;
531 tracing_processes = tracee; 545 tracing_processes = tracee;
532 tracee->new_child = true; 546 tracee->new_child = true;
533 547
@@ -549,6 +563,11 @@ handle_clone (struct exec_tracee *tracee, pid_t pid)
549 flags |= PTRACE_O_TRACESYSGOOD; 563 flags |= PTRACE_O_TRACESYSGOOD;
550 flags |= PTRACE_O_TRACEEXIT; 564 flags |= PTRACE_O_TRACEEXIT;
551 565
566#ifdef HAVE_SECCOMP
567 if (use_seccomp_p)
568 flags |= PTRACE_O_TRACESECCOMP;
569#endif /* HAVE_SECCOMP */
570
552 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags); 571 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags);
553 572
554 if (rc) 573 if (rc)
@@ -559,7 +578,8 @@ handle_clone (struct exec_tracee *tracee, pid_t pid)
559 /* The new tracee is currently stopped. Continue it until the next 578 /* The new tracee is currently stopped. Continue it until the next
560 system call. */ 579 system call. */
561 580
562 rc = ptrace (PTRACE_SYSCALL, pid, 0, 0); 581 rc = ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
582 pid, 0, 0);
563 583
564 if (rc) 584 if (rc)
565 goto bail; 585 goto bail;
@@ -618,9 +638,11 @@ check_signal (struct exec_tracee *tracee, int status)
618 { 638 {
619 if (siginfo.si_code < 0) 639 if (siginfo.si_code < 0)
620 /* SIGTRAP delivered from userspace. Pass it on. */ 640 /* SIGTRAP delivered from userspace. Pass it on. */
621 ptrace (PTRACE_SYSCALL, tracee->pid, 0, SIGTRAP); 641 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
642 tracee->pid, 0, SIGTRAP);
622 else 643 else
623 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0); 644 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
645 tracee->pid, 0, 0);
624 646
625 return 1; 647 return 1;
626 } 648 }
@@ -639,26 +661,28 @@ check_signal (struct exec_tracee *tracee, int status)
639 it. */ 661 it. */
640#ifdef HAVE_SIGINFO_T_SI_SYSCALL 662#ifdef HAVE_SIGINFO_T_SI_SYSCALL
641#ifndef __arm__ 663#ifndef __arm__
642 ptrace (PTRACE_SYSCALL, tracee->pid, 664 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL), tracee->pid,
643 0, ((siginfo.si_code == SYS_SECCOMP 665 0, ((siginfo.si_code == SYS_SECCOMP
644 && siginfo.si_syscall == -1) 666 && siginfo.si_syscall == -1)
645 ? 0 : status)); 667 ? 0 : status));
646#else /* __arm__ */ 668#else /* __arm__ */
647 ptrace (PTRACE_SYSCALL, tracee->pid, 669 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL), tracee->pid,
648 0, ((siginfo.si_code == SYS_SECCOMP 670 0, ((siginfo.si_code == SYS_SECCOMP
649 && siginfo.si_syscall == 222) 671 && siginfo.si_syscall == 222)
650 ? 0 : status)); 672 ? 0 : status));
651#endif /* !__arm__ */ 673#endif /* !__arm__ */
652#else /* !HAVE_SIGINFO_T_SI_SYSCALL */ 674#else /* !HAVE_SIGINFO_T_SI_SYSCALL */
653 /* Drop this signal, since what caused it is unknown. */ 675 /* Drop this signal, since what caused it is unknown. */
654 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0); 676 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL), tracee->pid,
677 0, 0);
655#endif /* HAVE_SIGINFO_T_SI_SYSCALL */ 678#endif /* HAVE_SIGINFO_T_SI_SYSCALL */
656 return 1; 679 return 1;
657#endif /* SIGSYS */ 680#endif /* SIGSYS */
658 681
659 default: 682 default:
660 /* Continue the process until the next syscall. */ 683 /* Continue the process until the next syscall. */
661 ptrace (PTRACE_SYSCALL, tracee->pid, 0, status); 684 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
685 tracee->pid, 0, status);
662 return 1; 686 return 1;
663 } 687 }
664 688
@@ -667,17 +691,16 @@ check_signal (struct exec_tracee *tracee, int status)
667 691
668 692
669 693
670/* Handle an `exec' system call from the given TRACEE. REGS are the 694/* Handle the first stage of an `exec' system call from the given
671 tracee's current user-mode registers. 695 TRACEE. REGS are the tracee's current user-mode registers.
672 696
673 Rewrite the system call arguments to use the loader binary. Then, 697 Rewrite the system call arguments to use the loader binary. Then,
674 continue the system call until the loader is loaded. Write the 698 resume the process till the loader is loaded and about to begin
675 information necessary to load the original executable into the 699 execution. Save instructions to load the original executable into
676 loader's stack. 700 TRACEE->exec_data.
677 701
678 Value is 0 upon success, 1 upon a generic failure before the loader 702 Value is 0 upon success, 1 upon a generic failure before the loader
679 is loaded, 2 if the process has stopped, and 3 if something failed, 703 is loaded.
680 but it is too late to handle it.
681 704
682 Set errno appropriately upon returning a generic failure. */ 705 Set errno appropriately upon returning a generic failure. */
683 706
@@ -687,16 +710,10 @@ handle_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
687 char buffer[PATH_MAX + 80], *area; 710 char buffer[PATH_MAX + 80], *area;
688 USER_REGS_STRUCT original; 711 USER_REGS_STRUCT original;
689 size_t size, loader_size; 712 size_t size, loader_size;
690 USER_WORD loader, size1, sp; 713 USER_WORD loader;
691 int rc, wstatus;
692 siginfo_t siginfo;
693
694 /* Save the old stack pointer. */
695 sp = regs->STACK_POINTER;
696 714
697 /* Read the file name. */ 715 /* Read the file name. */
698 read_memory (tracee, buffer, PATH_MAX, 716 read_memory (tracee, buffer, PATH_MAX, regs->SYSCALL_ARG_REG);
699 regs->SYSCALL_ARG_REG);
700 717
701 /* Make sure BUFFER is NULL terminated. */ 718 /* Make sure BUFFER is NULL terminated. */
702 719
@@ -722,8 +739,18 @@ handle_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
722 return 1; 739 return 1;
723 } 740 }
724 741
725 /* Rewrite the first argument to point to the loader. */ 742 /* Save this area in the tracee. */
743 assert (!tracee->exec_data);
744 tracee->exec_data = malloc (size);
745 if (!tracee->exec_data)
746 {
747 errno = ENOMEM;
748 return 1;
749 }
750 memcpy (tracee->exec_data, area, size);
751 tracee->data_size = size;
726 752
753 /* Rewrite the first argument to point to the loader. */
727 loader_size = strlen (loader_name) + 1; 754 loader_size = strlen (loader_name) + 1;
728 loader = user_alloca (tracee, &original, regs, 755 loader = user_alloca (tracee, &original, regs,
729 loader_size); 756 loader_size);
@@ -731,14 +758,14 @@ handle_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
731 if (!loader) 758 if (!loader)
732 { 759 {
733 errno = ENOMEM; 760 errno = ENOMEM;
734 return 1; 761 goto free_data_error;
735 } 762 }
736 763
737 if (user_copy (tracee, (unsigned char *) loader_name, 764 if (user_copy (tracee, (unsigned char *) loader_name,
738 loader, loader_size)) 765 loader, loader_size))
739 { 766 {
740 errno = EIO; 767 errno = EIO;
741 return 1; 768 goto free_data_error;
742 } 769 }
743 770
744 regs->SYSCALL_ARG_REG = loader; 771 regs->SYSCALL_ARG_REG = loader;
@@ -748,151 +775,113 @@ handle_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
748 if (aarch64_set_regs (tracee->pid, regs, false)) 775 if (aarch64_set_regs (tracee->pid, regs, false))
749 { 776 {
750 errno = EIO; 777 errno = EIO;
751 return 1; 778 goto free_data_error;
752 } 779 }
753 780
754#else /* !__aarch64__ */ 781#else /* !__aarch64__ */
755 782
756 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, 783 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, regs))
757 regs))
758 { 784 {
759 errno = EIO; 785 errno = EIO;
760 return 1; 786 goto free_data_error;
761 } 787 }
762 788
763#endif /* __aarch64__ */ 789#endif /* __aarch64__ */
764 790
765 /* Continue the system call until loader starts. */ 791 /* Resume the process till the loader is executed. */
766 792
767 if (ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL)) 793 if (ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL))
768 { 794 {
769 errno = EIO; 795 errno = EIO;
770 return 1; 796 goto free_data_error;
771 } 797 }
772 798
773#ifndef REENTRANT 799 /* Now that the loader has been executed, record the value to
774 /* Now that the loader has started, record the value to use for 800 substitute for /proc/self/exe. Don't give up just because strdup
775 /proc/self/exe. Don't give up just because strdup fails. 801 fails.
776 802
777 Note that exec_0 copies the absolute file name into buffer. */ 803 Note that exec_0 copies the absolute file name into buffer. */
778 804
779 if (tracee->exec_file) 805 if (tracee->exec_file)
780 free (tracee->exec_file); 806 free (tracee->exec_file);
781 tracee->exec_file = strdup (buffer); 807 tracee->exec_file = strdup (buffer);
782#endif /* REENTRANT */ 808 return 0;
783
784 again:
785 rc = waitpid (tracee->pid, &wstatus, __WALL);
786 if (rc == -1 && errno == EINTR)
787 goto again;
788
789 if (rc < 0)
790 return 1;
791
792 if (!WIFSTOPPED (wstatus))
793 /* The process has been killed in response to a signal.
794 In this case, simply return 2. */
795 return 2;
796 else
797 {
798 /* Then, check if STATUS is not a syscall-stop, and try again if
799 it isn't. */
800 rc = check_signal (tracee, wstatus);
801
802 if (rc == -1)
803 return 2;
804 else if (rc)
805 goto again;
806
807 /* Retrieve the signal information and determine whether or not
808 the system call has completed. */
809
810 if (ptrace (PTRACE_GETSIGINFO, tracee->pid, 0,
811 &siginfo))
812 return 3;
813
814 if (!syscall_trap_p (&siginfo))
815 {
816 /* Continue. */
817 if (ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0))
818 return 3;
819
820 goto again;
821 }
822 }
823
824#ifdef __aarch64__
825
826 if (aarch64_get_regs (tracee->pid, &original))
827 return 3;
828 809
829#else /* !__aarch64__ */ 810 free_data_error:
811 free (tracee->exec_data);
812 tracee->exec_data = NULL;
813 return 1;
814}
830 815
831 /* The system call has now completed. Get the registers again. */ 816/* Complete an `exec' system call issued by TRACEE. Write the
817 instructions stored in TRACEE->exec_data to an appropriate location
818 in TRACEE's stack, and resume TRACEE, releasing TRACEE->exec_data.
819 REGS should be the TRACEE's user registers. If the reissued system
820 call did not succeed in starting the executable loader, restore
821 TRACEE->sp (recorded by process_system_call or seccomp_system_call),
822 and resume execution, so that the failure may be reported. */
832 823
833 if (ptrace (PTRACE_GETREGS, tracee->pid, NULL, 824static void
834 &original)) 825finish_exec (struct exec_tracee *tracee, USER_REGS_STRUCT *regs)
835 return 3; 826{
827 USER_WORD size1, loader;
828 USER_REGS_STRUCT original;
836 829
837#endif /* __aarch64__ */ 830 size1 = tracee->data_size;
838 831
839 *regs = original; 832 /* Record the registers' values as they originally were. */
833 memcpy (&original, regs, sizeof *regs);
840 834
841 /* Upon failure, wait for the next system call and return 835 /* Any non-zero value of `original.SYSCALL_RET_REG' indicates that the
842 success. */ 836 reissued `exec' call was unsuccessful, and the loader is not
837 executing. Restore the previous stack pointer and permit the
838 tracee to run to completion. */
843 839
844 if (original.SYSCALL_RET_REG) 840 if (original.SYSCALL_RET_REG)
845 { 841 {
846 /* Restore the original stack pointer. */ 842 regs->STACK_POINTER = tracee->sp;
847 regs->STACK_POINTER = sp;
848
849#ifdef __aarch64__ 843#ifdef __aarch64__
850 aarch64_set_regs (tracee->pid, regs, false); 844 aarch64_set_regs (tracee->pid, regs, false);
851#else /* !__aarch64__ */ 845#else /* !__aarch64__ */
852 ptrace (PTRACE_SETREGS, tracee->pid, NULL, regs); 846 ptrace (PTRACE_SETREGS, tracee->pid, NULL, regs);
853#endif /* __aarch64__ */ 847#endif /* __aarch64__ */
854 848 return;
855 goto exec_failure;
856 } 849 }
857 850
858 /* Write the loader area to the stack, followed by its size and the 851 /* Write the loader area to the stack, followed by its size and the
859 original stack pointer. */ 852 original stack pointer. */
860 853
861 loader = user_alloca (tracee, &original, regs, 854 loader = user_alloca (tracee, &original, regs,
862 size + sizeof loader * 2); 855 size1 + sizeof loader * 2);
863 if (!loader) 856 if (!loader)
864 return 3; 857 goto error;
865
866 size1 = size;
867 858
868#ifndef STACK_GROWS_DOWNWARDS 859#ifndef STACK_GROWS_DOWNWARDS
869 860 not implemented, you lose.
870 NOT_IMPLEMENTED;
871
872#else /* STACK_GROWS_DOWNWARDS */ 861#else /* STACK_GROWS_DOWNWARDS */
873 862
874 if (user_copy (tracee, (unsigned char *) area, 863 if (user_copy (tracee, (unsigned char *) tracee->exec_data,
875 loader + sizeof size1 * 2, size) 864 loader + sizeof size1 * 2, size1)
876 || user_copy (tracee, (unsigned char *) &size1, 865 || user_copy (tracee, (unsigned char *) &size1,
877 loader + sizeof size1, sizeof size1)) 866 loader + sizeof size1, sizeof size1))
878 return 3; 867 goto error;
879 868
880 size1 = original.STACK_POINTER; 869 size1 = original.STACK_POINTER;
881 870
882 if (user_copy (tracee, (unsigned char *) &size1, 871 if (user_copy (tracee, (unsigned char *) &size1,
883 loader, sizeof size1)) 872 loader, sizeof size1))
884 return 3; 873 goto error;
885 874
886#endif /* STACK_GROWS_DOWNWARDS */ 875#endif /* STACK_GROWS_DOWNWARDS */
887 876
888 /* Continue. */ 877 /* Continue. */
889 if (ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0)) 878 if (ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
890 return 3; 879 tracee->pid, 0, 0))
891 880 goto error;
892 return 0;
893 881
894 exec_failure: 882 error:
895 return 3; 883 free (tracee->exec_data);
884 tracee->exec_data = NULL;
896} 885}
897 886
898 887
@@ -1007,13 +996,6 @@ static int
1007handle_readlinkat (USER_WORD callno, USER_REGS_STRUCT *regs, 996handle_readlinkat (USER_WORD callno, USER_REGS_STRUCT *regs,
1008 struct exec_tracee *tracee, USER_WORD *result) 997 struct exec_tracee *tracee, USER_WORD *result)
1009{ 998{
1010#ifdef REENTRANT
1011 /* readlinkat cannot be handled specially when the library is built
1012 to be reentrant, as the file name information cannot be
1013 recorded. */
1014 return 0;
1015#else /* !REENTRANT */
1016
1017 char buffer[PATH_MAX + 1]; 999 char buffer[PATH_MAX + 1];
1018 USER_WORD address, return_buffer, size; 1000 USER_WORD address, return_buffer, size;
1019 size_t length; 1001 size_t length;
@@ -1086,7 +1068,6 @@ handle_readlinkat (USER_WORD callno, USER_REGS_STRUCT *regs,
1086 1068
1087 *result = length; 1069 *result = length;
1088 return 2; 1070 return 2;
1089#endif /* REENTRANT */
1090} 1071}
1091 1072
1092/* Handle an `open' or `openat' system call. 1073/* Handle an `open' or `openat' system call.
@@ -1104,12 +1085,6 @@ static int
1104handle_openat (USER_WORD callno, USER_REGS_STRUCT *regs, 1085handle_openat (USER_WORD callno, USER_REGS_STRUCT *regs,
1105 struct exec_tracee *tracee, USER_WORD *result) 1086 struct exec_tracee *tracee, USER_WORD *result)
1106{ 1087{
1107#ifdef REENTRANT
1108 /* readlinkat cannot be handled specially when the library is built
1109 to be reentrant, as the file name information cannot be
1110 recorded. */
1111 return 0;
1112#else /* !REENTRANT */
1113 char buffer[PATH_MAX + 1]; 1088 char buffer[PATH_MAX + 1];
1114 USER_WORD address; 1089 USER_WORD address;
1115 size_t length; 1090 size_t length;
@@ -1199,7 +1174,6 @@ handle_openat (USER_WORD callno, USER_REGS_STRUCT *regs,
1199 fail: 1174 fail:
1200 errno = EIO; 1175 errno = EIO;
1201 return 1; 1176 return 1;
1202#endif /* REENTRANT */
1203} 1177}
1204 1178
1205/* Process the system call at which TRACEE is stopped. If the system 1179/* Process the system call at which TRACEE is stopped. If the system
@@ -1229,30 +1203,43 @@ process_system_call (struct exec_tracee *tracee)
1229 /* Save the stack pointer. */ 1203 /* Save the stack pointer. */
1230 sp = regs.STACK_POINTER; 1204 sp = regs.STACK_POINTER;
1231 1205
1232 /* Now dispatch based on the system call. */ 1206 /* Now dispatch based on the system call. If TRACEE->exec_data is
1233 callno = regs.SYSCALL_NUM_REG; 1207 set, this must be exec, whatever the value of SYSCALL_NUM_REG,
1208 which is erased when exec loads another image. */
1209
1210 callno = (!tracee->exec_data ? regs.SYSCALL_NUM_REG : EXEC_SYSCALL);
1234 switch (callno) 1211 switch (callno)
1235 { 1212 {
1236 case EXEC_SYSCALL: 1213 case EXEC_SYSCALL:
1237 1214
1238 /* exec system calls should be handled synchronously. */ 1215 if (!tracee->waiting_for_syscall)
1239 assert (!tracee->waiting_for_syscall);
1240 rc = handle_exec (tracee, &regs);
1241
1242 switch (rc)
1243 { 1216 {
1244 case 3: 1217 /* The outstanding syscall flag must not be inconsistent with
1245 /* It's too late to do anything about this error,. */ 1218 the presence of instructions for the loader. */
1246 break; 1219 assert (!tracee->exec_data);
1220 rc = handle_exec (tracee, &regs);
1247 1221
1248 case 2: 1222 if (rc)
1249 /* The process has gone away. */ 1223 /* An error has occurred; errno is set to the error. */
1250 remove_tracee (tracee); 1224 goto report_syscall_error;
1251 break; 1225
1226 /* The process has been resumed. Assert that the instructions
1227 for loading this executable have been generated and
1228 recorded, and set waiting_for_syscall. */
1229 tracee->waiting_for_syscall = true;
1230 assert (tracee->exec_data);
1252 1231
1253 case 1: 1232 /* Record the initial stack pointer also. */
1254 /* An error has occurred; errno is set to the error. */ 1233 tracee->sp = sp;
1255 goto report_syscall_error; 1234 }
1235 else
1236 {
1237 assert (tracee->exec_data);
1238 finish_exec (tracee, &regs);
1239
1240 /* The process has been resumed and has become capable of
1241 executing independently. */
1242 tracee->waiting_for_syscall = false;
1256 } 1243 }
1257 1244
1258 break; 1245 break;
@@ -1311,7 +1298,7 @@ process_system_call (struct exec_tracee *tracee)
1311 regs.STACK_POINTER = tracee->sp; 1298 regs.STACK_POINTER = tracee->sp;
1312 1299
1313#ifdef __aarch64__ 1300#ifdef __aarch64__
1314 if (aarch64_set_regs (tracee->pid, &regs, true)) 1301 if (aarch64_set_regs (tracee->pid, &regs, false))
1315 return; 1302 return;
1316#else /* !__aarch64__ */ 1303#else /* !__aarch64__ */
1317 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, &regs)) 1304 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, &regs))
@@ -1327,11 +1314,35 @@ process_system_call (struct exec_tracee *tracee)
1327 will DTRT upon the next call to PTRACE_SYSCALL after the 1314 will DTRT upon the next call to PTRACE_SYSCALL after the
1328 syscall-trap signal is delivered. */ 1315 syscall-trap signal is delivered. */
1329 1316
1330 rc = ptrace (PTRACE_SYSCALL, tracee->pid, 1317 rc = ptrace (((use_seccomp_p
1318 /* open and openat are not processed synchronously,
1319 nor can they afford to dispense with
1320 post-syscall finalization. */
1321
1322 && ((callno != OPENAT_SYSCALL
1323#ifdef OPEN_SYSCALL
1324 && callno != OPEN_SYSCALL
1325#endif /* OPEN_SYSCALL */
1326 )
1327 /* Since syscall initialization should be
1328 reserved for seccomp_system_call, resume the
1329 process if this system call is already
1330 complete. */
1331 || !tracee->waiting_for_syscall))
1332 ? PTRACE_CONT : PTRACE_SYSCALL), tracee->pid,
1331 NULL, NULL); 1333 NULL, NULL);
1332 if (rc < 0) 1334 if (rc < 0)
1333 return; 1335 return;
1334 1336
1337#ifdef HAVE_SECCOMP
1338 if (!(use_seccomp_p
1339 && ((callno != OPENAT_SYSCALL
1340#ifdef OPEN_SYSCALL
1341 && callno != OPEN_SYSCALL
1342#endif /* OPEN_SYSCALL */
1343 )
1344 || !tracee->waiting_for_syscall)))
1345#endif /* !HAVE_SECCOMP */
1335 tracee->waiting_for_syscall = !tracee->waiting_for_syscall; 1346 tracee->waiting_for_syscall = !tracee->waiting_for_syscall;
1336 } 1347 }
1337 1348
@@ -1345,9 +1356,10 @@ process_system_call (struct exec_tracee *tracee)
1345 reporting_error = false; 1356 reporting_error = false;
1346 common: 1357 common:
1347 1358
1348 /* Reporting an error or emulating a system call works by setting 1359 /* Reporting an error or emulating a system call works by replacing
1349 the system call number to -1, letting it continue, and then 1360 the system call number with -1 or another nonexistent syscall,
1350 substituting errno for ENOSYS in the case of an error. 1361 letting it continue, and then substituting errno for ENOSYS in the
1362 case of an error.
1351 1363
1352 Make sure that the stack pointer is restored to its original 1364 Make sure that the stack pointer is restored to its original
1353 position upon exit, or bad things can happen. */ 1365 position upon exit, or bad things can happen. */
@@ -1426,13 +1438,13 @@ process_system_call (struct exec_tracee *tracee)
1426#endif /* __aarch64__ */ 1438#endif /* __aarch64__ */
1427 1439
1428 /* Now wait for the next system call to happen. */ 1440 /* Now wait for the next system call to happen. */
1429 ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL); 1441 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1442 tracee->pid, NULL, NULL);
1430 } 1443 }
1431 else 1444 else
1432 { 1445 {
1433 /* No error is being reported. Return the result in the 1446 /* No error is being reported. Return the result in the
1434 appropriate registers. */ 1447 appropriate registers. */
1435
1436#ifdef __mips__ 1448#ifdef __mips__
1437 /* MIPS systems place errno in v0 and set a3 to 1. */ 1449 /* MIPS systems place errno in v0 and set a3 to 1. */
1438 regs.gregs[2] = result; 1450 regs.gregs[2] = result;
@@ -1449,14 +1461,367 @@ process_system_call (struct exec_tracee *tracee)
1449#endif /* __aarch64__ */ 1461#endif /* __aarch64__ */
1450 1462
1451 /* Now wait for the next system call to happen. */ 1463 /* Now wait for the next system call to happen. */
1452 ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL); 1464 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1465 tracee->pid, NULL, NULL);
1453 } 1466 }
1454} 1467}
1455 1468
1456 1469
1457 1470
1471#ifdef HAVE_SECCOMP
1472
1473/* Seccomp acceleration.
1474
1475 Seccomp enables selectively filtering signals so that the tracing
1476 process is only notified of such system calls as it is interested in
1477 intercepting, i.e., exec and open*. This improves performance
1478 enormously over the traditional approach of pausing the tracee before
1479 each system call. */
1480
1481/* Whether the kernel's version is 4.7.x or earlier. */
1482static bool kernel_4_7_or_earlier;
1483
1484/* Array of system calls this module is interested in intercepting. */
1485static int interesting_syscalls[] =
1486 {
1487 EXEC_SYSCALL,
1488#ifdef OPEN_SYSCALL
1489 OPEN_SYSCALL,
1490#endif /* OPEN_SYSCALL */
1491 OPENAT_SYSCALL,
1492#ifdef READLINK_SYSCALL
1493 READLINK_SYSCALL,
1494#endif /* READLINK_SYSCALL */
1495 READLINKAT_SYSCALL,
1496 };
1497
1498/* Number of elements in an array. */
1499#define ARRAYELTS(arr) (sizeof (arr) / sizeof (arr)[0])
1500
1501/* Install a secure computing filter that will notify attached tracers
1502 when a system call of interest to this module is received. Value is
1503 0 if successful, 1 otherwise. */
1504
1505static int
1506establish_seccomp_filter (void)
1507{
1508 struct sock_filter statements[1 + ARRAYELTS (interesting_syscalls) + 2];
1509 struct sock_fprog program;
1510 int index, rc;
1511
1512 index = 0;
1513
1514 /* As the exec wrapper will reject executables for an inappropriate
1515 architecture, verifying the same here would only be redundant.
1516 Proceed to load the current system call number. */
1517
1518 statements[index++] = ((struct sock_filter)
1519 BPF_STMT (BPF_LD + BPF_W + BPF_ABS,
1520 offsetof (struct seccomp_data, nr)));
1521
1522 /* Search for system calls of interest. */
1523
1524 statements[index]
1525 = ((struct sock_filter)
1526 BPF_JUMP (BPF_JMP + BPF_JEQ + BPF_K, EXEC_SYSCALL,
1527 ARRAYELTS (interesting_syscalls), 0)); index++;
1528#ifdef OPEN_SYSCALL
1529 statements[index]
1530 = ((struct sock_filter)
1531 BPF_JUMP (BPF_JMP + BPF_JEQ + BPF_K, OPEN_SYSCALL,
1532 ARRAYELTS (interesting_syscalls) - index + 1, 0)); index++;
1533#endif /* OPEN_SYSCALL */
1534 statements[index]
1535 = ((struct sock_filter)
1536 BPF_JUMP (BPF_JMP + BPF_JEQ + BPF_K, OPENAT_SYSCALL,
1537 ARRAYELTS (interesting_syscalls) - index + 1, 0)); index++;
1538#ifdef READLINK_SYSCALL
1539 statements[index]
1540 = ((struct sock_filter)
1541 BPF_JUMP (BPF_JMP + BPF_JEQ + BPF_K, READLINK_SYSCALL,
1542 ARRAYELTS (interesting_syscalls) - index + 1, 0)); index++;
1543#endif /* READLINK_SYSCALL */
1544 statements[index]
1545 = ((struct sock_filter)
1546 BPF_JUMP (BPF_JMP + BPF_JEQ + BPF_K, READLINKAT_SYSCALL,
1547 ARRAYELTS (interesting_syscalls) - index + 1, 0)); index++;
1548
1549 /* If not intercepted above, permit this system call to execute as
1550 normal. */
1551 statements[index++]
1552 = (struct sock_filter) BPF_STMT (BPF_RET + BPF_K, SECCOMP_RET_ALLOW);
1553 statements[index++]
1554 = (struct sock_filter) BPF_STMT (BPF_RET + BPF_K, SECCOMP_RET_TRACE);
1555
1556 rc = prctl (PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1557 if (rc)
1558 return 1;
1559
1560 program.len = ARRAYELTS (statements);
1561 program.filter = statements;
1562 rc = prctl (PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &program);
1563 if (rc)
1564 return 1;
1565
1566 return 0;
1567}
1568
1569/* Intercept or resume and dismiss the system call at which TRACEE is
1570 paused, similarly to process_system_call. */
1571
1572static void
1573seccomp_system_call (struct exec_tracee *tracee)
1574{
1575 USER_REGS_STRUCT regs;
1576 int rc, wstatus, save_errno;
1577 USER_WORD callno, sp;
1578 USER_WORD result;
1579 bool reporting_error;
1580
1581 if (kernel_4_7_or_earlier)
1582 {
1583 /* On kernel 4.7 and earlier, following a PTRACE_EVENT_SECCOMP by
1584 a PTRACE_SYSCALL will give rise to a syscall-entry stop event,
1585 and seccomp filters will be suppressed till the system call
1586 runs its course. */
1587 ptrace (PTRACE_SYSCALL, tracee->pid, 0, 0);
1588 return;
1589 }
1590
1591#ifdef __aarch64__
1592 rc = aarch64_get_regs (tracee->pid, &regs);
1593#else /* !__aarch64__ */
1594 rc = ptrace (PTRACE_GETREGS, tracee->pid, NULL,
1595 &regs);
1596#endif /* __aarch64__ */
1597
1598 /* TODO: what to do if this fails? */
1599 if (rc < 0)
1600 return;
1601
1602 /* On kernel 4.8, processes resumed after being paused so as to
1603 produce a PTRACE_EVENT_SECCOMP will execute till the system call
1604 completes, or indefinitely if resumed with PTRACE_CONT.
1605
1606 In this context processes are resumed with PTRACE_CONT unless it is
1607 an `open' syscall that is being intercepted, which, if successfully
1608 intercepted, must receive adjustments to their stack pointer upon
1609 completion of said system call. */
1610 assert (!tracee->waiting_for_syscall);
1611
1612 /* Save the stack pointer. */
1613 sp = regs.STACK_POINTER;
1614
1615 /* Now dispatch based on the system call. */
1616 callno = regs.SYSCALL_NUM_REG;
1617 switch (callno)
1618 {
1619 case EXEC_SYSCALL:
1620 assert (!tracee->exec_data);
1621 rc = handle_exec (tracee, &regs);
1622
1623 if (rc)
1624 /* An error has occurred; errno is set to the error. */
1625 goto report_syscall_error;
1626
1627 /* The process has been resumed. Assert that the instructions for
1628 loading this executable have been generated and recorded, and
1629 set waiting_for_syscall. */
1630 tracee->waiting_for_syscall = true;
1631 assert (tracee->exec_data);
1632
1633 /* Record the initial stack pointer also. */
1634 tracee->sp = sp;
1635 break;
1636
1637#ifdef READLINK_SYSCALL
1638 case READLINK_SYSCALL:
1639#endif /* READLINK_SYSCALL */
1640 case READLINKAT_SYSCALL:
1641 /* Handle this readlinkat system call. */
1642 rc = handle_readlinkat (callno, &regs, tracee,
1643 &result);
1644
1645 /* rc means the same as in `handle_exec'. */
1646
1647 if (rc == 1)
1648 goto report_syscall_error;
1649 else if (rc == 2)
1650 goto emulate_syscall;
1651
1652 goto continue_syscall;
1653
1654#ifdef OPEN_SYSCALL
1655 case OPEN_SYSCALL:
1656#endif /* OPEN_SYSCALL */
1657 case OPENAT_SYSCALL:
1658 /* Handle this open system call. */
1659 rc = handle_openat (callno, &regs, tracee, &result);
1660
1661 /* rc means the same as in `handle_exec', except that `open'
1662 is never emulated. */
1663
1664 if (rc == 1)
1665 goto report_syscall_error;
1666
1667 /* The stack pointer must be restored after it was modified
1668 by `user_alloca'; record sp in TRACEE, which will be
1669 restored after this system call completes. */
1670 tracee->sp = sp;
1671
1672 /* As such, arrange to enter `process_system_call' on its
1673 completion. */
1674 rc = ptrace (PTRACE_SYSCALL, tracee->pid,
1675 NULL, NULL);
1676 if (rc < 0)
1677 return;
1678
1679 tracee->waiting_for_syscall = !tracee->waiting_for_syscall;
1680 break;
1681
1682 default:
1683 continue_syscall:
1684 rc = ptrace (PTRACE_CONT, tracee->pid, NULL, NULL);
1685 if (rc < 0)
1686 return;
1687 }
1688
1689 return;
1690
1691 report_syscall_error:
1692 reporting_error = true;
1693 goto common;
1694
1695 emulate_syscall:
1696 reporting_error = false;
1697 common:
1698
1699 /* Reporting an error or emulating a system call works by replacing
1700 the system call number with -1 or another nonexistent syscall,
1701 letting it continue, and then substituting errno for ENOSYS in the
1702 case of an error.
1703
1704 Make sure that the stack pointer is restored to its original
1705 position upon exit, or bad things can happen. */
1706
1707 /* First, save errno; system calls below will clobber it. */
1708 save_errno = errno;
1709
1710 regs.SYSCALL_NUM_REG = -1;
1711 regs.STACK_POINTER = sp;
1712
1713#ifdef __aarch64__
1714 if (aarch64_set_regs (tracee->pid, &regs, true))
1715 return;
1716#else /* !__aarch64__ */
1717
1718#ifdef __arm__
1719 /* On ARM systems, a special request is used to update the system
1720 call number as known to the kernel. In addition, the system call
1721 number must be valid, so use `tuxcall'. Hopefully, nobody will
1722 run this on a kernel with Tux. */
1723
1724 if (ptrace (PTRACE_SET_SYSCALL, tracee->pid, NULL, 222))
1725 return;
1726#endif /* __arm__ */
1727
1728 if (ptrace (PTRACE_SETREGS, tracee->pid, NULL, &regs))
1729 return;
1730#endif /* __aarch64__ */
1731
1732 /* Do this invalid system call. */
1733 if (ptrace (PTRACE_SYSCALL, tracee->pid, NULL, NULL))
1734 return;
1735
1736 again1:
1737 rc = waitpid (tracee->pid, &wstatus, __WALL);
1738 if (rc == -1 && errno == EINTR)
1739 goto again1;
1740
1741 /* Return if waitpid fails. */
1742
1743 if (rc == -1)
1744 return;
1745
1746 /* If the process received a signal, see if the signal is SIGSYS
1747 and/or from seccomp. If so, discard it. */
1748
1749 if (WIFSTOPPED (wstatus))
1750 {
1751 rc = check_signal (tracee, wstatus);
1752
1753 if (rc == -1)
1754 return;
1755 else if (rc)
1756 goto again1;
1757 }
1758
1759 if (!WIFSTOPPED (wstatus))
1760 /* The process has been killed in response to a signal. In this
1761 case, simply unlink the tracee and return. */
1762 remove_tracee (tracee);
1763 else if (reporting_error)
1764 {
1765#ifdef __mips__
1766 /* MIPS systems place errno in v0 and set a3 to 1. */
1767 regs.gregs[2] = save_errno;
1768 regs.gregs[7] = 1;
1769#else /* !__mips__ */
1770 regs.SYSCALL_RET_REG = -save_errno;
1771#endif /* __mips__ */
1772
1773 /* Report errno. */
1774#ifdef __aarch64__
1775 aarch64_set_regs (tracee->pid, &regs, false);
1776#else /* !__aarch64__ */
1777 ptrace (PTRACE_SETREGS, tracee->pid, NULL, &regs);
1778#endif /* __aarch64__ */
1779
1780 /* Resume the process till the next interception by its filter. */
1781 ptrace (PTRACE_CONT, tracee->pid, NULL, NULL);
1782 }
1783 else
1784 {
1785 /* No error is being reported. Return the result in the
1786 appropriate registers. */
1787
1788#ifdef __mips__
1789 /* MIPS systems place errno in v0 and set a3 to 1. */
1790 regs.gregs[2] = result;
1791 regs.gregs[7] = 0;
1792#else /* !__mips__ */
1793 regs.SYSCALL_RET_REG = result;
1794#endif /* __mips__ */
1795
1796 /* Report errno. */
1797#ifdef __aarch64__
1798 aarch64_set_regs (tracee->pid, &regs, false);
1799#else /* !__aarch64__ */
1800 ptrace (PTRACE_SETREGS, tracee->pid, NULL, &regs);
1801#endif /* __aarch64__ */
1802
1803 /* Resume the process till the next interception by its filter. */
1804 ptrace (PTRACE_CONT, tracee->pid, NULL, NULL);
1805 }
1806}
1807
1808#ifndef PTRACE_EVENT_SECCOMP
1809#define PTRACE_EVENT_SECCOMP 7
1810#endif /* !PTRACE_EVENT_SECCOMP */
1811
1812#ifndef PTRACE_O_TRACESECCOMP
1813#define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP)
1814#endif /* !PTRACE_O_TRACESECCOMP */
1815
1816#ifndef SIGSYS
1817#define SIGSYS 31
1818#endif /* !SIGSYS */
1819#endif /* HAVE_SECCOMP */
1820
1821
1822
1458/* Like `execve', but asks the parent to begin tracing this thread. 1823/* Like `execve', but asks the parent to begin tracing this thread.
1459 Fail if tracing is unsuccessful. */ 1824 Fail by returning a non-zero value if tracing is unsuccessful. */
1460 1825
1461int 1826int
1462tracing_execve (const char *file, char *const *argv, 1827tracing_execve (const char *file, char *const *argv,
@@ -1471,6 +1836,13 @@ tracing_execve (const char *file, char *const *argv,
1471 1836
1472 /* Notify the parent to enter signal-delivery-stop. */ 1837 /* Notify the parent to enter signal-delivery-stop. */
1473 raise (SIGSTOP); 1838 raise (SIGSTOP);
1839
1840#ifdef HAVE_SECCOMP
1841 /* Install the seccomp filter. */
1842 if (use_seccomp_p && establish_seccomp_filter ())
1843 return 1;
1844#endif /* HAVE_SECCOMP */
1845
1474 return execve (file, argv, envp); 1846 return execve (file, argv, envp);
1475} 1847}
1476 1848
@@ -1510,6 +1882,10 @@ after_fork (pid_t pid)
1510 flags |= PTRACE_O_TRACEFORK; 1882 flags |= PTRACE_O_TRACEFORK;
1511 flags |= PTRACE_O_TRACESYSGOOD; 1883 flags |= PTRACE_O_TRACESYSGOOD;
1512 flags |= PTRACE_O_TRACEEXIT; 1884 flags |= PTRACE_O_TRACEEXIT;
1885#ifdef HAVE_SECCOMP
1886 if (use_seccomp_p)
1887 flags |= PTRACE_O_TRACESECCOMP;
1888#endif /* HAVE_SECCOMP */
1513 1889
1514 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags); 1890 rc = ptrace (PTRACE_SETOPTIONS, pid, 0, flags);
1515 1891
@@ -1521,8 +1897,10 @@ after_fork (pid_t pid)
1521 return 1; 1897 return 1;
1522 } 1898 }
1523 1899
1524 /* Request that the child stop upon the next system call. */ 1900 /* Request that the child stop upon the next system call, or the next
1525 rc = ptrace (PTRACE_SYSCALL, pid, 0, 0); 1901 filter event. */
1902 rc = ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1903 pid, 0, 0);
1526 if (rc) 1904 if (rc)
1527 return 1; 1905 return 1;
1528 1906
@@ -1543,9 +1921,9 @@ after_fork (pid_t pid)
1543 tracee->next = tracing_processes; 1921 tracee->next = tracing_processes;
1544 tracee->waiting_for_syscall = false; 1922 tracee->waiting_for_syscall = false;
1545 tracee->new_child = false; 1923 tracee->new_child = false;
1546#ifndef REENTRANT
1547 tracee->exec_file = NULL; 1924 tracee->exec_file = NULL;
1548#endif /* REENTRANT */ 1925 tracee->exec_data = NULL;
1926 tracee->data_size = 0;
1549 tracing_processes = tracee; 1927 tracing_processes = tracee;
1550 return 0; 1928 return 0;
1551} 1929}
@@ -1604,9 +1982,11 @@ exec_waitpid (pid_t pid, int *wstatus, int options)
1604 { 1982 {
1605 if (siginfo.si_code < 0) 1983 if (siginfo.si_code < 0)
1606 /* SIGTRAP delivered from userspace. Pass it on. */ 1984 /* SIGTRAP delivered from userspace. Pass it on. */
1607 ptrace (PTRACE_SYSCALL, pid, 0, SIGTRAP); 1985 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1986 pid, 0, SIGTRAP);
1608 else 1987 else
1609 ptrace (PTRACE_SYSCALL, pid, 0, 0); 1988 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1989 pid, 0, 0);
1610 1990
1611 return -1; 1991 return -1;
1612 } 1992 }
@@ -1644,8 +2024,16 @@ exec_waitpid (pid_t pid, int *wstatus, int options)
1644 /* These events are handled by tracing SIGSTOP signals sent 2024 /* These events are handled by tracing SIGSTOP signals sent
1645 to unknown tracees. Make sure not to pass through 2025 to unknown tracees. Make sure not to pass through
1646 status, as there's no signal really being delivered. */ 2026 status, as there's no signal really being delivered. */
1647 ptrace (PTRACE_SYSCALL, pid, 0, 0); 2027 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL), pid, 0, 0);
2028 return -1;
2029
2030#ifdef HAVE_SECCOMP
2031 case SIGTRAP | (PTRACE_EVENT_SECCOMP << 8):
2032 /* Intercept and process this system call if the event was
2033 produced by our filter. */
2034 seccomp_system_call (tracee);
1648 return -1; 2035 return -1;
2036#endif /* HAVE_SECCOMP */
1649 2037
1650#ifdef SIGSYS 2038#ifdef SIGSYS
1651 case SIGSYS: 2039 case SIGSYS:
@@ -1657,17 +2045,20 @@ exec_waitpid (pid_t pid, int *wstatus, int options)
1657 it. */ 2045 it. */
1658#ifdef HAVE_SIGINFO_T_SI_SYSCALL 2046#ifdef HAVE_SIGINFO_T_SI_SYSCALL
1659#ifndef __arm__ 2047#ifndef __arm__
1660 ptrace (PTRACE_SYSCALL, pid, 0, ((siginfo.si_code == SYS_SECCOMP 2048 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1661 && siginfo.si_syscall == -1) 2049 pid, 0, ((siginfo.si_code == SYS_SECCOMP
1662 ? 0 : status)); 2050 && siginfo.si_syscall == -1)
2051 ? 0 : status));
1663#else /* __arm__ */ 2052#else /* __arm__ */
1664 ptrace (PTRACE_SYSCALL, pid, 0, ((siginfo.si_code == SYS_SECCOMP 2053 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL),
1665 && siginfo.si_syscall == 222) 2054 pid, 0, ((siginfo.si_code == SYS_SECCOMP
1666 ? 0 : status)); 2055 && siginfo.si_syscall == 222)
2056 ? 0 : status));
1667#endif /* !__arm__ */ 2057#endif /* !__arm__ */
1668#else /* !HAVE_SIGINFO_T_SI_SYSCALL */ 2058#else /* !HAVE_SIGINFO_T_SI_SYSCALL */
1669 /* Drop this signal, since what caused it is unknown. */ 2059 /* Drop this signal, since what caused it is unknown. */
1670 ptrace (PTRACE_SYSCALL, pid, 0, 0); 2060 ptrace ((use_seccomp_p ? PTRACE_CONT : PTRACE_SYSCALL), pid,
2061 0, 0);
1671#endif /* HAVE_SIGINFO_T_SI_SYSCALL */ 2062#endif /* HAVE_SIGINFO_T_SI_SYSCALL */
1672 return -1; 2063 return -1;
1673#endif /* SIGSYS */ 2064#endif /* SIGSYS */
@@ -1698,5 +2089,49 @@ exec_waitpid (pid_t pid, int *wstatus, int options)
1698void 2089void
1699exec_init (const char *loader) 2090exec_init (const char *loader)
1700{ 2091{
2092#ifdef HAVE_SECCOMP
2093 struct utsname u;
2094 int major, minor;
2095#endif /* HAVE_SECCOMP */
2096
1701 loader_name = loader; 2097 loader_name = loader;
2098#ifdef HAVE_SECCOMP
2099 errno = 0;
2100 prctl (PR_GET_SECCOMP);
2101
2102 /* PR_GET_SECCOMP should not set errno if the kernel was configured
2103 with support for seccomp. */
2104 if (!errno)
2105 use_seccomp_p = true;
2106 else
2107 return;
2108
2109 /* Establish whether the kernel is 4.7.x or older. */
2110 uname (&u);
2111 if ((sscanf (u.release, "%d.%d", &major, &minor) == 2))
2112 {
2113 /* Certain required ptrace features were introduced in kernel
2114 3.5. */
2115 if (major < 3 || (major == 3 && minor < 5))
2116 use_seccomp_p = false;
2117 else
2118 {
2119 if (major < 4 || (major == 4 && minor <= 7))
2120 {
2121#ifndef __ANDROID__
2122 kernel_4_7_or_earlier = true;
2123#else /* __ANDROID __ */
2124 /* Certain Android kernels have received backports of
2125 those new PTRACE_EVENT_SECCOMP semantics which were
2126 introduced in kernel version 4.8, so that it is
2127 necessary to actively establish which variant is in
2128 place. This being much too involved for code I cannot
2129 test, simply disable seccomp on kernel releases subject
2130 to these uncertainties. */
2131 use_seccomp_p = false;
2132#endif /* !__ANDROID__ */
2133 }
2134 }
2135 }
2136#endif /* HAVE_SECCOMP */
1702} 2137}