From bb5f74ee84398a56435baa2ef15e12d8a35e5e03 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 3 Dec 2012 13:42:12 -0800 Subject: Don't let call-process be a zombie factory. Fixing this bug required some cleanup of the signal-handling code. As a side effect, this change also fixes a longstanding rare race condition whereby Emacs could mistakenly kill unrelated processes, and it fixes a bug where a second C-g does not kill a recalcitrant synchronous process in GNU/Linux and similar platforms. The patch should also fix the last vestiges of Bug#9488, a bug which has mostly been fixed on the trunk by other changes. * callproc.c, process.h (synch_process_alive, synch_process_death) (synch_process_termsig, sync_process_retcode): Remove. All uses removed, to simplify analysis and so that less consing is done inside critical sections. * callproc.c (call_process_exited): Remove. All uses replaced with !synch_process_pid. * callproc.c (synch_process_pid, synch_process_fd): New static vars. These take the role of what used to be in unwind-protect arg. All uses changed. (block_child_signal, unblock_child_signal): New functions, to avoid races that could kill innocent-victim processes. (call_process_kill, call_process_cleanup, Fcall_process): Use them. (call_process_kill): Record killed processes as deleted, so that zombies do not clutter up the system. Do this inside a critical section, to avoid a race that would allow the clutter. (call_process_cleanup): Fix code so that the second C-g works again on common platforms such as GNU/Linux. (Fcall_process): Create the child process in a critical section, to fix a race condition. If creating an asynchronous process, record it as deleted so that zombies do not clutter up the system. Do unwind-protect for WINDOWSNT too, as that's simpler in the light of these changes. Omit unnecessary call to emacs_close before failure, as the unwind-protect code does that. * callproc.c (call_process_cleanup): * w32proc.c (waitpid): Simplify now that synch_process_alive is gone. * process.c (record_deleted_pid): New function, containing code refactored out of Fdelete_process. (Fdelete_process): Use it. (process_status_retrieved): Remove. All callers changed to use child_status_change. (record_child_status_change): Remove, folding its contents into ... (handle_child_signal): ... this signal handler. Now, this function is purely a handler for SIGCHLD, and is not called after a synchronous waitpid returns; the synchronous code is moved to wait_for_termination. There is no need to worry about reaping more than one child now. * sysdep.c (get_child_status, child_status_changed): New functions. (wait_for_termination): Now takes int * status and bool interruptible arguments, too. Do not record child status change; that's now the caller's responsibility. All callers changed. Reimplement in terms of get_child_status. (wait_for_termination_1, interruptible_wait_for_termination): Remove. All callers changed to use wait_for_termination. * syswait.h: Include , for bool. (record_child_status_change, interruptible_wait_for_termination): Remove decls. (record_deleted_pid, child_status_changed): New decls. (wait_for_termination): Adjust to API changes noted above. Fixes: debbugs:12980 --- src/process.c | 136 +++++++++++++++++++++++----------------------------------- 1 file changed, 53 insertions(+), 83 deletions(-) (limited to 'src/process.c') diff --git a/src/process.c b/src/process.c index c6139c9f929..27009882c99 100644 --- a/src/process.c +++ b/src/process.c @@ -777,10 +777,23 @@ get_process (register Lisp_Object name) /* Fdelete_process promises to immediately forget about the process, but in reality, Emacs needs to remember those processes until they have been treated by the SIGCHLD handler and waitpid has been invoked on them; - otherwise they might fill up the kernel's process table. */ + otherwise they might fill up the kernel's process table. + + Some processes created by call-process are also put onto this list. */ static Lisp_Object deleted_pid_list; #endif +void +record_deleted_pid (pid_t pid) +{ +#ifdef SIGCHLD + deleted_pid_list = Fcons (make_fixnum_or_float (pid), + /* GC treated elements set to nil. */ + Fdelq (Qnil, deleted_pid_list)); + +#endif +} + DEFUN ("delete-process", Fdelete_process, Sdelete_process, 1, 1, 0, doc: /* Delete PROCESS: kill it and forget about it immediately. PROCESS may be a process, a buffer, the name of a process or buffer, or @@ -807,9 +820,7 @@ nil, indicating the current buffer's process. */) pid_t pid = p->pid; /* No problem storing the pid here, as it is still in Vprocess_alist. */ - deleted_pid_list = Fcons (make_fixnum_or_float (pid), - /* GC treated elements set to nil. */ - Fdelq (Qnil, deleted_pid_list)); + record_deleted_pid (pid); /* If the process has already signaled, remove it from the list. */ if (p->raw_status_new) update_status (p); @@ -6147,35 +6158,37 @@ process has been transmitted to the serial port. */) return process; } -/* If the status of the process DESIRED has changed, return true and - set *STATUS to its exit status; otherwise, return false. - If HAVE is nonnegative, assume that HAVE = waitpid (HAVE, STATUS, ...) - has already been invoked, and do not invoke waitpid again. */ +#ifdef SIGCHLD -static bool -process_status_retrieved (pid_t desired, pid_t have, int *status) -{ - if (have < 0) - { - /* Invoke waitpid only with a known process ID; do not invoke - waitpid with a nonpositive argument. Otherwise, Emacs might - reap an unwanted process by mistake. For example, invoking - waitpid (-1, ...) can mess up glib by reaping glib's subprocesses, - so that another thread running glib won't find them. */ - do - have = waitpid (desired, status, WNOHANG | WUNTRACED); - while (have < 0 && errno == EINTR); - } +/* The main Emacs thread records child processes in three places: - return have == desired; -} + - Vprocess_alist, for asynchronous subprocesses, which are child + processes visible to Lisp. + + - deleted_pid_list, for child processes invisible to Lisp, + typically because of delete-process. These are recorded so that + the processes can be reaped when they exit, so that the operating + system's process table is not cluttered by zombies. -/* If PID is nonnegative, the child process PID with wait status W has - changed its status; record this and return true. + - the local variable PID in Fcall_process, call_process_cleanup and + call_process_kill, for synchronous subprocesses. + record_unwind_protect is used to make sure this process is not + forgotten: if the user interrupts call-process and the child + process refuses to exit immediately even with two C-g's, + call_process_kill adds PID's contents to deleted_pid_list before + returning. - If PID is negative, ignore W, and look for known child processes - of Emacs whose status have changed. For each one found, record its new - status. + The main Emacs thread invokes waitpid only on child processes that + it creates and that have not been reaped. This avoid races on + platforms such as GTK, where other threads create their own + subprocesses which the main thread should not reap. For example, + if the main thread attempted to reap an already-reaped child, it + might inadvertently reap a GTK-created process that happened to + have the same process ID. */ + +/* Handle a SIGCHLD signal by looking for known child processes of + Emacs whose status have changed. For each one found, record its + new status. All we do is change the status; we do not run sentinels or print notifications. That is saved for the next time keyboard input is @@ -6198,20 +6211,15 @@ process_status_retrieved (pid_t desired, pid_t have, int *status) ** Malloc WARNING: This should never call malloc either directly or indirectly; if it does, that is a bug */ -void -record_child_status_change (pid_t pid, int w) +static void +handle_child_signal (int sig) { -#ifdef SIGCHLD - - /* Record at most one child only if we already know one child that - has exited. */ - bool record_at_most_one_child = 0 <= pid; - Lisp_Object tail; /* Find the process that signaled us, and record its status. */ - /* The process can have been deleted by Fdelete_process. */ + /* The process can have been deleted by Fdelete_process, or have + been started asynchronously by Fcall_process. */ for (tail = deleted_pid_list; CONSP (tail); tail = XCDR (tail)) { bool all_pids_are_fixnums @@ -6225,12 +6233,8 @@ record_child_status_change (pid_t pid, int w) deleted_pid = XINT (xpid); else deleted_pid = XFLOAT_DATA (xpid); - if (process_status_retrieved (deleted_pid, pid, &w)) - { - XSETCAR (tail, Qnil); - if (record_at_most_one_child) - return; - } + if (child_status_changed (deleted_pid, 0, 0)) + XSETCAR (tail, Qnil); } } @@ -6239,15 +6243,17 @@ record_child_status_change (pid_t pid, int w) { Lisp_Object proc = XCDR (XCAR (tail)); struct Lisp_Process *p = XPROCESS (proc); - if (p->alive && process_status_retrieved (p->pid, pid, &w)) + int status; + + if (p->alive && child_status_changed (p->pid, &status, WUNTRACED)) { /* Change the status of the process that was found. */ p->tick = ++process_tick; - p->raw_status = w; + p->raw_status = status; p->raw_status_new = 1; /* If process has terminated, stop waiting for its output. */ - if (WIFSIGNALED (w) || WIFEXITED (w)) + if (WIFSIGNALED (status) || WIFEXITED (status)) { int clear_desc_flag = 0; p->alive = 0; @@ -6261,44 +6267,8 @@ record_child_status_change (pid_t pid, int w) FD_CLR (p->infd, &non_keyboard_wait_mask); } } - - /* Tell wait_reading_process_output that it needs to wake up and - look around. */ - if (input_available_clear_time) - *input_available_clear_time = make_emacs_time (0, 0); - - if (record_at_most_one_child) - return; } } - - if (0 <= pid) - { - /* The caller successfully waited for a pid but no asynchronous - process was found for it, so this is a synchronous process. */ - - synch_process_alive = 0; - - /* Report the status of the synchronous process. */ - if (WIFEXITED (w)) - synch_process_retcode = WEXITSTATUS (w); - else if (WIFSIGNALED (w)) - synch_process_termsig = WTERMSIG (w); - - /* Tell wait_reading_process_output that it needs to wake up and - look around. */ - if (input_available_clear_time) - *input_available_clear_time = make_emacs_time (0, 0); - } -#endif -} - -#ifdef SIGCHLD - -static void -handle_child_signal (int sig) -{ - record_child_status_change (-1, 0); } static void -- cgit v1.2.1