From ee7118a8164ca250abd2cc33aa4dbc8a45594682 Mon Sep 17 00:00:00 2001 From: David Dykstra Date: Thu, 9 Jan 2003 19:04:06 +0000 Subject: [PATCH] Fixed bug that caused rsync to lose exit status of its child processes. Based on patch submited by David R. Staples. Todd Vander Does contributed the following test which showed the problem: > mkdir /tmp/nowrite > chmod -w /tmp/nowrite > rsync /etc/group /tmp/nowrite || echo $status mkstemp .group.cUaaeY failed rsync error: partial transfer (code 23) at main.c(518) 23 > rsync -e ssh loki:/etc/group /tmp/nowrite || echo $status mkstemp .group.1rayeY failed > rsync -e ssh loki:/etc/group /tmp/nowrite && echo $status mkstemp .group.fbaGiY failed 0 The remote copy should have returned non-zero exit code like the local copy. --- NEWS | 7 +++++++ main.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 39643896..98788e0e 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,9 @@ rsync changes since last release * Don't report an error if an excluded file disappears during an rsync run. (Eugene Chupriyanov and Bo Kersey) + * Added .svn to --cvs-exclude list to support subversion. (Jon + Middleton) + BUG FIXES: * Fix "forward name lookup failed" errors on AIX 4.3.3. (John @@ -42,6 +45,10 @@ rsync changes since last release contains a duplicate name (if it sorts to the end of the file list) and using --delete. (Wayne Davison) + * Fixed a bug that caused rsync to lose the exit status of its child + processes and sometimes return an exit code of 0 instead of showing + an error. (David R. Staples, Dave Dykstra) + INTERNAL: * Many code cleanups and improved internal documentation. (Martin diff --git a/main.c b/main.c index 14392e0d..3c057199 100644 --- a/main.c +++ b/main.c @@ -26,6 +26,16 @@ time_t starttime = 0; extern struct stats stats; extern int verbose; +/* there's probably never more than at most 2 outstanding child processes, + * but set it higher just in case. + */ +#define MAXCHILDPROCS 5 + +struct pid_status { + pid_t pid; + int status; +} pid_stat_table[MAXCHILDPROCS]; + static void show_malloc_stats(void); /**************************************************************************** @@ -33,11 +43,27 @@ wait for a process to exit, calling io_flush while waiting ****************************************************************************/ void wait_process(pid_t pid, int *status) { - while (waitpid(pid, status, WNOHANG) == 0) { + pid_t waited_pid; + int cnt; + + while ((waited_pid = waitpid(pid, status, WNOHANG)) == 0) { msleep(20); io_flush(); } + if ((waited_pid == -1) && (errno == ECHILD)) { + /* status of requested child no longer available. + * check to see if it was processed by the sigchld_handler. + */ + for (cnt = 0; cnt < MAXCHILDPROCS; cnt++) { + if (pid == pid_stat_table[cnt].pid) { + *status = pid_stat_table[cnt].status; + pid_stat_table[cnt].pid = 0; + break; + } + } + } + /* TODO: If the child exited on a signal, then log an * appropriate error message. Perhaps we should also accept a * message describing the purpose of the child. Also indicate @@ -848,7 +874,24 @@ static RETSIGTYPE sigusr2_handler(int UNUSED(val)) { static RETSIGTYPE sigchld_handler(int UNUSED(val)) { #ifdef WNOHANG - while (waitpid(-1, NULL, WNOHANG) > 0) ; + int cnt, status; + pid_t pid; + /* An empty waitpid() loop was put here by Tridge and we could never + * get him to explain why he put it in, so rather than taking it + * out we're instead saving the child exit statuses for later use. + * The waitpid() loop presumably eliminates all possibility of leaving + * zombie children, maybe that's why he did it. + */ + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { + /* save the child's exit status */ + for (cnt = 0; cnt < MAXCHILDPROCS; cnt++) { + if (pid_stat_table[cnt].pid == 0) { + pid_stat_table[cnt].pid = pid; + pid_stat_table[cnt].status = status; + break; + } + } + } #endif } -- 2.34.1