Source: James Olin Oden Type: Defect Fix Disposition: Description: Very small race condition that could deadlock in rpmsq. From the email announcing this: Before the fork I create and lock a mutex() Then we fork() In the signal handler I unlock the mutex In rpmsqWaitUnregister() I try to aquire the lock Index: rpm-4.3.3/rpmio/rpmsq.c =================================================================== --- rpm-4.3.3.orig/rpmio/rpmsq.c +++ rpm-4.3.3/rpmio/rpmsq.c @@ -208,7 +208,6 @@ sq->id = ME(); ret = pthread_mutex_init(&sq->mutex, NULL); - ret = pthread_cond_init(&sq->cond, NULL); insque(elem, (prev != NULL ? prev : rpmsqQueue)); ret = sigrelse(SIGCHLD); } @@ -230,8 +229,11 @@ ret = sighold (SIGCHLD); if (ret == 0) { remque(elem); - ret = pthread_cond_destroy(&sq->cond); - ret = pthread_mutex_destroy(&sq->mutex); + + /* Unlock the mutex and then destroy it */ + if((ret = pthread_mutex_unlock(&sq->mutex)) == 0) + ret = pthread_mutex_destroy(&sq->mutex); + sq->id = NULL; /*@-bounds@*/ if (sq->pipes[1]) ret = close(sq->pipes[1]); @@ -305,11 +307,20 @@ sq != NULL && sq != rpmsqQueue; sq = sq->q_forw) { + int ret; + if (sq->child != reaped) /*@innercontinue@*/ continue; sq->reaped = reaped; sq->status = status; - (void) pthread_cond_signal(&sq->cond); + + /* Unlock the mutex. The waiter will then be able to + * aquire the lock. + * + * XXX: jbj, wtd, if this fails? + */ + ret = pthread_mutex_unlock(&sq->mutex); + /*@innerbreak@*/ break; } } @@ -381,6 +392,7 @@ { pid_t pid; int xx; + int nothreads = 0; /* XXX: Shouldn't this be a global? */ if (sq->reaper) { xx = rpmsqInsert(sq, NULL); @@ -395,6 +407,24 @@ xx = sighold(SIGCHLD); + /* + * Initialize the cond var mutex. We have to aquire the lock we + * use for the condition before we fork. Otherwise it is possible for + * the child to exit, we get sigchild and the sig handler to send + * the condition signal before we are waiting on the condition. + */ + if (!nothreads) { + if(pthread_mutex_lock(&sq->mutex)) { + /* Yack we did not get the lock, lets just give up */ +/*@-bounds@*/ + xx = close(sq->pipes[0]); + xx = close(sq->pipes[1]); + sq->pipes[0] = sq->pipes[1] = -1; +/*@=bounds@*/ + goto out; + } + } + pid = fork(); if (pid < (pid_t) 0) { /* fork failed. */ /*@-bounds@*/ @@ -452,10 +482,6 @@ /* Protect sq->reaped from handler changes. */ ret = sighold(SIGCHLD); - /* Initialize the cond var mutex. */ - if (!nothreads) - ret = pthread_mutex_lock(&sq->mutex); - /* Start the child, linux often runs child before parent. */ /*@-bounds@*/ if (sq->pipes[0] >= 0) @@ -476,7 +502,13 @@ ret = sigpause(SIGCHLD); else { xx = sigrelse(SIGCHLD); - ret = pthread_cond_wait(&sq->cond, &sq->mutex); + + /* + * We start before the fork with this mutex locked; + * The only one that unlocks this the signal handler. + * So if we get the lock the child has been reaped. + */ + ret = pthread_mutex_lock(&sq->mutex); xx = sighold(SIGCHLD); } } @@ -485,9 +517,6 @@ /* Accumulate stopwatch time spent waiting, potential performance gain. */ sq->ms_scriptlets += rpmswExit(&sq->op, -1)/1000; - /* Tear down cond var mutex, our child has been reaped. */ - if (!nothreads) - xx = pthread_mutex_unlock(&sq->mutex); xx = sigrelse(SIGCHLD); #ifdef _RPMSQ_DEBUG