--- projects/pjob/pjob.c 2006/01/24 20:15:25 1.3 +++ projects/pjob/pjob.c 2006/01/30 21:46:59 1.6 @@ -25,6 +25,9 @@ #include #include #include +#include +#include +#include #define DEFNUMTHREAD 10 #define DEFTIMEOUT 60 @@ -35,6 +38,7 @@ struct _process_t{ gchar *jobname; GTimer *timer; GPid pid; + gint stat_loc; gchar *file_stdout; gchar *file_stderr; @@ -124,12 +128,12 @@ void process_child(gpointer data, gpointer user_data){ /* Exec the job */ if (infile == NULL){ - if( ! g_spawn_async_with_pipes(NULL, execargv, NULL, 0, NULL, NULL, &(proc->pid), NULL, &(outpipes[0]), &(outpipes[1]), &err)){ + if( ! g_spawn_async_with_pipes(NULL, execargv, NULL, G_SPAWN_DO_NOT_REAP_CHILD, NULL, NULL, &(proc->pid), NULL, &(outpipes[0]), &(outpipes[1]), &err)){ g_printerr("Failed to execute job %s: %s\n", proc->jobname, err->message); return; } }else{ - if( ! g_spawn_async_with_pipes(NULL, execargv, NULL, 0, NULL, NULL, &(proc->pid), &(inpipes[1]), &(outpipes[0]), &(outpipes[1]), &err)){ + if( ! g_spawn_async_with_pipes(NULL, execargv, NULL, G_SPAWN_DO_NOT_REAP_CHILD, NULL, NULL, &(proc->pid), &(inpipes[1]), &(outpipes[0]), &(outpipes[1]), &err)){ g_printerr("Failed to execute job %s: %s\n", proc->jobname, err->message); return; } @@ -165,7 +169,7 @@ void process_child(gpointer data, gpointer user_data){ poll(fds, fdssize, -1); /* For stdout and stderr see if there is any data, and read it */ for(x=0; x<2; x++){ - if((fds[x].revents & POLLIN) == 0){ + if((fds[x].revents & POLLIN) != 0){ /* We have data to read */ g_io_channel_read_line(sout[x], &readbuf, &rdatasize, NULL, NULL); if(rdatasize > 0){ @@ -208,8 +212,24 @@ void process_child(gpointer data, gpointer user_data){ if(((fds[0].revents & POLLHUP) != 0) && ((fds[1].revents & POLLHUP) != 0)) break; } + while((waitpid(proc->pid, &(proc->stat_loc), 0)) != proc->pid); + g_timer_stop(proc->timer); + /* If process exited cleanly */ + if (WIFEXITED(proc->stat_loc)){ + /* Get the exit code */ + if (verbose) g_fprintf(stderr, "Job '%s' 
exited with code %d. Exec time: %.2f\n", proc->jobname, WEXITSTATUS(proc->stat_loc), g_timer_elapsed(proc->timer, 0)); + }else{ + /* Otherwise - find out what it died with */ + /* TODO - this doesn't work quite right.. Mainly because it's looking at the shell process, so the + * child of the /bin/sh which does get a signal, this isn't passed up. Although, it handily tells + * us if the /bin/sh gets a SEGV etc ;) + */ + g_fprintf(stderr, "Job %s exited with signal %d. Exec time: %.2f\n", proc->jobname, (WTERMSIG(proc->stat_loc)), g_timer_elapsed(proc->timer, 0)); + } + + g_io_channel_shutdown(sout[0], TRUE, NULL); g_io_channel_shutdown(sout[1], TRUE, NULL); @@ -223,7 +243,6 @@ void process_child(gpointer data, gpointer user_data){ g_spawn_close_pid(proc->pid); - if (verbose) g_fprintf(stderr, "Ending job '%s'\n", proc->jobname); } @@ -313,6 +332,8 @@ int main(int argc, char **argv){ GError *pp_err = NULL, *err = NULL; gint x; + struct rlimit rlp; + GOptionContext *optcontext; optcontext = g_option_context_new(" - parallel job executer"); @@ -342,6 +363,14 @@ int main(int argc, char **argv){ }else{ g_printerr("Threading not supported\n"); } + + /* Up the number of FDs to the "hard" limit. + * This is mainly to get around the very small default + * Solaris has, of 256 + */ + getrlimit(RLIMIT_NOFILE, &rlp); + rlp.rlim_cur = rlp.rlim_max; + setrlimit(RLIMIT_NOFILE, &rlp); if(verbose) g_printerr("Creating a threadpool %d in size\n", numthreads); procpool = g_thread_pool_new(process_child, NULL, numthreads, FALSE, &pp_err);