From 6c18490d12ad6837636b54afe97641513042f95d Mon Sep 17 00:00:00 2001 From: Thomas White Date: Thu, 15 Nov 2018 10:33:47 +0100 Subject: indexamajig: Name and shame slow processing stages --- src/im-sandbox.c | 16 +++++++++++++++- src/im-sandbox.h | 7 +++++++ src/process_image.c | 20 ++++++++++++++++++-- src/process_image.h | 3 ++- 4 files changed, 42 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/im-sandbox.c b/src/im-sandbox.c index 80ac4166..ad2bc5fc 100644 --- a/src/im-sandbox.c +++ b/src/im-sandbox.c @@ -153,6 +153,8 @@ static void check_hung_workers(struct sandbox *sb) "frame for more than 10 minutes.\n", i); STATUS("Event ID is: %s\n", sb->shared->last_ev[i]); + STATUS("Task ID is: %s\n", + sb->shared->last_task[i]); sb->shared->warned_long_running[i] = 1; } } @@ -321,6 +323,14 @@ static void shuffle_events(struct sb_shm *sb_shared) } +void set_last_task(char *lt, const char *task) +{ + if ( lt == NULL ) return; + assert(strlen(task) < MAX_TASK_LEN-1); + strcpy(lt, task); +} + + static void run_work(const struct index_args *iargs, Stream *st, int cookie, const char *tmpdir, struct sandbox *sb) { @@ -338,12 +348,14 @@ static void run_work(const struct index_args *iargs, Stream *st, /* Wait until an event is ready */ time_accounts_set(taccs, TACC_EVENTWAIT); + set_last_task(sb->shared->last_task[cookie], "wait_event"); if ( sem_wait(sb->queue_sem) != 0 ) { ERROR("Failed to wait on queue semaphore: %s\n", strerror(errno)); } /* Get the event from the queue */ + set_last_task(sb->shared->last_task[cookie], "read_queue"); pthread_mutex_lock(&sb->shared->queue_lock); if ( sb->shared->no_more ) { pthread_mutex_unlock(&sb->shared->queue_lock); @@ -390,7 +402,7 @@ static void run_work(const struct index_args *iargs, Stream *st, sb->shared->time_last_start[cookie] = get_monotonic_seconds(); process_image(iargs, &pargs, st, cookie, tmpdir, ser, - sb->shared, taccs); + sb->shared, taccs, sb->shared->last_task[cookie]); free_filename_plus_event(pargs.filename_p_e); @@ -705,6 +717,8 @@ static void handle_zombie(struct sandbox *sb, int respawn) i, WTERMSIG(status)); STATUS("Event ID was: %s\n", sb->shared->last_ev[i]); + STATUS("Task ID was: %s\n", + sb->shared->last_task[i]); if ( respawn ) start_worker_process(sb, i); } diff --git a/src/im-sandbox.h b/src/im-sandbox.h index 1412d199..e5f3422b 100644 --- a/src/im-sandbox.h +++ b/src/im-sandbox.h @@ -49,6 +49,10 @@ struct sb_shm; /* Maximum length of an event ID including serial number */ #define MAX_EV_LEN (1024) +/* Maximum length of a task ID, e.g. indexing:xgandalf. + * NB If changing this, also update the value in index.c */ +#define MAX_TASK_LEN (32) + /* Maximum number of workers */ #define MAX_NUM_WORKERS (1024) @@ -61,6 +65,7 @@ struct sb_shm char queue[QUEUE_SIZE][MAX_EV_LEN]; int no_more; char last_ev[MAX_NUM_WORKERS][MAX_EV_LEN]; + char last_task[MAX_NUM_WORKERS][MAX_TASK_LEN]; int pings[MAX_NUM_WORKERS]; time_t time_last_start[MAX_NUM_WORKERS]; int warned_long_running[MAX_NUM_WORKERS]; @@ -73,6 +78,8 @@ struct sb_shm extern char *create_tempdir(const char *temp_location); +extern void set_last_task(char *lt, const char *task); + extern void create_sandbox(struct index_args *iargs, int n_proc, char *prefix, int config_basename, FILE *fh, Stream *stream, const char *tempdir, int serial_start); diff --git a/src/process_image.c b/src/process_image.c index 10863fd3..2fbf0e4a 100644 --- a/src/process_image.c +++ b/src/process_image.c @@ -99,7 +99,8 @@ static void restore_image_data(float **dp, struct detector *det, float **bu) void process_image(const struct index_args *iargs, struct pattern_args *pargs, Stream *st, int cookie, const char *tmpdir, - int serial, struct sb_shm *sb_shared, TimeAccounts *taccs) + int serial, struct sb_shm *sb_shared, TimeAccounts *taccs, + char *last_task) { int check; struct imagefile *imfile; @@ -124,6 +125,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, image.indexed_by = INDEXING_NONE; time_accounts_set(taccs, TACC_HDF5OPEN); + set_last_task(last_task, "open file"); sb_shared->pings[cookie]++; imfile = imagefile_open(image.filename); if ( imfile == NULL ) { @@ -132,6 +134,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, } time_accounts_set(taccs, TACC_HDF5READ); + set_last_task(last_task, "read file"); sb_shared->pings[cookie]++; check = imagefile_read(imfile, &image, image.event); if ( check ) { @@ -140,6 +143,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Take snapshot of image before applying horrible noise filters */ time_accounts_set(taccs, TACC_FILTER); + set_last_task(last_task, "image filter"); sb_shared->pings[cookie]++; prefilter = backup_image_data(image.dp, image.det); @@ -152,6 +156,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, } time_accounts_set(taccs, TACC_RESRANGE); + set_last_task(last_task, "resolution range"); sb_shared->pings[cookie]++; mark_resolution_range_as_bad(&image, iargs->highres, +INFINITY); @@ -162,6 +167,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, struct hdfile *hdfile; case PEAK_HDF5: + set_last_task(last_task, "peaksearch:hdf5"); hdfile = imagefile_get_hdfile(imfile); if ( (hdfile == NULL) || (get_peaks_2(&image, hdfile, iargs->hdf5_peak_path, @@ -178,6 +184,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_CXI: + set_last_task(last_task, "peaksearch:cxi"); hdfile = imagefile_get_hdfile(imfile); if ( (hdfile == NULL) || (get_peaks_cxi_2(&image, hdfile, iargs->hdf5_peak_path, @@ -195,6 +202,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_ZAEF: + set_last_task(last_task, "peaksearch:zaef"); search_peaks(&image, iargs->threshold, iargs->min_gradient, iargs->min_snr, iargs->pk_inn, iargs->pk_mid, iargs->pk_out, @@ -202,6 +210,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_PEAKFINDER8: + set_last_task(last_task, "peaksearch:pf8"); if ( search_peaks_peakfinder8(&image, 2048, iargs->threshold, iargs->min_snr, @@ -224,6 +233,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_PEAKFINDER9: + set_last_task(last_task, "peaksearch:pf9"); if ( search_peaks_peakfinder9(&image, iargs->min_snr_biggest_pix, iargs->min_snr_peak_pix, @@ -297,7 +307,9 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Index the pattern */ time_accounts_set(taccs, TACC_INDEXING); - index_pattern_2(&image, iargs->ipriv, &sb_shared->pings[cookie]); + set_last_task(last_task, "indexing"); + index_pattern_3(&image, iargs->ipriv, &sb_shared->pings[cookie], + last_task); r = chdir(rn); if ( r ) { @@ -309,6 +321,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Set beam/crystal parameters */ time_accounts_set(taccs, TACC_PREDPARAMS); + set_last_task(last_task, "prediction params"); if ( iargs->fix_profile_r >= 0.0 ) { for ( i=0; ipings[cookie]++; integrate_all_5(&image, iargs->int_meth, PMODEL_XSPHERE, iargs->push_res, @@ -342,6 +356,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, streamwrite: time_accounts_set(taccs, TACC_WRITESTREAM); + set_last_task(last_task, "stream write"); sb_shared->pings[cookie]++; ret = write_chunk(st, &image, imfile, iargs->stream_peaks, iargs->stream_refls, @@ -363,6 +378,7 @@ streamwrite: out: /* Count crystals which are still good */ time_accounts_set(taccs, TACC_TOTALS); + set_last_task(last_task, "process_image finalisation"); sb_shared->pings[cookie]++; pthread_mutex_lock(&sb_shared->totals_lock); any_crystals = 0; diff --git a/src/process_image.h b/src/process_image.h index d4dcbf04..d4b1ce59 100644 --- a/src/process_image.h +++ b/src/process_image.h @@ -127,7 +127,8 @@ struct pattern_args extern void process_image(const struct index_args *iargs, struct pattern_args *pargs, Stream *st, int cookie, const char *tmpdir, int serial, - struct sb_shm *sb_shared, TimeAccounts *taccs); + struct sb_shm *sb_shared, TimeAccounts *taccs, + char *last_task); #endif /* PROCESS_IMAGE_H */ -- cgit v1.2.3