From 3fb81ddf7059599dba9b8b223156801af6885fc1 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Fri, 29 Apr 2022 16:53:49 +0200 Subject: Add improved profiling Improvements: - Recursive, so we can see how things are working at different levels - Everything is profiled, even if not explicitly labelled - No possibility of losing time - Detects mistakes if routines aren't wrapped correctly --- libcrystfel/CMakeLists.txt | 2 + libcrystfel/meson.build | 1 + libcrystfel/src/image-hdf5.c | 11 +++ libcrystfel/src/image.c | 39 ++++++-- libcrystfel/src/index.c | 39 +++++++- libcrystfel/src/profile.c | 220 +++++++++++++++++++++++++++++++++++++++++++ libcrystfel/src/profile.h | 42 +++++++++ src/im-sandbox.c | 12 ++- src/process_image.c | 25 +++++ 9 files changed, 380 insertions(+), 11 deletions(-) create mode 100644 libcrystfel/src/profile.c create mode 100644 libcrystfel/src/profile.h diff --git a/libcrystfel/CMakeLists.txt b/libcrystfel/CMakeLists.txt index 69668a8c..3ca50b1f 100644 --- a/libcrystfel/CMakeLists.txt +++ b/libcrystfel/CMakeLists.txt @@ -62,6 +62,7 @@ set(LIBCRYSTFEL_SOURCES src/image-hdf5.c src/fom.c src/image-msgpack.c + src/profile.c ${BISON_symopp_OUTPUTS} ${FLEX_symopl_OUTPUTS} src/indexers/dirax.c @@ -99,6 +100,7 @@ set(LIBCRYSTFEL_HEADERS src/colscale.h src/detgeom.h src/fom.h + src/profile.h ) add_library(${PROJECT_NAME} SHARED diff --git a/libcrystfel/meson.build b/libcrystfel/meson.build index 5b10ce14..d333d554 100644 --- a/libcrystfel/meson.build +++ b/libcrystfel/meson.build @@ -121,6 +121,7 @@ libcrystfel_sources = ['src/image.c', 'src/colscale.c', 'src/detgeom.c', 'src/fom.c', + 'src/profile.c', 'src/image-cbf.c', 'src/image-hdf5.c', 'src/image-msgpack.c', diff --git a/libcrystfel/src/image-hdf5.c b/libcrystfel/src/image-hdf5.c index fd16ac08..529420fb 100644 --- a/libcrystfel/src/image-hdf5.c +++ b/libcrystfel/src/image-hdf5.c @@ -42,6 +42,7 @@ #include "image.h" #include "utils.h" #include "detgeom.h" +#include "profile.h" #include "datatemplate.h" #include "datatemplate_priv.h" @@ -395,13 +396,16 @@ static int load_hdf5_hyperslab(struct panel_template *p, return 1; } + profile_start("H5Dopen2"); dh = H5Dopen2(fh, panel_full_path, H5P_DEFAULT); if ( dh < 0 ) { ERROR("Cannot open data for panel %s (%s)\n", p->name, panel_full_path); + profile_end("H5Dopen2"); free(panel_full_path); return 1; } + profile_end("H5Dopen2"); free(panel_full_path); @@ -509,7 +513,9 @@ static int load_hdf5_hyperslab(struct panel_template *p, return 1; } + profile_start("H5Dread"); r = H5Dread(dh, el_type, memspace, dataspace, H5P_DEFAULT, data); + profile_end("H5Dread"); if ( r < 0 ) { ERROR("Couldn't read data for panel %s\n", p->name); @@ -592,13 +598,16 @@ int image_hdf5_read(struct image *image, /* Set all pointers to NULL for easier clean-up */ for ( i=0; in_panels; i++ ) image->dp[i] = NULL; + profile_start("open-hdf5"); fh = open_hdf5(image); + profile_end("open-hdf5"); if ( fh < 0 ) { ERROR("Failed to open file\n"); return 1; } for ( i=0; in_panels; i++ ) { + profile_start("load-hdf5-hyperslab"); if ( load_hdf5_hyperslab(&dtempl->panels[i], fh, image->ev, (void *)&image->dp[i], H5T_NATIVE_FLOAT, @@ -606,9 +615,11 @@ int image_hdf5_read(struct image *image, dtempl->panels[i].data) ) { ERROR("Failed to load panel data\n"); + profile_end("load-hdf5-hyperslab"); close_hdf5(fh); return 1; } + profile_end("load-hdf5-hyperslab"); } close_hdf5(fh); diff --git a/libcrystfel/src/image.c b/libcrystfel/src/image.c index 7ad8d428..2209ffee 100644 --- a/libcrystfel/src/image.c +++ b/libcrystfel/src/image.c @@ -42,6 +42,7 @@ #include "image-hdf5.h" #include "image-cbf.h" #include "image-msgpack.h" +#include "profile.h" #include "datatemplate.h" #include "datatemplate_priv.h" @@ -1289,28 +1290,54 @@ static int do_image_read(struct image *image, const DataTemplate *dtempl, int no_image_data, int no_mask_data) { int i; + int r; /* Load the image data */ if ( !no_image_data ) { - if ( image_read_image_data(image, dtempl) ) return 1; + int r; + profile_start("load-image-data"); + r = image_read_image_data(image, dtempl); + profile_end("load-image-data"); + if ( r ) return r; } else { - if ( image_set_zero_data(image, dtempl) ) return 1; + int r; + profile_start("set-zero-image-data"); + r = image_set_zero_data(image, dtempl); + profile_end("set-zero-image-data"); + if ( r ) return 1; } - if ( set_image_parameters(image, dtempl) ) { + profile_start("set-image-parameters"); + r = set_image_parameters(image, dtempl); + profile_end("set-image-parameters"); + if ( r ) { ERROR("Failed to read image parameters\n"); return 1; } - if ( create_detgeom(image, dtempl) ) { + + profile_start("create-detgeom"); + r = create_detgeom(image, dtempl); + profile_end("create-detgeom"); + if ( r ) { ERROR("Failed to read geometry information\n"); return 1; } - if ( create_badmap(image, dtempl, no_mask_data) ) return 1; - if ( create_satmap(image, dtempl) ) return 1; + profile_start("create-badmap"); + r = create_badmap(image, dtempl, no_mask_data); + profile_end("create-badmap"); + if ( r ) return 1; + + profile_start("create-satmap"); + r = create_satmap(image, dtempl); + profile_end("create-satmap"); + if ( r ) return 1; + + profile_start("read-headers-to-cache"); for ( i=0; in_headers_to_copy; i++ ) { read_header_to_cache(image, dtempl->headers_to_copy[i]); } + profile_end("read-headers-to-cache"); return 0; } diff --git a/libcrystfel/src/index.c b/libcrystfel/src/index.c index e7b8b8cf..da560dab 100644 --- a/libcrystfel/src/index.c +++ b/libcrystfel/src/index.c @@ -49,6 +49,7 @@ #include "geometry.h" #include "cell-utils.h" #include "predict-refine.h" +#include "profile.h" #include "indexers/dirax.h" #include "indexers/asdf.h" #include "indexers/mosflm.h" @@ -604,47 +605,65 @@ static int try_indexer(struct image *image, IndexingMethod indm, case INDEXING_DIRAX : set_last_task(last_task, "indexing:dirax"); + profile_start("dirax"); r = run_dirax(image, mpriv); + profile_end("dirax"); break; case INDEXING_ASDF : set_last_task(last_task, "indexing:asdf"); + profile_start("asdf"); r = run_asdf(image, mpriv); + profile_end("asdf"); break; case INDEXING_MOSFLM : set_last_task(last_task, "indexing:mosflm"); + profile_start("mosflm"); r = run_mosflm(image, mpriv); + profile_end("mosflm"); break; case INDEXING_XDS : set_last_task(last_task, "indexing:xds"); + profile_start("xds"); r = run_xds(image, mpriv); + profile_end("xds"); break; case INDEXING_FILE : set_last_task(last_task, "indexing:file"); + profile_start("fromfile"); r = fromfile_index(image, mpriv); + profile_end("fromfile"); break; case INDEXING_FELIX : set_last_task(last_task, "indexing:felix"); + profile_start("felix"); r = felix_index(image, mpriv); + profile_end("felix"); break; case INDEXING_TAKETWO : set_last_task(last_task, "indexing:taketwo"); + profile_start("taketwo"); r = taketwo_index(image, mpriv); + profile_end("taketwo"); break; case INDEXING_PINKINDEXER : set_last_task(last_task, "indexing:pinkindexer"); + profile_start("pinkindexer"); r = run_pinkIndexer(image, mpriv, ipriv->n_threads); + profile_end("pinkindexer"); break; case INDEXING_XGANDALF : set_last_task(last_task, "indexing:xgandalf"); + profile_start("xgandalf"); r = run_xgandalf(image, mpriv); + profile_end("xgandalf"); break; default : @@ -670,6 +689,7 @@ static int try_indexer(struct image *image, IndexingMethod indm, for ( i=0; in_crystals - i - 1; /* ... starting at the end of the (complete) list ... */ @@ -680,9 +700,11 @@ static int try_indexer(struct image *image, IndexingMethod indm, crystal_set_mosaicity(cr, 0.0); /* Pre-refinement unit cell check if requested */ - if ( check_cell(ipriv->flags, cr, ipriv->target_cell, - ipriv->tolerance) ) - { + profile_start("prerefine-cell-check"); + r = check_cell(ipriv->flags, cr, ipriv->target_cell, + ipriv->tolerance); + profile_end("prerefine-cell-check"); + if ( r ) { crystal_set_user_flag(cr, 1); continue; } @@ -690,20 +712,27 @@ static int try_indexer(struct image *image, IndexingMethod indm, /* Prediction refinement if requested */ if ( ipriv->flags & INDEXING_REFINE ) { - if ( refine_prediction(image, cr) ) { + int r; + profile_start("refine"); + r = refine_prediction(image, cr); + profile_end("refine"); + if ( r ) { crystal_set_user_flag(cr, 1); continue; } } /* After refinement unit cell check if requested */ + profile_start("postrefine-cell-check"); if ( (ipriv->flags & INDEXING_CHECK_CELL) && !compare_cell_parameters(crystal_get_cell(cr), ipriv->target_cell, ipriv->tolerance) ) { + profile_end("postrefine-cell-check"); crystal_set_user_flag(cr, 1); continue; } + profile_end("postrefine-cell-check"); /* Peak alignment check if requested */ if ( ipriv->flags & INDEXING_CHECK_PEAKS ) @@ -719,6 +748,7 @@ static int try_indexer(struct image *image, IndexingMethod indm, if ( crystal_get_user_flag(cr) ) continue; /* Check if cell is too similar to existing ones */ + profile_start("cell-compare-to-others"); for ( j=0; jcrystals[j]; @@ -738,6 +768,7 @@ static int try_indexer(struct image *image, IndexingMethod indm, crystal_set_user_flag(cr, 1); } } + profile_end("cell-compare-to-others"); } diff --git a/libcrystfel/src/profile.c b/libcrystfel/src/profile.c new file mode 100644 index 00000000..2bbebf1f --- /dev/null +++ b/libcrystfel/src/profile.c @@ -0,0 +1,220 @@ +/* + * profile.c + * + * Simple profiling according to wall clock time + * + * Copyright © 2016-2022 Deutsches Elektronen-Synchrotron DESY, + * a research centre of the Helmholtz Association. + * + * Authors: + * 2016-2022 Thomas White + * + * This file is part of CrystFEL. + * + * CrystFEL is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CrystFEL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CrystFEL. If not, see . + * + */ + +#include + +#include +#include +#include +#include +#include + +#include "profile.h" + +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW (CLOCK_MONOTONIC) +#endif + +#define MAX_PROFILE_CHILDREN 32 + +struct _profile_block +{ + char *name; + + time_t start_sec; + long start_nsec; + double total_time; + + struct _profile_block *parent; + struct _profile_block *children[MAX_PROFILE_CHILDREN]; + int n_children; +}; + + +struct _profiledata +{ + struct _profile_block *root; + struct _profile_block *current; +}; + + +struct _profiledata *pd = NULL; + + +static struct _profile_block *start_profile_block(const char *name) +{ + struct _profile_block *b; + + b = malloc(sizeof(struct _profile_block)); + if ( b == NULL ) return NULL; + + b->name = strdup(name); + if ( b->name == NULL ) { + free(b); + return NULL; + } + b->n_children = 0; + +#ifdef HAVE_CLOCK_GETTIME + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC_RAW, &tp); + b->start_sec = tp.tv_sec; + b->start_nsec = tp.tv_nsec; +#endif + + return b; +} + + +static void stop_profile_block(struct _profile_block *b) +{ +#ifdef HAVE_CLOCK_GETTIME + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC_RAW, &tp); + time_t sec = tp.tv_sec - b->start_sec; + long nsec = tp.tv_nsec - b->start_nsec; + b->total_time = sec + nsec*1e-9; +#else + b->total_time = 0.0; +#endif +} + + +void profile_init() +{ + if ( pd != NULL ) { + fprintf(stderr, "Attempted to initialise profiling twice!\n"); + fflush(stderr); + abort(); + } + + if ( pd == NULL ) { + pd = malloc(sizeof(struct _profiledata)); + if ( pd == NULL ) return; + } + + pd->root = start_profile_block("root"); + pd->current = pd->root; + pd->root->parent = NULL; + +#ifndef HAVE_CLOCK_GETTIME + printf("Profiling disabled because clock_gettime is not available\n"); +#endif +} + + +static void print_profile_block(struct _profile_block *b) +{ + int i; + printf("(%s %.3f", b->name, b->total_time); + for ( i=0; in_children; i++ ) { + printf(" "); + print_profile_block(b->children[i]); + } + printf(")"); +} + + +static void free_profile_block(struct _profile_block *b) +{ + int i; + for ( i=0; in_children; i++ ) { + free_profile_block(b->children[i]); + } + free(b->name); + free(b); +} + + +void profile_print_and_reset() +{ + if ( pd == NULL ) { + fprintf(stderr, "Profiling not initialised yet!\n"); + fflush(stderr); + abort(); + } + + if ( pd->current != pd->root ) { + fprintf(stderr, "Attempted to finalise profiling while not " + "on root block (%s)\n", pd->current->name); + fflush(stderr); + abort(); + } + + stop_profile_block(pd->root); + + print_profile_block(pd->root); + printf("\n"); + + free_profile_block(pd->root); + pd->root = start_profile_block("root"); + pd->current = pd->root; + pd->root->parent = NULL; +} + + +void profile_start(const char *name) +{ + struct _profile_block *b; + + if ( pd == NULL ) return; + + if ( pd->current->n_children >= MAX_PROFILE_CHILDREN ) { + fprintf(stderr, "Too many profile children.\n"); + fflush(stderr); + abort(); + } + + b = start_profile_block(name); + b->parent = pd->current; + pd->current->children[pd->current->n_children++] = b; + pd->current = b; +} + + +void profile_end(const char *name) +{ + if ( pd == NULL ) return; + + if ( pd->current == NULL ) { + fprintf(stderr, "No current profile block!\n"); + fflush(stderr); + abort(); + } + + if ( strcmp(name, pd->current->name) != 0 ) { + fprintf(stderr, "Attempt to close wrong profile block (%s) " + "current block is %s\n", name, pd->current->name); + fflush(stderr); + abort(); + } + + stop_profile_block(pd->current); + + pd->current = pd->current->parent; +} diff --git a/libcrystfel/src/profile.h b/libcrystfel/src/profile.h new file mode 100644 index 00000000..183528ce --- /dev/null +++ b/libcrystfel/src/profile.h @@ -0,0 +1,42 @@ +/* + * profile.h + * + * Simple profiling according to wall clock time + * + * Copyright © 2016-2022 Deutsches Elektronen-Synchrotron DESY, + * a research centre of the Helmholtz Association. + * + * Authors: + * 2016-2022 Thomas White + * + * This file is part of CrystFEL. + * + * CrystFEL is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * CrystFEL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with CrystFEL. If not, see . + * + */ + +#ifndef PROFILE_H +#define PROFILE_H + +/** + * \file profile.h + * Simple wall-clock profiling + */ + +extern void profile_init(); +extern void profile_print_and_reset(); +extern void profile_start(const char *name); +extern void profile_end(const char *name); + +#endif /* PROFILE_H */ diff --git a/src/im-sandbox.c b/src/im-sandbox.c index ad22d96f..ace58499 100644 --- a/src/im-sandbox.c +++ b/src/im-sandbox.c @@ -61,6 +61,7 @@ #include "im-sandbox.h" #include "process_image.h" #include "im-zmq.h" +#include "profile.h" struct sandbox @@ -330,6 +331,10 @@ static int run_work(const struct index_args *iargs, Stream *st, int allDone = 0; struct im_zmq *zmqstuff = NULL; + if ( sb->profile ) { + profile_init(); + } + /* Connect via ZMQ */ if ( sb->zmq ) { zmqstuff = im_zmq_connect(sb->zmq_address, @@ -356,10 +361,12 @@ static int run_work(const struct index_args *iargs, Stream *st, /* Wait until an event is ready */ sb->shared->pings[cookie]++; set_last_task(sb->shared->last_task[cookie], "wait_event"); + profile_start("wait-queue-semaphore"); if ( sem_wait(sb->queue_sem) != 0 ) { ERROR("Failed to wait on queue semaphore: %s\n", strerror(errno)); } + profile_end("wait-queue-semaphore"); /* Get the event from the queue */ set_last_task(sb->shared->last_task[cookie], "read_queue"); @@ -446,8 +453,10 @@ static int run_work(const struct index_args *iargs, Stream *st, } sb->shared->time_last_start[cookie] = get_monotonic_seconds(); + profile_start("process-image"); process_image(iargs, &pargs, st, cookie, tmpdir, ser, - sb->shared, taccs, sb->shared->last_task[cookie]); + sb->shared, sb->shared->last_task[cookie]); + profile_end("process-image"); /* pargs.zmq_data will be copied into the image structure, so * that it can be queried for "header" values etc. It will @@ -455,6 +464,7 @@ static int run_work(const struct index_args *iargs, Stream *st, if ( sb->profile ) { pthread_mutex_lock(&sb->shared->term_lock); + profile_print_and_reset(); pthread_mutex_unlock(&sb->shared->term_lock); } } diff --git a/src/process_image.c b/src/process_image.c index 46c5f729..5282e135 100644 --- a/src/process_image.c +++ b/src/process_image.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "process_image.h" #include "predict-refine.h" @@ -150,8 +151,10 @@ static struct image *file_wait_open_read(const char *filename, set_last_task(last_task, "read file"); sb_shared->pings[cookie]++; + profile_start("image-read"); image = image_read(dtempl, filename, event, no_image_data, no_mask_data); + profile_end("image-read"); if ( image == NULL ) { if ( wait_for_file && !read_retry_done ) { read_retry_done = 1; @@ -186,6 +189,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, if ( pargs->zmq_data != NULL ) { set_last_task(last_task, "unpacking messagepack object"); + profile_start("read-data-block"); image = image_read_data_block(iargs->dtempl, pargs->zmq_data, pargs->zmq_data_size, @@ -193,8 +197,10 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, serial, iargs->no_image_data, iargs->no_mask_data); + profile_end("read-data-block"); if ( image == NULL ) return; } else { + profile_start("file-wait-open-read"); image = file_wait_open_read(pargs->filename, pargs->event, iargs->dtempl, sb_shared, last_task, @@ -202,6 +208,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, cookie, iargs->no_image_data, iargs->no_mask_data); + profile_end("file-wait-open-read"); if ( image == NULL ) { if ( iargs->wait_for_file != 0 ) { pthread_mutex_lock(&sb_shared->totals_lock); @@ -216,27 +223,36 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Take snapshot of image before applying horrible noise filters */ set_last_task(last_task, "image filter"); + profile_start("image-filter"); sb_shared->pings[cookie]++; if ( (iargs->median_filter > 0) || iargs->noisefilter ) { + profile_start("data-backup"); prefilter = backup_image_data(image->dp, image->detgeom); + profile_end("data-backup"); } else { prefilter = NULL; } if ( iargs->median_filter > 0 ) { + profile_start("median-filter"); filter_median(image, iargs->median_filter); + profile_end("median-filter"); } if ( iargs->noisefilter ) { + profile_start("noise-filter"); filter_noise(image); + profile_end("noise-filter"); } + profile_end("image-filter"); set_last_task(last_task, "resolution range"); sb_shared->pings[cookie]++; mark_resolution_range_as_bad(image, iargs->highres, +INFINITY); sb_shared->pings[cookie]++; + profile_start("peak-search"); switch ( iargs->peaks ) { case PEAK_HDF5: @@ -310,13 +326,16 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; } + profile_end("peak-search"); image->peak_resolution = estimate_peak_resolution(image->features, image->lambda, image->detgeom); if ( prefilter != NULL ) { + profile_start("restore-filter-backup"); restore_image_data(image->dp, image->detgeom, prefilter); + profile_end("restore-filter-backup"); } rn = getcwd(NULL, 0); @@ -354,8 +373,10 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Index the pattern */ set_last_task(last_task, "indexing"); + profile_start("index"); index_pattern_3(image, iargs->ipriv, &sb_shared->pings[cookie], last_task); + profile_end("index"); r = chdir(rn); if ( r ) { @@ -389,6 +410,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Integrate! */ set_last_task(last_task, "integration"); + profile_start("integration"); sb_shared->pings[cookie]++; integrate_all_5(image, iargs->int_meth, PMODEL_XSPHERE, iargs->push_res, @@ -396,14 +418,17 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, iargs->int_diag, iargs->int_diag_h, iargs->int_diag_k, iargs->int_diag_l, &sb_shared->term_lock, iargs->overpredict); + profile_end("integration"); streamwrite: set_last_task(last_task, "stream write"); + profile_start("write-stream"); sb_shared->pings[cookie]++; ret = stream_write_chunk(st, image, iargs->stream_flags); if ( ret != 0 ) { ERROR("Error writing stream file.\n"); } + profile_end("write-stream"); int n = 0; for ( i=0; in_crystals; i++ ) { -- cgit v1.2.3