From 0fd2a367d1ba017d0c1cb112a648b71c4e355f78 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Wed, 12 May 2021 16:22:25 +0200 Subject: Rework header caching layer This clears up multiple layering violations which were starting to get in the way. It enables "string" headers to be stored directly, and it will make it much simpler to add new header types in the future. Along the way, this changes all the floating point header stuff to use double precision. This is needed for EuXFEL event IDs. Closes: https://gitlab.desy.de/thomas.white/crystfel/-/issues/34 --- libcrystfel/src/image-cbf.c | 10 +++ libcrystfel/src/image-cbf.h | 3 + libcrystfel/src/image-hdf5.c | 109 ++++++++++++++++------------ libcrystfel/src/image-hdf5.h | 6 +- libcrystfel/src/image-msgpack.c | 67 +++++++----------- libcrystfel/src/image-msgpack.h | 6 +- libcrystfel/src/image.c | 153 +++++++++++++++++++++++++++------------- libcrystfel/src/image.h | 24 ++++++- libcrystfel/src/stream.c | 64 ++++++++++++----- 9 files changed, 278 insertions(+), 164 deletions(-) (limited to 'libcrystfel') diff --git a/libcrystfel/src/image-cbf.c b/libcrystfel/src/image-cbf.c index 2ea5f0a5..19b38607 100644 --- a/libcrystfel/src/image-cbf.c +++ b/libcrystfel/src/image-cbf.c @@ -630,3 +630,13 @@ int image_cbf_read(struct image *image, return 0; } + + +int image_cbf_read_header_to_cache(struct image *image, + const char *from) +{ + /* FIXME: Implementation + * https://gitlab.desy.de/thomas.white/crystfel/-/issues/10 */ + ERROR("Reading headers from CBF files is not currently supported.\n"); + return 1; +} diff --git a/libcrystfel/src/image-cbf.h b/libcrystfel/src/image-cbf.h index ca79a1a3..f6936981 100644 --- a/libcrystfel/src/image-cbf.h +++ b/libcrystfel/src/image-cbf.h @@ -53,4 +53,7 @@ extern int image_cbf_read_mask(struct panel_template *p, int gz, int *bad, int mask_good, int mask_bad); +extern int image_cbf_read_header_to_cache(struct image *image, + const char *from); + #endif /* IMAGE_CBF_H */ diff --git a/libcrystfel/src/image-hdf5.c b/libcrystfel/src/image-hdf5.c index c939e136..2995654d 100644 --- a/libcrystfel/src/image-hdf5.c +++ b/libcrystfel/src/image-hdf5.c @@ -633,8 +633,7 @@ int image_hdf5_read_mask(struct panel_template *p, } -double image_hdf5_get_value(const char *name, const char *filename, - const char *event, char *ptype) +int image_hdf5_read_header_to_cache(struct image *image, const char *name) { hid_t dh; hid_t type; @@ -653,47 +652,44 @@ double image_hdf5_get_value(const char *name, const char *filename, int i; char *subst_name = NULL; hid_t fh; - double val; int *dim_vals; int n_dim_vals; int dim_val_pos; - if ( access(filename, R_OK) == -1 ) { - ERROR("File does not exist or cannot be read: %s\n", filename); - return NAN; + if ( access(image->filename, R_OK) == -1 ) { + ERROR("File does not exist or cannot be read: %s\n", + image->filename); + return 1; } - fh = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT); + fh = H5Fopen(image->filename, H5F_ACC_RDONLY, H5P_DEFAULT); if ( fh < 0 ) { - ERROR("Couldn't open file: %s\n", filename); - return NAN; + ERROR("Couldn't open file: %s\n", image->filename); + return 1; } - subst_name = substitute_path(event, name, 1); + subst_name = substitute_path(image->ev, name, 1); if ( subst_name == NULL ) { - ERROR("Invalid event ID '%s'\n", event); + ERROR("Invalid event ID '%s'\n", image->ev); close_hdf5(fh); - return NAN; + return 1; } dh = H5Dopen2(fh, subst_name, H5P_DEFAULT); if ( dh < 0 ) { ERROR("No such numeric field '%s'\n", subst_name); close_hdf5(fh); - return NAN; + return 1; } type = H5Dget_type(dh); class = H5Tget_class(type); - if ( class == H5T_FLOAT ) { - *ptype = 'f'; - } else if ( class == H5T_INTEGER ) { - *ptype = 'i'; - } else { + /* FIXME: Handle strings as well */ + if ( (class != H5T_FLOAT) && (class != H5T_INTEGER) ) { ERROR("Not a floating point or integer value.\n"); close_hdf5(fh); - return NAN; + return 1; } /* Get the dimensionality. We have to cope with scalars expressed as @@ -703,7 +699,7 @@ double image_hdf5_get_value(const char *name, const char *filename, if ( ndims > 64 ) { ERROR("Too many dimensions for numeric value\n"); close_hdf5(fh); - return NAN; + return 1; } H5Sget_simple_extent_dims(sh, size, NULL); @@ -719,37 +715,42 @@ double image_hdf5_get_value(const char *name, const char *filename, if ( class == H5T_FLOAT ) { - r = H5Dread(dh, H5T_NATIVE_DOUBLE, ms, sh, H5P_DEFAULT, &val); + double val; + r = H5Dread(dh, H5T_NATIVE_DOUBLE, ms, sh, H5P_DEFAULT, + &val); if ( r < 0 ) { ERROR("Couldn't read scalar value from %s.\n", subst_name); free(subst_name); close_hdf5(fh); - return NAN; + return 1; } - return val; + image_cache_header_float(image, name, val); + return 0; } else { - int vali; - r = H5Dread(dh, H5T_NATIVE_INT, ms, sh, H5P_DEFAULT, &vali); + int val; + r = H5Dread(dh, H5T_NATIVE_INT, ms, sh, H5P_DEFAULT, + &val); if ( r < 0 ) { ERROR("Couldn't read scalar value from %s.\n", subst_name); free(subst_name); close_hdf5(fh); - return NAN; + return 1; } - return vali; + image_cache_header_int(image, name, val); + return 0; } } - dim_vals = read_dim_parts(event, &n_dim_vals); + dim_vals = read_dim_parts(image->ev, &n_dim_vals); if ( dim_vals == NULL ) { ERROR("Couldn't parse event '%s'\n"); close_hdf5(fh); - return NAN; + return 1; } f_offset = malloc(ndims*sizeof(hsize_t)); @@ -757,7 +758,7 @@ double image_hdf5_get_value(const char *name, const char *filename, if ( (f_offset == NULL) || (f_count == NULL) ) { ERROR("Couldn't allocate dimension arrays\n"); close_hdf5(fh); - return NAN; + return 1; } /* Every dimension of the dataset must either be size 1 or @@ -774,7 +775,7 @@ double image_hdf5_get_value(const char *name, const char *filename, subst_name, i, dim_vals[dim_val_pos], size[i]); close_hdf5(fh); - return NAN; + return 1; } f_offset[i] = dim_vals[dim_val_pos]; @@ -790,6 +791,8 @@ double image_hdf5_get_value(const char *name, const char *filename, } + free(subst_name); + check = H5Sselect_hyperslab(sh, H5S_SELECT_SET, f_offset, NULL, f_count, NULL); if ( check <0 ) { @@ -797,33 +800,49 @@ double image_hdf5_get_value(const char *name, const char *filename, free(f_offset); free(f_count); close_hdf5(fh); - return NAN; + return 1; } + free(f_offset); + free(f_count); + ms = H5Screate_simple(1,msdims,NULL); check = H5Sselect_hyperslab(ms, H5S_SELECT_SET, m_offset, NULL, m_count, NULL); if ( check < 0 ) { ERROR("Error selecting memory dataspace for float value\n"); - free(f_offset); - free(f_count); close_hdf5(fh); - return NAN; + return 1; } - r = H5Dread(dh, H5T_NATIVE_DOUBLE, ms, sh, H5P_DEFAULT, &val); - if ( r < 0 ) { - ERROR("Couldn't read value.\n"); + if ( class == H5T_FLOAT ) { + + double val; + r = H5Dread(dh, H5T_NATIVE_DOUBLE, ms, sh, H5P_DEFAULT, &val); + if ( r < 0 ) { + ERROR("Couldn't read value.\n"); + close_hdf5(fh); + return 1; + } + + image_cache_header_float(image, name, val); close_hdf5(fh); - return NAN; - } + return 0; - free(f_offset); - free(f_count); - free(subst_name); - close_hdf5(fh); + } else { - return val; + int val; + r = H5Dread(dh, H5T_NATIVE_INT, ms, sh, H5P_DEFAULT, &val); + if ( r < 0 ) { + ERROR("Couldn't read value.\n"); + close_hdf5(fh); + return 1; + } + + image_cache_header_int(image, name, val); + close_hdf5(fh); + return 0; + } } diff --git a/libcrystfel/src/image-hdf5.h b/libcrystfel/src/image-hdf5.h index 4c36cf89..239bb20d 100644 --- a/libcrystfel/src/image-hdf5.h +++ b/libcrystfel/src/image-hdf5.h @@ -34,10 +34,8 @@ #include "datatemplate_priv.h" -extern double image_hdf5_get_value(const char *from, - const char *filename, - const char *ev, - char *type); +extern int image_hdf5_read_header_to_cache(struct image *image, + const char *name); extern int image_hdf5_read(struct image *image, const DataTemplate *dtempl, diff --git a/libcrystfel/src/image-msgpack.c b/libcrystfel/src/image-msgpack.c index 0f5ab746..6785fd5f 100644 --- a/libcrystfel/src/image-msgpack.c +++ b/libcrystfel/src/image-msgpack.c @@ -242,93 +242,76 @@ static char *terminate_str(const char *ptr, size_t len) } -double image_msgpack_get_value(const char *name, - void *data_block, - size_t data_block_size, - char *ptype) +int image_msgpack_read_header_to_cache(struct image *image, + const char *name) { msgpack_unpacked unpacked; msgpack_object *value_obj; msgpack_object *obj; int r; - float val = NAN; char *str; - *ptype = 'x'; - - if ( data_block == NULL ) { + if ( image->data_block == NULL ) { ERROR("No MsgPack data!\n"); - goto out; + return 1; } msgpack_unpacked_init(&unpacked); - r = msgpack_unpack_next(&unpacked, data_block, data_block_size, NULL); + r = msgpack_unpack_next(&unpacked, image->data_block, + image->data_block_size, NULL); if ( r != MSGPACK_UNPACK_SUCCESS ) { ERROR("MessagePack unpack failed: %i\n", r); - goto out; + return 1; } obj = find_main_object(&unpacked); if ( obj == NULL ) { ERROR("Failed to find main MsgPack object.\n"); msgpack_unpacked_destroy(&unpacked); - goto out; + return 1; } value_obj = find_msgpack_kv(obj, name); if ( value_obj == NULL ) { ERROR("Couldn't find '%s' in MessagePack object\n", name); msgpack_unpacked_destroy(&unpacked); - goto out; + return 1; } switch ( value_obj->type ) { case MSGPACK_OBJECT_FLOAT64: case MSGPACK_OBJECT_FLOAT32: - //case MSGPACK_OBJECT_FLOAT: - *ptype = 'f'; - val = value_obj->via.f64; - break; + image_cache_header_float(image, name, value_obj->via.f64); + msgpack_unpacked_destroy(&unpacked); + return 0; case MSGPACK_OBJECT_POSITIVE_INTEGER: case MSGPACK_OBJECT_NEGATIVE_INTEGER: - *ptype = 'i'; - val = value_obj->via.i64; - break; + image_cache_header_int(image, name, value_obj->via.i64); + msgpack_unpacked_destroy(&unpacked); + return 0; case MSGPACK_OBJECT_STR: str = terminate_str(value_obj->via.str.ptr, value_obj->via.str.size); - if ( str != NULL ) { - int ival; - if ( convert_int(str, &ival) == 0 ) { - *ptype = 'i'; - val = ival; - } else { - ERROR("MsgPack header %s has a string type (%s)" - "(need a number, and can't convert).\n", - name, str); - val = NAN; - } - free(str); - } else { - ERROR("Failed to read MsgPack string (%s)\n", name); - val = NAN; + if ( str == NULL ) { + msgpack_unpacked_destroy(&unpacked); + return 1; } - break; + + image_cache_header_str(image, name, str); + free(str); + msgpack_unpacked_destroy(&unpacked); + return 0; default: ERROR("Unrecognised MsgPack type %i (%s)\n", value_obj->type, name); - break; + msgpack_unpacked_destroy(&unpacked); + return 1; } - - msgpack_unpacked_destroy(&unpacked); - -out: - return val; } diff --git a/libcrystfel/src/image-msgpack.h b/libcrystfel/src/image-msgpack.h index 26cf6b4e..738adc91 100644 --- a/libcrystfel/src/image-msgpack.h +++ b/libcrystfel/src/image-msgpack.h @@ -42,9 +42,7 @@ extern ImageFeatureList *image_msgpack_read_peaks(const DataTemplate *dtempl, size_t data_size, int half_pixel_shift); -extern double image_msgpack_get_value(const char *name, - void *data_block, - size_t data_block_size, - char *ptype); +extern int image_msgpack_read_header_to_cache(struct image *image, + const char *name); #endif /* IMAGE_MSGPACK_H */ diff --git a/libcrystfel/src/image.c b/libcrystfel/src/image.c index 1e2b7a3c..97112fa3 100644 --- a/libcrystfel/src/image.c +++ b/libcrystfel/src/image.c @@ -307,7 +307,7 @@ void image_cache_header_int(struct image *image, if ( ce != NULL ) { ce->header_name = strdup(header_name); ce->val_int = header_val; - ce->type = 'i'; + ce->type = HEADER_INT; image->header_cache[image->n_cached_headers++] = ce; } else { ERROR("Failed to add header cache entry.\n"); @@ -318,7 +318,7 @@ void image_cache_header_int(struct image *image, void image_cache_header_float(struct image *image, const char *header_name, - float header_val) + double header_val) { if ( image->n_cached_headers >= HEADER_CACHE_SIZE ) { ERROR("Too many headers to copy.\n"); @@ -330,7 +330,7 @@ void image_cache_header_float(struct image *image, if ( ce != NULL ) { ce->header_name = strdup(header_name); ce->val_float = header_val; - ce->type = 'f'; + ce->type = HEADER_FLOAT; image->header_cache[image->n_cached_headers++] = ce; } else { ERROR("Failed to add header cache entry.\n"); @@ -339,77 +339,130 @@ void image_cache_header_float(struct image *image, } -static double get_value(struct image *image, const char *from, - int *is_literal_number) +void image_cache_header_str(struct image *image, + const char *header_name, + const char *header_val) { - double val; - char *rval; - struct header_cache_entry *ce; - char type; + if ( image->n_cached_headers >= HEADER_CACHE_SIZE ) { + ERROR("Too many headers to copy.\n"); + } else { - if ( from == NULL ) return NAN; + struct header_cache_entry *ce; + ce = malloc(sizeof(struct header_cache_entry)); - val = strtod(from, &rval); - if ( (*rval == '\0') && (rval != from) ) { - if ( is_literal_number != NULL ) { - *is_literal_number = 1; + if ( ce != NULL ) { + ce->header_name = strdup(header_name); + ce->val_str = strdup(header_val); + ce->type = HEADER_STR; + image->header_cache[image->n_cached_headers++] = ce; + } else { + ERROR("Failed to add header cache entry.\n"); } - return val; } +} + + +static int read_header_to_cache(struct image *image, const char *from) +{ + switch ( image->data_source_type ) { + + case DST_HDF5: + return image_hdf5_read_header_to_cache(image, from); + + case DST_CBF: + case DST_CBFGZ: + return image_cbf_read_header_to_cache(image, from); + + case DST_MSGPACK: + return image_msgpack_read_header_to_cache(image, from); + + default: + ERROR("Unrecognised file type %i (read_header_to_cache)\n", + image->data_source_type); + return 1; + + } +} + + +static struct header_cache_entry *cached_header(struct image *image, const char *from) +{ + struct header_cache_entry *ce; if ( image == NULL ) { ERROR("Attempt to retrieve a header value without an image\n"); - return NAN; + return NULL; } ce = find_cache_entry(image, from); - if ( ce != NULL ) { - if ( ce->type == 'f' ) { - return ce->val_float; - } else if ( ce->type == 'i' ) { - return ce->val_int; - } else { - ERROR("Unrecognised header cache type '%c'\n", - ce->type); - return NAN; - } + if ( ce != NULL ) return ce; + /* Try to get the value from the file */ + if ( read_header_to_cache(image, from) == 0 ) { + return find_cache_entry(image, from); } else { + ERROR("Couldn't find header value '%s'\n", from); + return NULL; + } +} - switch ( image->data_source_type ) { - case DST_HDF5: - val = image_hdf5_get_value(from, image->filename, - image->ev, &type); - break; +int image_read_header_float(struct image *image, const char *from, double *val) +{ + struct header_cache_entry *ce; - case DST_CBF: - case DST_CBFGZ: - /* FIXME: Implementation */ - val = NAN; - break; + ce = cached_header(image, from); + if ( ce == NULL ) return 1; - case DST_MSGPACK: - val = image_msgpack_get_value(from, image->data_block, - image->data_block_size, - &type); - break; + switch ( ce->type ) { - default: - ERROR("Unrecognised file type %i (get_value)\n", - image->data_source_type); - return 1; + case HEADER_FLOAT: + *val = ce->val_float; + return 0; + + case HEADER_INT: + *val = ce->val_int; + return 0; + case HEADER_STR: + if ( convert_float(ce->val_str, val) == 0 ) { + return 0; + } else { + ERROR("Value '%s' (%s) can't be converted to float\n", + ce->val_str, from); + return 1; } + default: + ERROR("Unrecognised header cache type %i\n", ce->type); + return 1; + } +} + +static double get_value(struct image *image, const char *from, + int *is_literal_number) +{ + double val; + char *rval; + + if ( from == NULL ) return NAN; + + val = strtod(from, &rval); + if ( (*rval == '\0') && (rval != from) ) { + if ( is_literal_number != NULL ) { + *is_literal_number = 1; + } + return val; } - if ( type == 'f' ) { - image_cache_header_float(image, from, val); - } else if ( type == 'i' ) { - image_cache_header_int(image, from, val); + if ( is_literal_number != NULL ) { + *is_literal_number = 0; + } + if ( image_read_header_float(image, from, &val) == 0 ) { + return val; + } else { + return NAN; /* FIXME: Use out-of-band flag */ } - return val; } diff --git a/libcrystfel/src/image.h b/libcrystfel/src/image.h index 045f3e77..0d8e7166 100644 --- a/libcrystfel/src/image.h +++ b/libcrystfel/src/image.h @@ -78,12 +78,20 @@ typedef struct _imagefeaturelist ImageFeatureList; #define HEADER_CACHE_SIZE (128) +typedef enum +{ + HEADER_FLOAT, + HEADER_INT, + HEADER_STR +} HeaderCacheType; + struct header_cache_entry { char *header_name; - char type; + HeaderCacheType type; union { int val_int; - float val_float; + double val_float; + char *val_str; }; }; @@ -221,14 +229,24 @@ extern struct image *image_read_data_block(const DataTemplate *dtempl, int no_mask_data); extern void image_free(struct image *image); +extern int image_read_header_float(struct image *image, const char *from, + double *val); + +/* NB image_read_header_int and image_read_header_str should exist, but there + * is currently no use for them. Get in touch if you disagree! */ + extern void image_cache_header_float(struct image *image, const char *header_name, - float header_val); + double header_val); extern void image_cache_header_int(struct image *image, const char *header_name, int header_val); +extern void image_cache_header_str(struct image *image, + const char *header_name, + const char *header_val); + extern ImageFeatureList *image_read_peaks(const DataTemplate *dtempl, const char *filename, const char *event, diff --git a/libcrystfel/src/stream.c b/libcrystfel/src/stream.c index 89f59ddc..47438614 100644 --- a/libcrystfel/src/stream.c +++ b/libcrystfel/src/stream.c @@ -638,15 +638,27 @@ int stream_write_chunk(Stream *st, const struct image *i, for ( j=0; jn_cached_headers; j++ ) { struct header_cache_entry *ce = i->header_cache[j]; - if ( ce->type == 'f' ) { + switch ( ce->type ) { + + case HEADER_FLOAT: fprintf(st->fh, "header/float/%s = %f\n", ce->header_name, ce->val_float); - } else if ( ce->type == 'i' ) { + break; + + case HEADER_INT: fprintf(st->fh, "header/int/%s = %i\n", ce->header_name, ce->val_int); - } else { - ERROR("Unrecognised header cache type '%c'\n", - ce->type); + break; + + case HEADER_STR: + fprintf(st->fh, "header/str/%s = %s\n", + ce->header_name, ce->val_str); + break; + + default: + ERROR("Unrecognised header cache type %i\n", ce->type); + break; + } } @@ -899,12 +911,14 @@ static void read_crystal(Stream *st, struct image *image, } -static void parse_header(const char *line_in, struct image *image, char type) +static void parse_header(const char *line_in, struct image *image, + HeaderCacheType type) { char *line; char *pos; line = strdup(line_in); + chomp(line); pos = strchr(line, ' '); if ( pos == NULL ) { @@ -923,20 +937,34 @@ static void parse_header(const char *line_in, struct image *image, char type) return; } - if ( type == 'f' ) { - float v; - if ( sscanf(pos+3, "%f", &v) != 1 ) { + switch ( type ) { + + double vf; + int vi; + + case HEADER_FLOAT: + if ( convert_float(pos+3, &vf) != 0 ) { ERROR("Invalid header line '%s' (invalid value)\n", line); return; } - image_cache_header_float(image, line, v); - } else { - int v; - if ( sscanf(pos+3, "%i", &v) != 1 ) { + image_cache_header_float(image, line, vf); + break; + + case HEADER_INT: + if ( convert_int(pos+3, &vi) != 0 ) { ERROR("Invalid header line '%s' (invalid value)\n", line); return; } - image_cache_header_int(image, line, v); + image_cache_header_int(image, line, vi); + break; + + case HEADER_STR: + image_cache_header_str(image, line, pos+3); + break; + + default: + ERROR("Unrecognised header cache type %i (from stream)\n", type); + break; } } @@ -983,11 +1011,15 @@ struct image *stream_read_chunk(Stream *st, StreamFlags srf) } if ( strncmp(line, "header/int/", 11) == 0 ) { - parse_header(line+11, image, 'i'); + parse_header(line+11, image, HEADER_INT); } if ( strncmp(line, "header/float/", 13) == 0 ) { - parse_header(line+13, image, 'f'); + parse_header(line+13, image, HEADER_FLOAT); + } + + if ( strncmp(line, "header/str/", 13) == 0 ) { + parse_header(line+11, image, HEADER_STR); } if ( strncmp(line, "indexed_by = ", 13) == 0 ) { -- cgit v1.2.3