/* * ambigator.c * * Resolve indexing ambiguities * * Copyright © 2014-2021 Deutsches Elektronen-Synchrotron DESY, * a research centre of the Helmholtz Association. * Copyright © 2014 Wolfgang Brehm * * Authors: * 2014-2020 Thomas White * 2014 Wolfgang Brehm * * This file is part of CrystFEL. * * CrystFEL is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * CrystFEL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with CrystFEL. If not, see . * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_HDF5 #include #endif #include #include #include #include #include #include #include #include #include #include "version.h" static void show_help(const char *s) { printf("Syntax: %s [options] input.stream\n\n", s); printf( "Resolve indexing ambiguities.\n" "\n" " -h, --help Display this help message.\n" "\n" " --version Print CrystFEL version number and exit.\n" " -o, --output= Output stream.\n" " -y, --symmetry= Actual (\"target\") symmetry.\n" " -w Apparent (\"source\" or \"twinned\") symmetry.\n" " --operator= Ambiguity operator, e.g. \"k,h,-l\"\n" " -n, --iterations= Iterate times.\n" " --highres= High resolution cutoff in A.\n" " --lowres= Low resolution cutoff in A.\n" " --start-assignments= Read starting assignments from file.\n" " --end-assignments= Save end assignments to file.\n" " --fg-graph= Save f and g correlation values to file.\n" " --ncorr= Use correlations per crystal. Default 1000\n" " -j Use threads for CC calculation.\n" " --really-random Be non-deterministic.\n" " --corr-matrix= Write the correlation matrix to file.\n" ); } struct flist { int n; int n_groups; unsigned int *s; unsigned int *group; float *i; unsigned int *s_reidx; unsigned int *group_reidx; float *i_reidx; }; static struct flist *asymm_and_merge(RefList *in, const SymOpList *sym, UnitCell *cell, double rmin, double rmax, SymOpList *amb, int auto_res) { Reflection *refl; RefListIterator *iter; RefList *asym; struct flist *f; int n; asym = reflist_new(); if ( asym == NULL ) return NULL; for ( refl = first_refl(in, &iter); refl != NULL; refl = next_refl(refl, iter) ) { signed int h, k, l; signed int ha, ka, la; Reflection *cr; int group = 0; get_indices(refl, &h, &k, &l); if ( cell == NULL ) { ERROR("Can't calculate resolution cutoff - no cell\n"); } else { double res = 2.0*resolution(cell, h, k, l); if ( res < rmin ) continue; if ( res > rmax ) continue; if ( auto_res ) { if ( res < 1e9 ) { group = 0; /* inf <= res < 10 Å */ } else if ( (res>2e9) && (res<4e9) ) { group = 1; /* 5 < res < 2.5 Å */ } else if ( res > 4e9 ) { group = 2; /* 2.5 < res < 0 Å */ } else continue; /* NB gap in ranges */ } } get_asymm(sym, h, k, l, &ha, &ka, &la); if ( amb != NULL ) { signed int hr, kr, lr; signed int hra, kra, lra; get_equiv(amb, NULL, 0, ha, ka, la, &hr, &kr, &lr); get_asymm(sym, hr, kr, lr, &hra, &kra, &lra); /* Skip twin-proof reflections */ if ( (ha==hra) && (ka==kra) && (la==lra) ) { //STATUS("%i %i %i is twin proof\n", h, k, l); continue; } } cr = find_refl(asym, ha, ka, la); if ( cr == NULL ) { cr = add_refl(asym, ha, ka, la); assert(cr != NULL); copy_data(cr, refl); } else { const double i = get_intensity(cr); const int r = get_redundancy(cr); set_intensity(cr, (r*i + get_intensity(refl))/(r+1)); set_redundancy(cr, r+1); } set_flag(cr, group); } f = malloc(sizeof(struct flist)); if ( f == NULL ) { ERROR("Failed to allocate flist\n"); return NULL; } if ( auto_res ) { f->n_groups = 3; } else { f->n_groups = 1; } n = num_reflections(asym); f->s = malloc(n*sizeof(unsigned int)); f->s_reidx = malloc(n*sizeof(unsigned int)); f->i = malloc(n*sizeof(float)); f->i_reidx = malloc(n*sizeof(float)); f->group = malloc(n*sizeof(unsigned int)); f->group_reidx = malloc(n*sizeof(unsigned int)); if ( (f->s == NULL) || (f->i == NULL) || (f->s_reidx == NULL) || (f->i_reidx == NULL) || (f->group_reidx == NULL) || (f->group == NULL) ) { ERROR("Failed to allocate flist\n"); goto out; } f->n = 0; for ( refl = first_refl(asym, &iter); refl != NULL; refl = next_refl(refl, iter) ) { signed int h, k, l; get_indices(refl, &h, &k, &l); f->s[f->n] = SERIAL(h, k, l); f->group[f->n] = get_flag(refl); f->i[f->n] = get_intensity(refl); f->n++; } assert(f->n == n); if ( amb != NULL ) { RefList *reidx = reflist_new(); if ( reidx == NULL ) goto out; for ( refl = first_refl(asym, &iter); refl != NULL; refl = next_refl(refl, iter) ) { signed int h, k, l; signed int hr, kr, lr; signed int hra, kra, lra; Reflection *cr; get_indices(refl, &h, &k, &l); get_equiv(amb, NULL, 0, h, k, l, &hr, &kr, &lr); get_asymm(sym, hr, kr, lr, &hra, &kra, &lra); cr = add_refl(reidx, hra, kra, lra); if ( cr == NULL ) { ERROR("Failed to add reflection\n"); reflist_free(reidx); goto out; } copy_data(cr, refl); } n = 0; for ( refl = first_refl(reidx, &iter); refl != NULL; refl = next_refl(refl, iter) ) { signed int h, k, l; get_indices(refl, &h, &k, &l); f->s_reidx[n] = SERIAL(h, k, l); f->group_reidx[n] = get_flag(refl); f->i_reidx[n++] = get_intensity(refl); } assert(f->n == n); reflist_free(reidx); } reflist_free(asym); return f; out: free(f->s); free(f->s_reidx); free(f->i); free(f->i_reidx); free(f->group); free(f->group_reidx); free(f); return NULL; } static float corr_group(struct flist *a, struct flist *b, int *pn, int a_reidx, int group) { float s_xy = 0.0; float s_x = 0.0; float s_y = 0.0; float s_x2 = 0.0; float s_y2 = 0.0; int n = 0; float t1, t2; int ap = 0; int bp = 0; int done = 0; unsigned int *sa; float *ia; unsigned int *ga; if ( a_reidx ) { sa = a->s_reidx; ia = a->i_reidx; ga = a->group_reidx; } else { sa = a->s; ia = a->i; ga = a->group; } if ( (a->n == 0) || (b->n == 0) ) { *pn = 0; return 0.0; } while ( 1 ) { while ( sa[ap] > b->s[bp] ) { if ( ++bp == b->n ) { done = 1; break; } } if ( done ) break; while ( sa[ap] < b->s[bp] ) { if ( ++ap == a->n ) { done = 1; break; } } if ( done ) break; if ( sa[ap] == b->s[bp] ) { if ( ga[ap] == group ) { float aint, bint; aint = ia[ap]; bint = b->i[bp]; s_xy += aint*bint; s_x += aint; s_y += bint; s_x2 += aint*aint; s_y2 += bint*bint; n++; } if ( ++ap == a->n ) break; if ( ++bp == b->n ) break; } } *pn = n; t1 = s_x2 - s_x*s_x / n; t2 = s_y2 - s_y*s_y / n; if ( (t1 <= 0.0) || (t2 <= 0.0) ) return 0.0; return (s_xy - s_x*s_y/n) / sqrt(t1*t2); } static float corr(struct flist *a, struct flist *b, int *pn, int a_reidx) { int i; double total = 0.0; for ( i=0; in_groups; i++ ) { double v = corr_group(a, b, pn, a_reidx, i); /* NaN means no reflections in this range for this pair */ if ( !isnan(v) ) total += v; } return total/a->n_groups; } struct cc_list { signed int *ind; float *cc; signed int *ind_reidx; float *cc_reidx; }; struct ambigator_queue_args { int n_started; int n_finished; int n_to_do; long long int mean_nac; long long int nmean_nac; struct cc_list *ccs; struct flist **crystals; int n_crystals; int ncorr; SymOpList *amb; gsl_rng **rngs; }; struct cc_job { struct cc_list *ccs; int i; int mean_nac; int nmean_nac; int fail; struct flist **crystals; int n_crystals; int ncorr; SymOpList *amb; gsl_rng **rngs; }; static void *get_task(void *vp) { struct ambigator_queue_args *qargs = vp; struct cc_job *job; if ( qargs->n_started == qargs->n_to_do ) return NULL; job = malloc(sizeof(struct cc_job)); if ( job == NULL ) return NULL; job->ccs = qargs->ccs; job->i = qargs->n_started++; job->crystals = qargs->crystals; job->n_crystals = qargs->n_crystals; job->ncorr = qargs->ncorr; job->amb = qargs->amb; job->rngs = qargs->rngs; return job; } static void final(void *qp, void *wp) { struct ambigator_queue_args *qargs = qp; struct cc_job *job = wp; qargs->mean_nac += job->mean_nac; qargs->nmean_nac += job->nmean_nac; if ( job->fail ) { ERROR("Failed to calculate CCs (out of memory?)\n"); abort(); } free(job); qargs->n_finished++; progress_bar(qargs->n_finished, qargs->n_to_do, "Calculating CCs"); } static void work(void *wp, int cookie) { struct cc_job *job = wp; int i = job->i; int k, l; struct cc_list *ccs = job->ccs; struct flist **crystals = job->crystals; int n_crystals = job->n_crystals; int ncorr = job->ncorr; SymOpList *amb = job->amb; int mean_nac = 0; int nmean_nac = 0; gsl_permutation *p; job->fail = 1; p = gsl_permutation_alloc(n_crystals); if ( p == NULL ) return; gsl_permutation_init(p); gsl_ran_shuffle(job->rngs[cookie], p->data, n_crystals, sizeof(size_t)); ccs[i].ind = malloc(ncorr*sizeof(int)); ccs[i].cc = malloc(ncorr*sizeof(float)); ccs[i].ind_reidx = calloc(ncorr, sizeof(int)); ccs[i].cc_reidx = calloc(ncorr, sizeof(float)); if ( (ccs[i].ind==NULL) || (ccs[i].cc==NULL) || (ccs[i].ind_reidx==NULL) || (ccs[i].cc_reidx==NULL) ) { return; } k = 0; for ( l=0; lmean_nac = mean_nac; job->nmean_nac = nmean_nac; job->fail = 0; } static gsl_rng **setup_random(gsl_rng *rng, int n) { gsl_rng **rngs; int i; rngs = malloc(n * sizeof(gsl_rng *)); if ( rngs == NULL ) return NULL; for ( i=0; i= ncorr); ncorr++; /* Extra value at end for sentinel */ qargs.rngs = setup_random(rng, nthreads); if ( qargs.rngs == NULL ) { ERROR("Failed to set up RNGs\n"); return NULL; } ccs = malloc(n_crystals*sizeof(struct cc_list)); if ( ccs == NULL ) return NULL; qargs.n_started = 0; qargs.n_finished = 0; qargs.n_to_do = n_crystals; qargs.ccs = ccs; qargs.mean_nac = 0; qargs.nmean_nac = 0; qargs.crystals = crystals; qargs.n_crystals = n_crystals; qargs.ncorr = ncorr; qargs.amb = amb; run_threads(nthreads, work, get_task, final, &qargs, n_crystals, 0, 0, 0); for ( i=0; i 0 ) { STATUS("WARNING: %i crystals had no correlation\n", ndud); } STATUS("Mean f,g = %10f,%10f. Changed %i assignments this time.\n", mf/nmf, mg/nmf, nch); } static void reindex_reflections(FILE *fh, FILE *ofh, int assignment, SymOpList *amb) { int first = 1; do { char *rval; char line[1024]; int n; signed int h, k, l; int r; rval = fgets(line, 1023, fh); if ( rval == NULL ) break; if ( strcmp(line, REFLECTION_END_MARKER"\n") == 0 ) { fputs(line, ofh); return; } if ( first ) { fputs(line, ofh); first = 0; continue; } r = sscanf(line, "%i %i %i%n", &h, &k, &l, &n); /* See scanf() manual page about %n to see why <3 is used */ if ( (r < 3) && !first ) return; if ( assignment ) { get_equiv(amb, NULL, 0, h, k, l, &h, &k, &l); } fprintf(ofh, "%4i %4i %4i%s", h, k, l, line+n); } while ( 1 ); } /* This is nasty, but means the output includes absolutely everything in the * input, even stuff ignored by read_chunk() */ static void write_reindexed_stream(const char *infile, const char *outfile, int *assignments, SymOpList *amb, int argc, char *argv[]) { FILE *fh; FILE *ofh; int i; struct rvec as, bs, cs; int have_as = 0; int have_bs = 0; int have_cs = 0; int done = 0; fh = fopen(infile, "r"); if ( fh == NULL ) { ERROR("Failed to open '%s'\n", infile); return; } ofh = fopen(outfile, "w"); if ( ofh == NULL ) { ERROR("Failed to open '%s'\n", outfile); fclose(fh); return; } /* Copy the header */ do { char line[1024]; char *rval; rval = fgets(line, 1023, fh); if ( rval == NULL ) { ERROR("Failed to read stream audit info.\n"); return; } if ( strncmp(line, "-----", 5) == 0 ) { done = 1; /* Add our own header */ fprintf(ofh, "Re-indexed by ambigator %s\n", crystfel_version_string()); if ( argc > 0 ) { for ( i=0; i 0 ) fprintf(ofh, " "); fprintf(ofh, "%s", argv[i]); } fprintf(ofh, "\n"); } } fputs(line, ofh); } while ( !done ); i = 0; do { char *rval; char line[1024]; int d = 0; float u, v, w; rval = fgets(line, 1023, fh); if ( rval == NULL ) break; if ( strncmp(line, "Cell parameters ", 16) == 0 ) { d = 1; } if ( sscanf(line, "astar = %f %f %f", &u, &v, &w) == 3 ) { as.u = u*1e9; as.v = v*1e9; as.w = w*1e9; have_as = 1; d = 1; } if ( sscanf(line, "bstar = %f %f %f", &u, &v, &w) == 3 ) { bs.u = u*1e9; bs.v = v*1e9; bs.w = w*1e9; have_bs = 1; d = 1; } if ( sscanf(line, "cstar = %f %f %f", &u, &v, &w) == 3 ) { cs.u = u*1e9; cs.v = v*1e9; cs.w = w*1e9; have_cs = 1; d = 1; } if ( have_as && have_bs && have_cs ) { UnitCell *cell; double asx, asy, asz; double bsx, bsy, bsz; double csx, csy, csz; double a, b, c, al, be, ga; cell = cell_new_from_reciprocal_axes(as, bs, cs); assert(cell != NULL); if ( assignments[i] ) { signed int h, k, l; struct rvec na, nb, nc; get_equiv(amb, NULL, 0, 1, 0, 0, &h, &k, &l); na.u = as.u*h + bs.u*k + cs.u*l; na.v = as.v*h + bs.v*k + cs.v*l; na.w = as.w*h + bs.w*k + cs.w*l; get_equiv(amb, NULL, 0, 0, 1, 0, &h, &k, &l); nb.u = as.u*h + bs.u*k + cs.u*l; nb.v = as.v*h + bs.v*k + cs.v*l; nb.w = as.w*h + bs.w*k + cs.w*l; get_equiv(amb, NULL, 0, 0, 0, 1, &h, &k, &l); nc.u = as.u*h + bs.u*k + cs.u*l; nc.v = as.v*h + bs.v*k + cs.v*l; nc.w = as.w*h + bs.w*k + cs.w*l; cell_set_reciprocal(cell, na.u, na.v, na.w, nb.u, nb.v, nb.w, nc.u, nc.v, nc.w); } /* The cell parameters might change, so update them. * Unique axis, centering and lattice type can't change, * though. */ cell_get_parameters(cell, &a, &b, &c, &al, &be, &ga); fprintf(ofh, "Cell parameters %7.5f %7.5f %7.5f nm," " %7.5f %7.5f %7.5f deg\n", a*1.0e9, b*1.0e9, c*1.0e9, rad2deg(al), rad2deg(be), rad2deg(ga)); cell_get_reciprocal(cell, &asx, &asy, &asz, &bsx, &bsy, &bsz, &csx, &csy, &csz); fprintf(ofh, "astar = %+9.7f %+9.7f %+9.7f nm^-1\n", asx/1e9, asy/1e9, asz/1e9); fprintf(ofh, "bstar = %+9.7f %+9.7f %+9.7f nm^-1\n", bsx/1e9, bsy/1e9, bsz/1e9); fprintf(ofh, "cstar = %+9.7f %+9.7f %+9.7f nm^-1\n", csx/1e9, csy/1e9, csz/1e9); cell_free(cell); have_as = 0; have_bs = 0; have_cs = 0; } /* Not a bug: STREAM_REFLECTION_START_MARKER gets passed through */ if ( !d ) fputs(line, ofh); if ( strcmp(line, STREAM_REFLECTION_START_MARKER"\n") == 0 ) { reindex_reflections(fh, ofh, assignments[i++], amb); } } while ( 1 ); if ( !feof(fh) ) { ERROR("Error reading stream.\n"); } fclose(fh); fclose(ofh); } static void save_corr(const char *filename, struct cc_list *ccs, int n_crystals, int *assignments) { #ifdef HAVE_HDF5 hid_t fh, fsh, msh, cdh, rdh; herr_t r; hsize_t size[2]; int i; /* Create file */ fh = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); if ( fh < 0 ) { ERROR("Couldn't create file: %s\n", filename); return; } /* Size of overall dataset */ size[0] = n_crystals; size[1] = n_crystals; fsh = H5Screate_simple(2, size, NULL); msh = H5Screate_simple(2, size, NULL); /* Create overall correlation matrix dataset */ cdh = H5Dcreate2(fh, "correlation_matrix", H5T_NATIVE_FLOAT, fsh, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if ( cdh < 0 ) { ERROR("Couldn't create dataset\n"); ERROR("Correlation matrices will not be written.\n"); H5Fclose(fh); return; } /* Create overall reindexed correlation matrix dataset */ rdh = H5Dcreate2(fh, "correlation_matrix_reindexed", H5T_NATIVE_FLOAT, fsh, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if ( rdh < 0 ) { ERROR("Couldn't create dataset\n"); ERROR("Correlation matrices will not be written.\n"); H5Fclose(fh); return; } for ( i=0; ifeatures); for ( i=0; in_crystals; i++ ) { Crystal *cr; RefList *list; UnitCell *cell; cr = image->crystals[i].cr; list = image->crystals[i].refls; cell = crystal_get_cell(cr); if ( n_crystals == max_crystals ) { struct flist **crystals_new; size_t ns; ns = (max_crystals+1024)*sizeof(struct flist *); crystals_new = realloc(crystals, ns); if ( crystals_new == NULL ) { fprintf(stderr, "Failed to allocate " "memory for crystals.\n"); return 1; } max_crystals += 1024; crystals = crystals_new; } crystals[n_crystals] = asymm_and_merge(list, s_sym, cell, rmin, rmax, amb, auto_res); if ( crystals[n_crystals] == NULL ) { ERROR("asymm_and_merge failed!\n"); return 1; } cell_free(cell); n_crystals++; reflist_free(list); } fprintf(stderr, "Loaded %i crystals from %i chunks\r", n_crystals, ++n_chunks); } while ( 1 ); fprintf(stderr, "\n"); stream_close(st); assignments = malloc(n_crystals*sizeof(int)); if ( assignments == NULL ) { ERROR("Couldn't allocate memory for assignments.\n"); return 1; } orig_assignments = malloc(n_crystals*sizeof(int)); if ( orig_assignments == NULL ) { ERROR("Couldn't allocate memory for original assignments.\n"); return 1; } rng = gsl_rng_alloc(gsl_rng_mt19937); if ( config_random ) { FILE *fh; unsigned long int seed; fh = fopen("/dev/urandom", "r"); if ( fh == NULL ) { ERROR("Failed to open /dev/urandom. Try again without" " --really-random.\n"); return 1; } if ( fread(&seed, sizeof(seed), 1, fh) == 1 ) { gsl_rng_set(rng, seed); } else { ERROR("Failed to seed RNG\n"); } fclose(fh); } if ( start_ass_fn != NULL ) { FILE *fh; int i; fh = fopen(start_ass_fn, "r"); if ( fh == NULL ) { ERROR("Failed to open '%s'\n", start_ass_fn); return 1; } for ( i=0; i 0.5); } } for ( j=0; j n_crystals) ) { ncorr = n_crystals; } ccs = calc_ccs(crystals, n_crystals, ncorr, amb, rng, &mean_nac, n_threads); if ( ccs == NULL ) { ERROR("Failed to allocate CCs\n"); return 1; } STATUS("Mean number of correlations per crystal: %.1f\n", mean_nac); for ( j=0; js); free(crystals[j]->i); free(crystals[j]->s_reidx); free(crystals[j]->i_reidx); free(crystals[j]); } free(crystals); for ( j=0; j