/* * compare_hkl.c * * Compare reflection lists * * Copyright © 2012-2014 Deutsches Elektronen-Synchrotron DESY, * a research centre of the Helmholtz Association. * * Authors: * 2010-2014 Thomas White * 2013 Lorenzo Galli * * This file is part of CrystFEL. * * CrystFEL is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * CrystFEL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with CrystFEL. If not, see . * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include "utils.h" #include "statistics.h" #include "symmetry.h" #include "reflist-utils.h" #include "cell-utils.h" enum fom { FOM_R1I, FOM_R1F, FOM_R2, FOM_RSPLIT, FOM_CC, FOM_CCSTAR, FOM_CCANO, FOM_CRDANO, FOM_RANO, FOM_RANORSPLIT }; static enum fom get_fom(const char *s) { if ( strcasecmp(s, "r1i") == 0 ) return FOM_R1I; if ( strcasecmp(s, "r1f") == 0 ) return FOM_R1F; if ( strcasecmp(s, "r2") == 0 ) return FOM_R2; if ( strcasecmp(s, "rsplit") == 0 ) return FOM_RSPLIT; if ( strcasecmp(s, "cc") == 0 ) return FOM_CC; if ( strcasecmp(s, "ccstar") == 0 ) return FOM_CCSTAR; if ( strcasecmp(s, "ccano") == 0 ) return FOM_CCANO; if ( strcasecmp(s, "crdano") == 0 ) return FOM_CRDANO; if ( strcasecmp(s, "rano") == 0 ) return FOM_RANO; if ( strcasecmp(s, "rano/rsplit") == 0 ) return FOM_RANORSPLIT; ERROR("Unknown figure of merit '%s'.\n", s); exit(1); } static void show_help(const char *s) { printf("Syntax: %s [options] \n\n", s); printf( "Compare intensity lists.\n" "\n" " -y, --symmetry= The symmetry of both the input files.\n" " -p, --pdb= PDB file to use.\n" " --fom= Calculate this figure of merit Choose from:\n" " R1I, R1F, R2, Rsplit, CC, CCstar,\n" " CCano, CRDano, Rano, Rano/Rsplit\n" " --nshells= Use resolution shells.\n" " -u Force scale factor to 1.\n" " --shell-file= Write resolution shells to .\n" "\n" "You can control which reflections are included in the calculation:\n" "\n" " --ignore-negs Ignore reflections with negative intensities.\n" " --zero-negs Set negative intensities to zero.\n" " --sigma-cutoff= Discard reflections with I/sigma(I) < n.\n" " --rmin= Low resolution cutoff (1/d in m^-1).\n" " --rmax= High resolution cutoff (1/d in m^-1).\n" " --lowres= Low resolution cutoff in (d in A).\n" " --highres= High resolution cutoff in (d in A).\n" " --intensity-shells Use shells of intensity instead of resolution.\n" "\n" " -h, --help Display this help message.\n" ); } struct fom_context { enum fom fom; int nshells; /* For R-factors */ double *num; double *den; /* For "double" R-factors */ double *num2; double *den2; /* For CCs */ double **vec1; double **vec2; int *n; int nmax; }; static struct fom_context *init_fom(enum fom fom, int nmax, int nshells) { struct fom_context *fctx; int i; fctx = malloc(sizeof(struct fom_context)); if ( fctx == NULL ) return NULL; fctx->fom = fom; fctx->nshells = nshells; switch ( fctx->fom ) { case FOM_RANORSPLIT : fctx->num2 = malloc(nshells*sizeof(double)); fctx->den2 = malloc(nshells*sizeof(double)); if ( (fctx->num2 == NULL) || (fctx->den2 == NULL) ) return NULL; for ( i=0; inum2[i] = 0.0; fctx->den2[i] = 0.0; } /* Intentional fall-through (no break) */ case FOM_R1I : case FOM_R1F : case FOM_R2 : case FOM_RSPLIT : case FOM_RANO : fctx->num = malloc(nshells*sizeof(double)); fctx->den = malloc(nshells*sizeof(double)); if ( (fctx->num == NULL) || (fctx->den == NULL) ) return NULL; for ( i=0; inum[i] = 0.0; fctx->den[i] = 0.0; } break; case FOM_CC : case FOM_CCSTAR : case FOM_CCANO : case FOM_CRDANO : fctx->vec1 = malloc(nshells*sizeof(double *)); fctx->vec2 = malloc(nshells*sizeof(double *)); if ( (fctx->vec1 == NULL) || (fctx->vec2 == NULL) ) return NULL; for ( i=0; ivec1[i] = malloc(nmax*sizeof(double)); if ( fctx->vec1[i] == NULL ) return NULL; fctx->vec2[i] = malloc(nmax*sizeof(double)); if ( fctx->vec2[i] == NULL ) return NULL; fctx->n = malloc(nshells*sizeof(int)); if ( fctx->n == NULL ) return NULL; } for ( i=0; in[i] = 0; } fctx->nmax = nmax; break; } return fctx; } static void add_to_fom(struct fom_context *fctx, double i1, double i2, double i1bij, double i2bij, int bin) { double f1, f2; double im, imbij; /* Negative intensities have already been weeded out. */ f1 = sqrt(i1); f2 = sqrt(i2); switch ( fctx->fom ) { case FOM_R1I : fctx->num[bin] += fabs(i1 - i2); fctx->den[bin] += i1; break; case FOM_R1F : fctx->num[bin] += fabs(f1 - f2); fctx->den[bin] += f1; break; case FOM_R2 : fctx->num[bin] += pow(i1 - i2, 2.0); fctx->den[bin] += pow(i1, 2.0); break; case FOM_RSPLIT : fctx->num[bin] += fabs(i1 - i2); fctx->den[bin] += i1 + i2; break; case FOM_CC : case FOM_CCSTAR : assert(fctx->n[bin] < fctx->nmax); fctx->vec1[bin][fctx->n[bin]] = i1; fctx->vec2[bin][fctx->n[bin]] = i2; fctx->n[bin]++; break; case FOM_CCANO : case FOM_CRDANO : assert(fctx->n[bin] < fctx->nmax); fctx->vec1[bin][fctx->n[bin]] = i1 - i1bij; fctx->vec2[bin][fctx->n[bin]] = i2 - i2bij; fctx->n[bin]++; break; case FOM_RANORSPLIT : fctx->num2[bin] += fabs(i1 - i2); fctx->den2[bin] += i1 + i2; /* Intentional fall-through (no break) */ case FOM_RANO : im = (i1 + i2)/2.0; imbij = (i1bij + i2bij)/2.0; fctx->num[bin] += fabs(im - imbij); fctx->den[bin] += im + imbij; break; } } static double fom_overall(struct fom_context *fctx) { double overall_num = INFINITY; double overall_den = 0.0; double overall_num2 = INFINITY; double overall_den2 = 0.0; int i; double *overall_vec1; double *overall_vec2; int overall_n; double *overall_along_diagonal; double *overall_perpend_diagonal; double variance_signal; double variance_error; double cc = INFINITY; switch ( fctx->fom ) { case FOM_R1I : case FOM_R1F : case FOM_R2 : case FOM_RSPLIT : case FOM_RANO : overall_num = 0.0; overall_den = 0.0; for ( i=0; inshells; i++ ) { overall_num += fctx->num[i]; overall_den += fctx->den[i]; } break; case FOM_RANORSPLIT : overall_num = 0.0; overall_den = 0.0; for ( i=0; inshells; i++ ) { overall_num += fctx->num[i]; overall_den += fctx->den[i]; } overall_num2 = 0.0; overall_den2 = 0.0; for ( i=0; inshells; i++ ) { overall_num2 += fctx->num2[i]; overall_den2 += fctx->den2[i]; } break; case FOM_CC : case FOM_CCSTAR : case FOM_CCANO : overall_vec1 = malloc(fctx->nmax*sizeof(double)); overall_vec2 = malloc(fctx->nmax*sizeof(double)); overall_n = 0; for ( i=0; inshells; i++ ) { int j; for ( j=0; jn[i]; j++ ) { overall_vec1[overall_n] = fctx->vec1[i][j]; overall_vec2[overall_n] = fctx->vec2[i][j]; overall_n++; } } cc = gsl_stats_correlation(overall_vec1, 1, overall_vec2, 1, overall_n); free(overall_vec1); free(overall_vec2); break; case FOM_CRDANO : overall_along_diagonal = malloc(fctx->nmax*sizeof(double)); overall_perpend_diagonal = malloc(fctx->nmax*sizeof(double)); overall_n = 0; for ( i=0; inshells; i++ ) { int j; for ( j=0; jn[i]; j++ ) { overall_along_diagonal[overall_n] = ( fctx->vec1[i][j] + fctx->vec2[i][j] ) / sqrt(2.0); overall_perpend_diagonal[overall_n] = ( fctx->vec1[i][j] - fctx->vec2[i][j] ) / sqrt(2.0); overall_n++; } } variance_signal = gsl_stats_variance_m(overall_along_diagonal, 1, overall_n, 0.0); variance_error = gsl_stats_variance_m(overall_perpend_diagonal, 1, overall_n, 0.0); cc = sqrt(variance_signal / variance_error ); free(overall_along_diagonal); free(overall_perpend_diagonal); break; } switch ( fctx->fom ) { case FOM_R1I : case FOM_R1F : return overall_num/overall_den; case FOM_R2 : return sqrt(overall_num/overall_den); case FOM_RSPLIT : return 2.0*(overall_num/overall_den) / sqrt(2.0); case FOM_CC : case FOM_CCANO : case FOM_CRDANO : return cc; case FOM_CCSTAR : return sqrt((2.0*cc)/(1.0+cc)); case FOM_RANO : return 2.0*(overall_num/overall_den); case FOM_RANORSPLIT : return (2.0*(overall_num/overall_den)) / (2.0*(overall_num2/overall_den2) / sqrt(2.0)); } ERROR("This point is never reached.\n"); abort(); } static double fom_shell(struct fom_context *fctx, int i) { double cc; int j; double variance_signal; double variance_error; double *along_diagonal; double *perpend_diagonal; switch ( fctx->fom ) { case FOM_R1I : case FOM_R1F : return fctx->num[i]/fctx->den[i]; case FOM_R2 : return sqrt(fctx->num[i]/fctx->den[i]); case FOM_RSPLIT : return 2.0*(fctx->num[i]/fctx->den[i]) / sqrt(2.0); case FOM_CC : case FOM_CCANO : return gsl_stats_correlation(fctx->vec1[i], 1, fctx->vec2[i], 1, fctx->n[i]); case FOM_CCSTAR : cc = gsl_stats_correlation(fctx->vec1[i], 1, fctx->vec2[i], 1, fctx->n[i]); return sqrt((2.0*cc)/(1.0+cc)); case FOM_RANO : return 2.0 * fctx->num[i]/fctx->den[i]; case FOM_RANORSPLIT : return (2.0*fctx->num[i]/fctx->den[i]) / (2.0*(fctx->num2[i]/fctx->den2[i]) / sqrt(2.0)); case FOM_CRDANO : along_diagonal = malloc(fctx->n[i] * sizeof(double)); perpend_diagonal = malloc(fctx->n[i] * sizeof(double)); for ( j=0; jn[i]; j++ ) { along_diagonal[j] = ( fctx->vec1[i][j] + fctx->vec2[i][j] ) / sqrt(2.0); perpend_diagonal[j] = ( fctx->vec1[i][j] - fctx->vec2[i][j] ) / sqrt(2.0); } variance_signal = gsl_stats_variance_m(along_diagonal, 1, fctx->n[i], 0.0); variance_error = gsl_stats_variance_m(perpend_diagonal, 1, fctx->n[i], 0.0); free(along_diagonal); free(perpend_diagonal); return sqrt(variance_signal / variance_error); } ERROR("This point is never reached.\n"); abort(); } struct shells { int config_intshells; int nshells; double *rmins; double *rmaxs; }; static struct shells *set_intensity_shells(double min_I, double max_I, int nshells) { struct shells *s; int i; if ( min_I >= max_I ) { ERROR("Invalid intensity range.\n"); return NULL; } /* Adjust minimum and maximum intensities to get the most densely * populated part of the reflections */ max_I = min_I + (max_I-min_I)/5000.0; s = malloc(sizeof(struct shells)); if ( s == NULL ) return NULL; s->rmins = malloc(nshells*sizeof(double)); s->rmaxs = malloc(nshells*sizeof(double)); if ( (s->rmins==NULL) || (s->rmaxs==NULL) ) { ERROR("Couldn't allocate memory for shells.\n"); free(s); return NULL; } s->config_intshells = 1; s->nshells = nshells; for ( i=0; irmins[i] = min_I + i*(max_I - min_I)/nshells;; s->rmaxs[i] = min_I + (i+1)*(max_I - min_I)/nshells;; } return s; } static struct shells *set_resolution_shells(double rmin, double rmax, int nshells) { struct shells *s; double total_vol, vol_per_shell; int i; s = malloc(sizeof(struct shells)); if ( s == NULL ) return NULL; s->rmins = malloc(nshells*sizeof(double)); s->rmaxs = malloc(nshells*sizeof(double)); if ( (s->rmins==NULL) || (s->rmaxs==NULL) ) { ERROR("Couldn't allocate memory for resolution shells.\n"); free(s); return NULL; } s->config_intshells = 0; s->nshells = nshells; total_vol = pow(rmax, 3.0) - pow(rmin, 3.0); vol_per_shell = total_vol / nshells; s->rmins[0] = rmin; for ( i=1; irmins[i-1], 3.0); r = pow(r, 1.0/3.0); /* Shells of constant volume */ s->rmaxs[i-1] = r; s->rmins[i] = r; } s->rmaxs[nshells-1] = rmax; return s; } static double shell_label(struct shells *s, int i) { if ( s->config_intshells ) { return (i+0.5) / s->nshells; } else { return s->rmins[i] + (s->rmaxs[i] - s->rmins[i])/2.0; } } static int get_bin(struct shells *s, Reflection *refl, UnitCell *cell) { if ( s->config_intshells ) { double intensity; int bin, j; intensity = get_intensity(refl); bin = -1; for ( j=0; jnshells; j++ ) { if ( (intensity>s->rmins[j]) && (intensity<=s->rmaxs[j]) ) { bin = j; break; } } return bin; } else { double d; int bin, j; signed int h, k, l; get_indices(refl, &h, &k, &l); d = 2.0 * resolution(cell, h, k, l); bin = -1; for ( j=0; jnshells; j++ ) { if ( (d>s->rmins[j]) && (d<=s->rmaxs[j]) ) { bin = j; break; } } /* Allow for slight rounding errors */ if ( (bin == -1) && (d <= s->rmins[0]) ) bin = 0; if ( (bin == -1) && (d >= s->rmaxs[s->nshells-1]) ) bin = 0; assert(bin != -1); return bin; } } static void do_fom(RefList *list1, RefList *list2, UnitCell *cell, double rmin, double rmax, enum fom fom, int config_unity, int nshells, const char *filename, int config_intshells, double min_I, double max_I, SymOpList *sym) { int *cts; int i; Reflection *refl1; RefListIterator *iter; FILE *fh; double scale; struct fom_context *fctx; struct shells *shells; const char *t1, *t2; int n_out; cts = malloc(nshells*sizeof(int)); fctx = init_fom(fom, num_reflections(list1), nshells); if ( (fctx==NULL) || (cts==NULL) ) { ERROR("Couldn't allocate memory for resolution shells.\n"); return; } for ( i=0; i 0.0 ) { double res = 2.0*resolution(cell, h, k, l); if ( res < rmin_fix ) { nres++; continue; } } if ( rmax_fix > 0.0 ) { double res = 2.0*resolution(cell, h, k, l); if ( res > rmax_fix ) { nres++; continue; } } if ( (fom == FOM_CCANO) || (fom == FOM_CRDANO) || (fom == FOM_RANO) || (fom == FOM_RANORSPLIT) ) { Reflection *refl1_bij = NULL; Reflection *refl2_bij = NULL; signed int hb, kb, lb; if ( is_centric(h, k, l, sym) ) { ncen++; continue; } if ( find_equiv_in_list(list1, -h, -k, -l, sym, &hb, &kb, &lb) ) { refl1_bij = find_refl(list1, hb, kb, lb); } if ( find_equiv_in_list(list2, -h, -k, -l, sym, &hb, &kb, &lb) ) { refl2_bij = find_refl(list2, hb, kb, lb); } if ( (refl1_bij == NULL) || (refl2_bij == NULL) ) { nbij++; continue; } } refl1_acc = add_refl(list1_acc, h, k, l); copy_data(refl1_acc, refl1); set_intensity(refl1_acc, val1); refl2_acc = add_refl(list2_acc, h, k, l); copy_data(refl2_acc, refl2); set_intensity(refl2_acc, val2); if ( val1 > max_I ) max_I = val1; if ( val1 < min_I ) min_I = val1; ncom++; } gsl_set_error_handler_off(); if ( nrej > 0 ) { STATUS("Discarded %i reflection pairs because either or both" " versions had I/sigma(I) < %f.\n", nrej, sigma_cutoff); } if ( config_ignorenegs && (nneg > 0) ) { STATUS("Discarded %i reflection pairs because either or both" " versions had negative intensities.\n", nneg); } if ( config_zeronegs && (nneg > 0) ) { STATUS("For %i reflection pairs, either or both versions had" " negative intensities which were set to zero.\n", nneg); } if ( nres > 0 ) { STATUS("%i reflection pairs rejected because either or both" " versions were outside the resolution range.\n", nres); } if ( nbij > 0 ) { STATUS("%i reflection pairs rejected because either or both" " versions did not have Bijvoet partners.\n", nres); } if ( ncen > 0 ) { STATUS("%i reflection pairs rejected because they were" " centric.\n", ncen); } STATUS("%i reflection pairs accepted.\n", ncom); resolution_limits(list1_acc, cell, &rmin, &rmax); resolution_limits(list2_acc, cell, &rmin, &rmax); STATUS("Accepted resolution range: %f to %f nm^-1" " (%.2f to %.2f Angstroms).\n", rmin/1e9, rmax/1e9, 1e10/rmin, 1e10/rmax); reflist_free(list1_raw); reflist_free(list2_raw); reflist_free(list1); reflist_free(list2); if ( rmin_fix >= 0.0 ) { rmin = rmin_fix; } if ( rmax_fix >= 0.0 ) { rmax = rmax_fix; } if ( (rmin_fix>=0.0) || (rmax_fix>=0.0) ) { STATUS("Fixed resolution range: %f to %f nm^-1" " (%.2f to %.2f Angstroms).\n", rmin/1e9, rmax/1e9, 1e10/rmin, 1e10/rmax); } do_fom(list1_acc, list2_acc, cell, rmin, rmax, fom, config_unity, nshells, shell_file, config_intshells, min_I, max_I, sym); free(shell_file); reflist_free(list1_acc); reflist_free(list2_acc); return 0; }