/* * compare_hkl.c * * Compare reflection lists * * Copyright © 2012-2021 Deutsches Elektronen-Synchrotron DESY, * a research centre of the Helmholtz Association. * * Authors: * 2010-2021 Thomas White * 2013 Lorenzo Galli * * This file is part of CrystFEL. * * CrystFEL is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * CrystFEL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with CrystFEL. If not, see . * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "version.h" static void show_help(const char *s) { printf("Syntax: %s [options] \n\n", s); printf( "Compare intensity lists.\n" "\n" " -y, --symmetry= The symmetry of both the input files.\n" " -p, --pdb= Unit cell file to use.\n" " --fom= Calculate this figure of merit Choose from:\n" " R1I, R1F, R2, Rsplit, CC, CCstar,\n" " CCano, CRDano, Rano, Rano/Rsplit, d1sig,\n" " d2sig\n" " --nshells= Use resolution shells.\n" " -u Force scale factor to 1.\n" " --shell-file= Write resolution shells to .\n" "\n" "You can control which reflections are included in the calculation:\n" "\n" " --ignore-negs Ignore reflections with negative intensities.\n" " --zero-negs Set negative intensities to zero.\n" " --sigma-cutoff= Discard reflections with I/sigma(I) < n.\n" " --rmin= Low resolution cutoff (1/d in m^-1).\n" " --rmax= High resolution cutoff (1/d in m^-1).\n" " --lowres= Low resolution cutoff in (d in A).\n" " --highres= High resolution cutoff in (d in A).\n" "\n" " -h, --help Display this help message.\n" " --version Print CrystFEL version number and exit.\n" ); } static enum fom_type fom_type_from_string(const char *s) { if ( strcasecmp(s, "r1i") == 0 ) return FOM_R1I; if ( strcasecmp(s, "r1f") == 0 ) return FOM_R1F; if ( strcasecmp(s, "r2") == 0 ) return FOM_R2; if ( strcasecmp(s, "rsplit") == 0 ) return FOM_RSPLIT; if ( strcasecmp(s, "cc") == 0 ) return FOM_CC; if ( strcasecmp(s, "cc1/2") == 0 ) return FOM_CC; if ( strcasecmp(s, "cchalf") == 0 ) return FOM_CC; if ( strcasecmp(s, "ccstar") == 0 ) return FOM_CCSTAR; if ( strcasecmp(s, "cc*") == 0 ) return FOM_CCSTAR; if ( strcasecmp(s, "ccano") == 0 ) return FOM_CCANO; if ( strcasecmp(s, "crdano") == 0 ) return FOM_CRDANO; if ( strcasecmp(s, "rano") == 0 ) return FOM_RANO; if ( strcasecmp(s, "rano/rsplit") == 0 ) return FOM_RANORSPLIT; if ( strcasecmp(s, "d1sig") == 0 ) return FOM_D1SIG; if ( strcasecmp(s, "d2sig") == 0 ) return FOM_D2SIG; ERROR("Unknown figure of merit '%s'.\n", s); exit(1); } static void do_fom(RefList *list1, RefList *list2, UnitCell *cell, double rmin, double rmax, enum fom_type fom, int config_unity, int nshells, const char *filename, SymOpList *sym) { struct fom_shells *shells; struct fom_context *fctx; FILE *fh; int i; const char *t1, *t2; /* Calculate the bins */ shells = fom_make_resolution_shells(rmin, rmax, nshells); if ( shells == NULL ) { ERROR("Failed to set up shells.\n"); return; } fctx = fom_calculate(list1, list2, cell, shells, fom, config_unity, sym); switch ( fom ) { case FOM_R1I : STATUS("Overall R1(I) = %.2f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_R1F : STATUS("Overall R1(F) = %.2f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_R2 : STATUS("Overall R(2) = %.2f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_RSPLIT : STATUS("Overall Rsplit = %.2f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_CC : STATUS("Overall CC = %.7f\n", fom_overall_value(fctx)); break; case FOM_CCSTAR : STATUS("Overall CC* = %.7f\n", fom_overall_value(fctx)); break; case FOM_CCANO : STATUS("Overall CCano = %.7f\n", fom_overall_value(fctx)); break; case FOM_CRDANO : STATUS("Overall CRDano = %.7f\n", fom_overall_value(fctx)); break; case FOM_RANO : STATUS("Overall Rano = %.2f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_RANORSPLIT : STATUS("Overall Rano/Rsplit = %.7f\n", fom_overall_value(fctx)); break; case FOM_D1SIG : STATUS("Fraction of differences less than 1 sigma = %.7f %%\n", 100.0*fom_overall_value(fctx)); break; case FOM_D2SIG : STATUS("Fraction of differences less than 2 sigma = %.7f %%\n", 100.0*fom_overall_value(fctx)); break; default : ERROR("Unhandled figure of merit type\n"); break; } fh = fopen(filename, "w"); if ( fh == NULL ) { ERROR("Couldn't open '%s'\n", filename); return; } t1 = " 1/d centre"; t2 = " d / A Min 1/nm Max 1/nm"; switch ( fom ) { case FOM_R1I : fprintf(fh, "%s R1(I)/%% nref%s\n", t1, t2); break; case FOM_R1F : fprintf(fh, "%s R1(F)/%% nref%s\n", t1, t2); break; case FOM_R2 : fprintf(fh, "%s R2/%% nref%s\n", t1, t2); break; case FOM_RSPLIT : fprintf(fh, "%s Rsplit/%% nref%s\n", t1, t2); break; case FOM_CC : fprintf(fh, "%s CC nref%s\n", t1, t2); break; case FOM_CCSTAR : fprintf(fh, "%s CC* nref%s\n", t1, t2); break; case FOM_CCANO : fprintf(fh, "%s CCano nref%s\n", t1, t2); break; case FOM_CRDANO : fprintf(fh, "%s CRDano nref%s\n", t1, t2); break; case FOM_RANO : fprintf(fh, "%s Rano/%% nref%s\n", t1, t2); break; case FOM_RANORSPLIT : fprintf(fh, "%s Rano/Rsplit nref%s\n", t1, t2); break; case FOM_D1SIG : fprintf(fh, "%s D<1sigma/%% nref%s\n", t1, t2); break; case FOM_D2SIG : fprintf(fh, "%s D<2sigma/%% nref%s\n", t1, t2); break; default : break; } for ( i=0; irmins[i]*1.0e-9, shells->rmaxs[i]*1.0e-9); break; case FOM_CC : case FOM_CCSTAR : case FOM_CCANO : case FOM_CRDANO : fprintf(fh, "%10.3f %10.7f %10i %10.2f " "%10.3f %10.3f\n", cen*1.0e-9, r, fom_shell_num_reflections(fctx, i), (1.0/cen)*1e10, shells->rmins[i]*1.0e-9, shells->rmaxs[i]*1.0e-9); break; case FOM_RANORSPLIT : fprintf(fh, "%10.3f %10.7f %10i %10.2f " "%10.3f %10.3f\n", cen*1.0e-9, r, fom_shell_num_reflections(fctx, i), (1.0/cen)*1e10, shells->rmins[i]*1.0e-9, shells->rmaxs[i]*1.0e-9); break; case FOM_D1SIG : case FOM_D2SIG : fprintf(fh, "%10.3f %10.2f %10i %10.2f " "%10.3f %10.3f\n", cen*1.0e-9, r*100.0, fom_shell_num_reflections(fctx, i), (1.0/cen)*1e10, shells->rmins[i]*1.0e-9, shells->rmaxs[i]*1.0e-9); break; default : break; } } fclose(fh); } static void check_highres() { static int have = 0; if ( have ) { ERROR("You cannot use --rmax and --highres at the same time.\n"); exit(1); } have = 1; } static void check_lowres() { static int have = 0; if ( have ) { ERROR("You cannot use --rmin and --lowres at the same time.\n"); exit(1); } have = 1; } int main(int argc, char *argv[]) { int c; UnitCell *cell; char *afile = NULL; char *bfile = NULL; char *sym_str = NULL; char *sym_str_fromfile = NULL; char *sym_str_fromfile1 = NULL; char *sym_str_fromfile2 = NULL; SymOpList *sym; RefList *list1_acc; RefList *list2_acc; RefList *list1; RefList *list2; RefList *list1_raw; RefList *list2_raw; enum fom_type fom = FOM_R1I; char *cellfile = NULL; float rmin_fix = -1.0; float rmax_fix = -1.0; double rmin, rmax; float sigma_cutoff = -INFINITY; int config_ignorenegs = 0; int config_zeronegs = 0; int config_unity = 0; int nshells = 10; char *shell_file = NULL; float highres, lowres; int mul_cutoff = 0; int anom; struct fom_rejections rej; /* Long options */ const struct option longopts[] = { {"help", 0, NULL, 'h'}, {"version", 0, NULL, 10 }, {"symmetry", 1, NULL, 'y'}, {"pdb", 1, NULL, 'p'}, {"rmin", 1, NULL, 2}, {"rmax", 1, NULL, 3}, {"fom", 1, NULL, 4}, {"sigma-cutoff", 1, NULL, 5}, {"nshells", 1, NULL, 6}, {"shell-file", 1, NULL, 7}, {"highres", 1, NULL, 8}, {"lowres", 1, NULL, 9}, {"min-measurements", 1, NULL, 11}, {"ignore-negs", 0, &config_ignorenegs, 1}, {"zero-negs", 0, &config_zeronegs, 1}, {0, 0, NULL, 0} }; /* Short options */ while ((c = getopt_long(argc, argv, "hy:p:u", longopts, NULL)) != -1) { switch (c) { case 'h' : show_help(argv[0]); return 0; case 10 : printf("CrystFEL: %s\n", crystfel_version_string()); printf("%s\n", crystfel_licence_string()); return 0; case 'y' : sym_str = strdup(optarg); break; case 'p' : cellfile = strdup(optarg); break; case 'u' : config_unity = 1; break; case 0 : break; case 2 : check_lowres(); if ( sscanf(optarg, "%e", &rmin_fix) != 1 ) { ERROR("Invalid value for --rmin\n"); return 1; } break; case 3 : check_highres(); if ( sscanf(optarg, "%e", &rmax_fix) != 1 ) { ERROR("Invalid value for --rmax\n"); return 1; } break; case 4 : fom = fom_type_from_string(optarg); break; case 5 : if ( sscanf(optarg, "%f", &sigma_cutoff) != 1 ) { ERROR("Invalid value for --sigma-cutoff\n"); return 1; } STATUS("WARNING: You are using --sigma-cutoff. " "Be aware that the figures of merit will not " "reflect the entire data set!\n"); break; case 6 : if ( sscanf(optarg, "%i", &nshells) != 1 ) { ERROR("Invalid value for --nshells\n"); return 1; } break; case 7 : shell_file = strdup(optarg); break; case 8 : check_highres(); if ( sscanf(optarg, "%e", &highres) != 1 ) { ERROR("Invalid value for --highres\n"); return 1; } rmax_fix = 1.0 / (highres/1e10); break; case 9 : check_lowres(); if ( sscanf(optarg, "%e", &lowres) != 1 ) { ERROR("Invalid value for --lowres\n"); return 1; } rmin_fix = 1.0 / (lowres/1e10); break; case 11 : if ( sscanf(optarg, "%i", &mul_cutoff) != 1 ) { ERROR("Invalid value for --min-measurements\n"); return 1; } break; case '?' : break; default : ERROR("Unhandled option '%c'\n", c); break; } } if ( argc != (optind+2) ) { ERROR("Please provide exactly two HKL files to compare.\n"); return 1; } if ( !config_ignorenegs && !config_zeronegs ) { switch ( fom ) { case FOM_R1F : ERROR("Your chosen figure of merit involves converting" " intensities to structure factors, but you have" " not specified how to handle negative" " intensities.\n"); ERROR("Please try again with --ignore-negs or" " --zero-negs.\n"); return 1; case FOM_R2 : case FOM_R1I : case FOM_RSPLIT : case FOM_CC : case FOM_CCSTAR : case FOM_CCANO : case FOM_CRDANO : case FOM_RANO : case FOM_RANORSPLIT : case FOM_D1SIG : case FOM_D2SIG : break; default : ERROR("Unhandled figure of merit!\n"); return 1; } } if ( (fom != FOM_R1F) && (config_ignorenegs || config_zeronegs) ) { ERROR("WARNING: You are using --zero-negs or --ignore-negs " "even though your chosen figure of merit does not " "require it.\n"); ERROR("The figures of merit will not reflect the entire data " "set!\n"); } afile = strdup(argv[optind++]); bfile = strdup(argv[optind]); if ( shell_file == NULL ) shell_file = strdup("shells.dat"); cell = load_cell_from_file(cellfile); if ( cellfile == NULL ) { ERROR("You must provide a unit cell.\n"); exit(1); } if ( cell == NULL ) { ERROR("Failed to load cell.\n"); return 1; } free(cellfile); list1_raw = read_reflections_2(afile, &sym_str_fromfile1); if ( list1_raw == NULL ) { ERROR("Couldn't read file '%s'\n", afile); return 1; } list2_raw = read_reflections_2(bfile, &sym_str_fromfile2); if ( list2_raw == NULL ) { ERROR("Couldn't read file '%s'\n", bfile); return 1; } if ( (sym_str_fromfile1 != NULL) && (sym_str_fromfile2 != NULL) ) { if ( strcmp(sym_str_fromfile1, sym_str_fromfile2) != 0 ) { ERROR("The symmetries of the two list do not match:\n"); ERROR(" %s: %s\n", afile, sym_str_fromfile1); ERROR(" %s: %s\n", bfile, sym_str_fromfile2); return 1; } sym_str_fromfile = sym_str_fromfile1; free(sym_str_fromfile2); } if ( sym_str == NULL ) { if ( sym_str_fromfile != NULL ) { STATUS("Using symmetry from reflection files: %s\n", sym_str_fromfile); sym_str = sym_str_fromfile; } else { sym_str = strdup("1"); } } sym = get_pointgroup(sym_str); free(sym_str); if ( is_centrosymmetric(sym) ) { switch ( fom ) { case FOM_R1F : case FOM_R2 : case FOM_R1I : case FOM_RSPLIT : case FOM_CC : case FOM_CCSTAR : case FOM_D1SIG : case FOM_D2SIG : break; case FOM_CCANO : case FOM_CRDANO : case FOM_RANO : case FOM_RANORSPLIT : ERROR("You are trying to measure an anomalous signal in" " a centrosymmetric point group.\n"); ERROR("This is a silly thing to do, and I'm refusing to" " help you do it.\n"); ERROR("Please review your earlier processing steps and" " try again using a non-centrosymmetric point" " group for '-y'.\n"); return 1; default : ERROR("Unhandled figure of merit type!\n"); return 1; } } /* Check that the intensities have the correct symmetry */ if ( check_list_symmetry(list1_raw, sym) ) { ERROR("The first input reflection list does not appear to" " have symmetry %s\n", symmetry_name(sym)); if ( cell_get_lattice_type(cell) == L_MONOCLINIC ) { ERROR("You may need to specify the unique axis in your " "point group. The default is unique axis c.\n"); ERROR("See 'man crystfel' for more details.\n"); } return 1; } if ( check_list_symmetry(list2_raw, sym) ) { ERROR("The second input reflection list does not appear to" " have symmetry %s\n", symmetry_name(sym)); if ( cell_get_lattice_type(cell) == L_MONOCLINIC ) { ERROR("You may need to specify the unique axis in your " "point group. The default is unique axis c.\n"); ERROR("See 'man crystfel' for more details.\n"); } return 1; } resolution_limits(list1_raw, cell, &rmin, &rmax); STATUS("%s: %i reflections, resolution range %.2f to %.2f Angstroms" " (%.5f to %.5f nm^-1).\n", afile, num_reflections(list1_raw), 1e10/rmin, 1e10/rmax, rmin/1e9, rmax/1e9); resolution_limits(list2_raw, cell, &rmin, &rmax); STATUS("%s: %i reflections, resolution range %.2f to %.2f Angstroms" " (%.5f to %.5f nm^-1).\n", bfile, num_reflections(list2_raw), 1e10/rmin, 1e10/rmax, rmin/1e9, rmax/1e9); list1 = asymmetric_indices(list1_raw, sym); list2 = asymmetric_indices(list2_raw, sym); reflist_free(list1_raw); reflist_free(list2_raw); anom = ( (fom == FOM_CCANO) || (fom == FOM_CRDANO) || (fom == FOM_RANO) || (fom == FOM_RANORSPLIT) ); rej = fom_select_reflection_pairs(list1, list2, &list1_acc, &list2_acc, cell, sym, anom, rmin_fix, rmax_fix, sigma_cutoff, config_ignorenegs, config_zeronegs, mul_cutoff); reflist_free(list1); reflist_free(list2); gsl_set_error_handler_off(); if ( rej.low_snr > 0 ) { STATUS("Discarded %i reflection pairs because either or both" " versions had I/sigma(I) < %f.\n", rej.low_snr, sigma_cutoff); } if ( rej.negative_deleted > 0 ) { STATUS("Discarded %i reflection pairs because either or both" " versions had negative intensities.\n", rej.negative_deleted); } if ( rej.negative_zeroed > 0 ) { STATUS("For %i reflection pairs, either or both versions had" " negative intensities which were set to zero.\n", rej.negative_zeroed); } if ( rej.few_measurements > 0 ) { STATUS("%i reflection pairs rejected because either or both" " versions had too few measurements.\n", rej.few_measurements); } if ( rej.outside_resolution_range > 0 ) { STATUS("%i reflection pairs rejected because either or both" " versions were outside the resolution range.\n", rej.outside_resolution_range); } if ( rej.no_bijvoet > 0 ) { STATUS("%i reflection pairs rejected because either or both" " versions did not have Bijvoet partners.\n", rej.no_bijvoet); } if ( rej.centric > 0 ) { STATUS("%i reflection pairs rejected because they were" " centric.\n", rej.centric); } STATUS("%i reflection pairs accepted.\n", rej.common); resolution_limits(list1_acc, cell, &rmin, &rmax); resolution_limits(list2_acc, cell, &rmin, &rmax); STATUS("Accepted resolution range: %f to %f nm^-1" " (%.2f to %.2f Angstroms).\n", rmin/1e9, rmax/1e9, 1e10/rmin, 1e10/rmax); if ( rmin_fix >= 0.0 ) { rmin = rmin_fix; } if ( rmax_fix >= 0.0 ) { rmax = rmax_fix; } if ( (rmin_fix>=0.0) || (rmax_fix>=0.0) ) { STATUS("Fixed resolution range: %f to %f nm^-1" " (%.2f to %.2f Angstroms).\n", rmin/1e9, rmax/1e9, 1e10/rmin, 1e10/rmax); } do_fom(list1_acc, list2_acc, cell, rmin, rmax, fom, config_unity, nshells, shell_file, sym); free(shell_file); reflist_free(list1_acc); reflist_free(list2_acc); return 0; }