/* * process_hkl.c * * Assemble and process FEL Bragg intensities * * (c) 2006-2010 Thomas White * * Part of CrystFEL - crystallography with a FEL * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include "utils.h" #include "statistics.h" #include "sfac.h" #include "reflections.h" #include "symmetry.h" #include "stream.h" #include "beam-parameters.h" /* Number of divisions for intensity histograms */ #define NBINS (50) static void show_help(const char *s) { printf("Syntax: %s [options]\n\n", s); printf( "Assemble and process FEL Bragg intensities.\n" "\n" " -h, --help Display this help message.\n" " -i, --input= Specify input filename (\"-\" for stdin).\n" " -o, --output= Specify output filename for merged intensities\n" " (don't specify for no output).\n" " -p, --pdb= PDB file to use (default: molecule.pdb).\n" " -b, --beam= Get beam parameters from file (used for sigmas).\n" "\n" " --max-only Take the integrated intensity to be equal to the\n" " maximum intensity measured for that reflection.\n" " The default is to use the mean value from all\n" " measurements.\n" " --sum Sum (rather than average) the intensities for the\n" " final output list. This is useful for comparing\n" " results to radially summed powder patterns, but\n" " will break R-factor analysis.\n" " --start-after= Skip n patterns at the start of the stream.\n" " --stop-after= Stop after processing n patterns. Zero means\n" " keep going until the end of the input, and is\n" " the default.\n" " -g, --histogram= Calculate the histogram of measurements for this\n" " reflection.\n" " --rmerge Calculate and report Rmerge and Rpim\n" "\n" " --scale Scale each pattern for best fit with the current\n" " model.\n" " -y, --symmetry= Merge according to point group .\n" " -a, --input-symmetry= Specify the apparent (input) symmetry.\n" " --reference= Compare against intensities from when\n" " scaling or resolving ambiguities.\n" " The symmetry of the reference list must be the\n" " same as that given with '-y'.\n" " --outstream= Write an annotated version of the input stream\n" " to .\n" ); } static void plot_histogram(double *vals, int n) { int i; double max = -INFINITY; double min = +INFINITY; double step; int histo[NBINS]; FILE *fh; fh = fopen("histogram.dat", "w"); if ( fh == NULL ) { ERROR("Couldn't open 'histogram.dat'\n"); return; } for ( i=0; i max ) max = vals[i]; if ( vals[i] < min ) min = vals[i]; } STATUS("%f %f\n", min, max); min--; max++; for ( i=0; iop; for ( j=0; jh, r->k, r->l, &h, &k, &l, holo, op); get_asymm(h, k, l, &h, &k, &l, mero); set_intensity(trial_ints, h, k, l, lookup_intensity(patt, r->h, r->k, r->l)); set_count(trial_counts, h, k, l, 1); } intersection = intersection_items(observed, items); fom = stat_pearson_i(trial_ints, model, intersection); delete_items(intersection); free(trial_ints); free(trial_counts); //printf(" %f", fom); if ( fom > best_fom ) { best_fom = fom; best_op = op; } } //printf("\n"); return best_op; } static void merge_pattern(double *model, ReflItemList *observed, const double *new, ReflItemList *items, unsigned int *model_counts, int mo, ReflItemList *twins, const char *holo, const char *mero, ReflItemList *reference, const double *reference_i, double *hist_vals, signed int hist_h, signed int hist_k, signed int hist_l, int *hist_n, double *devs, double *means, FILE *outfh) { int i; int twin; ReflItemList *sym_items = new_items(); if ( twins != NULL ) { if ( reference != NULL ) { twin = resolve_twin(reference_i, reference, new, items, twins, holo, mero); } else { twin = resolve_twin(model, observed, new, items, twins, holo, mero); } } else { twin = 0; } if ( outfh != NULL ) { fprintf(outfh, "twin=%i\n", twin); } for ( i=0; ih; ks = item->k; ls = item->l; /* Transform into correct side of the twin law. * "twin" is always zero if no de-twinning is performed. */ get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin); /* Put into the asymmetric unit for the target group */ get_asymm(h, k, l, &h, &k, &l, mero); /* Read the intensity from the original location * (i.e. before screwing around with symmetry) */ intensity = lookup_intensity(new, hs, ks, ls); /* User asked for max only? */ if ( !mo ) { integrate_intensity(model, h, k, l, intensity); } else { if ( intensity > lookup_intensity(model, h, k, l) ) { set_intensity(model, h, k, l, intensity); } } if ( devs != NULL ) { double m; m = lookup_intensity(means, h, k, l); integrate_intensity(devs, h, k, l, pow(intensity-m, 2.0)); } if ( !find_item(sym_items, h, k, l) ) { add_item(sym_items, h, k, l); } /* Increase count */ integrate_count(model_counts, h, k, l, 1); if ( hist_vals != NULL ) { int p = *hist_n; if ( (h==hist_h) && (k==hist_k) && (l==hist_l) ) { hist_vals[p] = intensity; *hist_n = p+1; } } } /* Dump the reflections in this pattern into the overall list */ union_items(observed, sym_items); delete_items(sym_items); } enum { SCALE_NONE, SCALE_CONSTINT, SCALE_INTPERBRAGG, SCALE_TWOPASS, }; static void scale_intensities(const double *model, ReflItemList *model_items, double *new_pattern, ReflItemList *new_items, double f0, int f0_valid, const char *sym) { double s; double top = 0.0; double bot = 0.0; unsigned int i; const int scaling = SCALE_INTPERBRAGG; for ( i=0; ih, it->k, it->l, sym, &hu, &ku, &lu); i1 = lookup_intensity(model, hu, ku, lu); i2 = lookup_intensity(new_pattern, it->h, it->k, it->l); /* Calculate LSQ estimate of scaling factor */ top += i1 * i2; bot += i2 * i2; break; case SCALE_CONSTINT : /* Sum up the intensity in the pattern */ i2 = lookup_intensity(new_pattern, it->h, it->k, it->l); top += i2; break; case SCALE_INTPERBRAGG : /* Sum up the intensity in the pattern */ i2 = lookup_intensity(new_pattern, it->h, it->k, it->l); top += i2; bot += 1.0; break; } } switch ( scaling ) { case SCALE_TWOPASS : s = top / bot; break; case SCALE_CONSTINT : s = 1000.0 / top; break; case SCALE_INTPERBRAGG : s = 1000.0 / (top/bot); break; } //if ( f0_valid ) printf("%f %f\n", s, f0); /* Multiply the new pattern up by "s" */ for ( i=0; i0) ) { /* Assume a default I0 if we don't have one by now */ if ( config_scale && !f0_valid ) { n_nof0++; f0 = 1.0; } /* Scale if requested */ if ( config_scale ) { if ( reference == NULL ) { scale_intensities(model, observed, new_pattern, items, f0, f0_valid, mero); } else { scale_intensities(reference_i, reference, new_pattern, items, f0, f0_valid, mero); } } /* Start of second or later pattern */ merge_pattern(model, observed, new_pattern, items, counts, config_maxonly, twins, holo, mero, reference, reference_i, hist_vals, hist_h, hist_k, hist_l, hist_i, devs, means, outfh); n_patterns++; if ( n_patterns == config_stopafter ) { progress_bar(n_total_patterns, n_total_patterns, "Merging"); break; } progress_bar(n_patterns, n_total_patterns, "Merging"); /* Reset for the next pattern */ clear_items(items); f0_valid = 0; } if ( outfh != NULL ) { fprintf(outfh, "%s", line); } if ( strncmp(line, "f0 = ", 5) == 0 ) { r = sscanf(line, "f0 = %f", &f0); if ( r != 1 ) { f0 = 1.0; f0_valid = 0; continue; } f0_valid = 1; } r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity); if ( r != 4 ) continue; /* Not interested in the central beam */ if ( (h==0) && (k==0) && (l==0) ) continue; /* The same raw indices (before mapping into the asymmetric * unit should not turn up twice in one pattern. */ if ( find_item(items, h, k, l) != 0 ) { ERROR("More than one measurement for %i %i %i in" " pattern number %i\n", h, k, l, n_patterns); } set_intensity(new_pattern, h, k, l, intensity); /* NB: This list contains raw indices, before working out * where they belong in the asymmetric unit. */ add_item(items, h, k, l); } while ( rval != NULL ); delete_items(items); free(new_pattern); /* Calculate mean intensity if necessary */ if ( !config_sum && !config_maxonly ) { for ( i=0; i 0 ) { model[i] /= (double)counts[i]; } } } *pmodel = model; *pcounts = counts; *pobserved = observed; STATUS("%i patterns had no f0 valid value.\n", n_nof0); } int main(int argc, char *argv[]) { int c; char *filename = NULL; char *output = NULL; FILE *fh; double *model; unsigned int *counts; UnitCell *cell = NULL; int config_maxonly = 0; int config_startafter = 0; int config_stopafter = 0; int config_sum = 0; int config_scale = 0; int config_rmerge = 0; unsigned int n_total_patterns; char *sym = NULL; char *in_sym = NULL; char *pdb = NULL; ReflItemList *twins; ReflItemList *observed; int i; char *histo = NULL; signed int hist_h, hist_k, hist_l; double *hist_vals = NULL; int hist_i; char *outstream = NULL; char *reference = NULL; ReflItemList *reference_items; double *reference_i; FILE *outfh = NULL; struct beam_params *beam = NULL; int space_for_hist = 0; double *devs; double *esds; /* Long options */ const struct option longopts[] = { {"help", 0, NULL, 'h'}, {"input", 1, NULL, 'i'}, {"output", 1, NULL, 'o'}, {"max-only", 0, &config_maxonly, 1}, {"output-every", 1, NULL, 'e'}, {"stop-after", 1, NULL, 's'}, {"start-after", 1, NULL, 'f'}, {"sum", 0, &config_sum, 1}, {"scale", 0, &config_scale, 1}, {"symmetry", 1, NULL, 'y'}, {"input-symmetry", 1, NULL, 'a'}, {"pdb", 1, NULL, 'p'}, {"histogram", 1, NULL, 'g'}, {"rmerge", 0, &config_rmerge, 1}, {"outstream", 1, NULL, 2}, {"reference", 1, NULL, 'r'}, {"beam", 1, NULL, 'b'}, {0, 0, NULL, 0} }; /* Short options */ while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:g:f:a:r:b:", longopts, NULL)) != -1) { switch (c) { case 'h' : show_help(argv[0]); return 0; case 'i' : filename = strdup(optarg); break; case 'o' : output = strdup(optarg); break; case 's' : config_stopafter = atoi(optarg); break; case 'f' : config_startafter = atoi(optarg); break; case 'p' : pdb = strdup(optarg); break; case 'y' : sym = strdup(optarg); break; case 'g' : histo = strdup(optarg); break; case 'r' : reference = strdup(optarg); break; case 2 : outstream = strdup(optarg); break; case 'a' : in_sym = strdup(optarg); break; case 'b' : beam = get_beam_parameters(optarg); if ( beam == NULL ) { ERROR("Failed to load beam parameters" " from '%s'\n", optarg); return 1; } break; case 0 : break; default : return 1; } } if ( config_sum && config_rmerge ) { ERROR("Options --sum and --rmerge do not make sense" " together.\n"); return 1; } if ( filename == NULL ) { ERROR("Please specify filename using the -i option\n"); return 1; } if ( pdb == NULL ) { pdb = strdup("molecule.pdb"); } cell = load_cell_from_pdb(pdb); free(pdb); if ( sym == NULL ) sym = strdup("1"); /* Show useful symmetry information */ if ( (sym != NULL) && (in_sym != NULL) ) { int np = num_general_equivs(in_sym) / num_general_equivs(sym); if ( np > 1 ) { STATUS("Resolving point group %s into %s " "(%i possibilities)\n", in_sym, sym, np); /* Get the list of twin/Bijvoet possibilities */ twins = get_twin_possibilities(in_sym, sym); STATUS("Twin operation indices from %s are:", in_sym); for ( i=0; iop); } STATUS("\n"); } else { STATUS("No resolution required to get from %s to %s\n", in_sym, sym); twins = NULL; } } else { STATUS("No twin resolution requested (use -a otherwise).\n"); twins = NULL; in_sym = strdup(sym); } /* Open the data stream */ if ( strcmp(filename, "-") == 0 ) { fh = stdin; } else { fh = fopen(filename, "r"); } free(filename); if ( fh == NULL ) { ERROR("Failed to open input file\n"); return 1; } /* Read the reference reflections */ if ( reference != NULL ) { reference_i = new_list_intensity(); reference_items = read_reflections(reference, reference_i, NULL, NULL, NULL); if ( reference_items == NULL ) { ERROR("Couldn't read '%s'\n", reference); return 1; } } else { reference_items = NULL; reference_i = NULL; } if ( outstream != NULL ) { outfh = fopen(outstream, "w"); if ( outfh == NULL ) { ERROR("Couldn't open '%s'\n", outstream); return 1; } } /* Count the number of patterns in the file */ n_total_patterns = count_patterns(fh); STATUS("There are %i patterns to process\n", n_total_patterns); rewind(fh); if ( histo != NULL ) { int r; r = sscanf(histo, "%i,%i,%i", &hist_h, &hist_k, &hist_l); if ( r != 3 ) { ERROR("Invalid indices for '--histogram'\n"); return 1; } space_for_hist = n_total_patterns * num_general_equivs(sym); hist_vals = malloc(space_for_hist * sizeof(double)); free(histo); STATUS("Histogramming %i %i %i -> ", hist_h, hist_k, hist_l); /* Put into the asymmetric cell for the target group */ get_asymm(hist_h, hist_k, hist_l, &hist_h, &hist_k, &hist_l, sym); STATUS("%i %i %i\n", hist_h, hist_k, hist_l); } hist_i = 0; merge_all(fh, &model, &observed, &counts, config_maxonly, config_scale, config_sum, config_startafter, config_stopafter, twins, in_sym, sym, n_total_patterns, reference_items, reference_i, hist_vals, hist_h, hist_k, hist_l, &hist_i, NULL, NULL, outfh); rewind(fh); if ( space_for_hist && (hist_i >= space_for_hist) ) { ERROR("Histogram array was too small!\n"); } if ( hist_vals != NULL ) { STATUS("%i %i %i was seen %i times.\n", hist_h, hist_k, hist_l, hist_i); plot_histogram(hist_vals, hist_i); } STATUS("Extra pass to calculate ESDs...\n"); devs = new_list_intensity(); esds = new_list_intensity(); rewind(fh); merge_all(fh, &model, &observed, &counts, config_maxonly, config_scale, 0, config_startafter, config_stopafter, twins, in_sym, sym, n_total_patterns, reference_items, reference_i, NULL, 0, 0, 0, NULL, devs, model, NULL); for ( i=0; ih; k = it->k, l = it->l; dev = lookup_intensity(devs, h, k, l); count = lookup_count(counts, h, k, l); if ( count < 2 ) { /* If we have only one measurement, the error is 100% */ esd = lookup_intensity(model, h, k, l); } else { esd = sqrt(dev) / (double)count; } set_intensity(esds, h, k, l, esd); } if ( output != NULL ) { double adu_per_photon; if ( beam == NULL ) { adu_per_photon = 167.0; STATUS("No beam parameters file provided (use -b), " "so I'm assuming 167.0 ADU per photon.\n"); } else { adu_per_photon = beam->adu_per_photon; } write_reflections(output, observed, model, esds, NULL, counts, cell); } fclose(fh); free(sym); free(model); free(counts); free(output); if ( cell != NULL ) cell_free(cell); return 0; }