From 0f7396045b5242ab8271b22e97b53ccd42f60397 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Tue, 7 May 2019 17:25:50 +0200 Subject: Remove old "statistics" module with R factors etc compare_hkl does all of these, much better than these functions. --- libcrystfel/CMakeLists.txt | 2 - libcrystfel/doc/index.md | 1 - libcrystfel/src/statistics.c | 683 ------------------------------------------- libcrystfel/src/statistics.h | 73 ----- src/check_hkl.c | 1 - src/compare_hkl.c | 1 - src/process_hkl.c | 1 - 7 files changed, 762 deletions(-) delete mode 100644 libcrystfel/src/statistics.c delete mode 100644 libcrystfel/src/statistics.h diff --git a/libcrystfel/CMakeLists.txt b/libcrystfel/CMakeLists.txt index 55c2c0f5..6d0cd8dc 100644 --- a/libcrystfel/CMakeLists.txt +++ b/libcrystfel/CMakeLists.txt @@ -47,7 +47,6 @@ set(LIBCRYSTFEL_SOURCES src/hdf5-file.c src/geometry.c src/peakfinder8.c - src/statistics.c src/symmetry.c src/stream.c src/peaks.c @@ -85,7 +84,6 @@ set(LIBCRYSTFEL_HEADERS src/cell.h src/reflist-utils.h src/thread-pool.h - src/statistics.h src/utils.h src/detector.h src/geometry.h diff --git a/libcrystfel/doc/index.md b/libcrystfel/doc/index.md index d806a2e5..16868fef 100644 --- a/libcrystfel/doc/index.md +++ b/libcrystfel/doc/index.md @@ -35,7 +35,6 @@ API documentation * \ref peaks.h "Main peak search functions" * \ref peakfinder8.h "The peakfinder8 algorithm" * \ref filters.h "Image (noise) filters" -* \ref statistics.h "R-values and other statistics" * \ref symmetry.h "Point group symmetry" * Mathematical constructions: * \ref integer_matrix.h "Integer matrices" diff --git a/libcrystfel/src/statistics.c b/libcrystfel/src/statistics.c deleted file mode 100644 index ff8f233c..00000000 --- a/libcrystfel/src/statistics.c +++ /dev/null @@ -1,683 +0,0 @@ -/* - * statistics.c - * - * Structure-factor statistics - * - * Copyright © 2012 Deutsches Elektronen-Synchrotron DESY, - * a research centre of the Helmholtz Association. - * - * Authors: - * 2009-2012 Thomas White - * - * This file is part of CrystFEL. - * - * CrystFEL is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CrystFEL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CrystFEL. If not, see . - * - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include -#include -#include - -#include "statistics.h" -#include "utils.h" - -/** \file statistics.h */ - - -struct r_params { - RefList *list1; - RefList *list2; - int fom; /* Which FoM to use (see the enum just below) */ -}; - -enum { - R_1_ZERO, - R_1_IGNORE, - R_2, - R_1_I, - R_DIFF_ZERO, - R_DIFF_IGNORE, - R_DIFF_INTENSITY, -}; - - -/* Return the least squares optimal scaling factor when comparing intensities. - * list1,list2 are the two intensity lists to compare. - */ -double stat_scale_intensity(RefList *list1, RefList *list2) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - top += i1 * i2; - bot += i2 * i2; - - } - - return top/bot; -} - - -/* Return the least squares optimal scaling factor when comparing the square - * roots of the intensities (i.e. one approximation to the structure factor - * moduli). - * list1,list2 are the two intensity lists to compare (they contain intensities, - * not square rooted intensities). - */ -static double stat_scale_sqrti(RefList *list1, RefList *list2) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - if ( i1 < 0.0 ) continue; - f1 = sqrt(i1); - - if ( i2 < 0.0 ) continue; - f2 = sqrt(i2); - - top += f1 * f2; - bot += f2 * f2; - - } - - return top/bot; -} - - -static double internal_r1_ignorenegs(RefList *list1, RefList *list2, - double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - if ( i1 < 0.0 ) continue; - f1 = sqrt(i1); - - if ( i2 < 0.0 ) continue; - f2 = sqrt(i2); - f2 *= scale; - - top += fabs(f1 - f2); - bot += f1; - - } - - return top/bot; -} - - -static double internal_r1_negstozero(RefList *list1, RefList *list2, - double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - f1 = i1 > 0.0 ? sqrt(i1) : 0.0; - - f2 = i2 > 0.0 ? sqrt(i2) : 0.0; - f2 *= scale; - - top += fabs(f1 - f2); - bot += f1; - - } - - return top/bot; -} - - -static double internal_r2(RefList *list1, RefList *list2, double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - i2 *= scale; - - top += pow(i1 - i2, 2.0); - bot += pow(i1, 2.0); - - } - - return sqrt(top/bot); -} - - -static double internal_r_i(RefList *list1, RefList *list2, double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - i2 *= scale; - - top += fabs(i1-i2); - bot += fabs(i1); - - } - - return top/bot; -} - - -static double internal_rdiff_intensity(RefList *list1, RefList *list2, - double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - i2 *= scale; - - top += fabs(i1 - i2); - bot += i1 + i2; - - } - - return 2.0*top/bot; -} - - -static double internal_rdiff_negstozero(RefList *list1, RefList *list2, - double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - f1 = i1 > 0.0 ? sqrt(i1) : 0.0; - - f2 = i2 > 0.0 ? sqrt(i2) : 0.0; - f2 *= scale; - - top += fabs(f1 - f2); - bot += f1 + f2; - - } - - return 2.0*top/bot; -} - - -static double internal_rdiff_ignorenegs(RefList *list1, RefList *list2, - double scale) -{ - double top = 0.0; - double bot = 0.0; - Reflection *refl1; - RefListIterator *iter; - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - if ( i1 < 0.0 ) continue; - f1 = sqrt(i1); - - if ( i2 < 0.0 ) continue; - f2 = sqrt(i2); - f2 *= scale; - - top += fabs(f1 - f2); - bot += f1 + f2; - - } - - return 2.0*top/bot; -} - - -static double calc_r(double scale, void *params) -{ - struct r_params *rp = params; - - switch ( rp->fom ) { - - case R_1_ZERO : - return internal_r1_negstozero(rp->list1, rp->list2, scale); - - case R_1_IGNORE : - return internal_r1_ignorenegs(rp->list1, rp->list2, scale); - - case R_2 : - return internal_r2(rp->list1, rp->list2, scale); - - case R_1_I : - return internal_r_i(rp->list1, rp->list2, scale); - - case R_DIFF_ZERO : - return internal_rdiff_negstozero(rp->list1, rp->list2,scale); - - case R_DIFF_IGNORE : - return internal_rdiff_ignorenegs(rp->list1, rp->list2, scale); - - case R_DIFF_INTENSITY : - return internal_rdiff_intensity(rp->list1, rp->list2, scale); - - } - - ERROR("No such FoM!\n"); - abort(); -} - - -static double r_minimised(RefList *list1, RefList *list2, double *scalep, int fom, - int u) -{ - gsl_function F; - gsl_min_fminimizer *s; - int status; - double scale = 1.0; - struct r_params rp; - int iter = 0; - - rp.list1 = list1; - rp.list2 = list2; - rp.fom = fom; - - if ( u ) { - - scale = 1.0; - - } else { - - F.function = &calc_r; - F.params = &rp; - - s = gsl_min_fminimizer_alloc(gsl_min_fminimizer_brent); - - /* Initial guess */ - switch ( fom ) { - - case R_1_ZERO : - case R_1_IGNORE : - case R_DIFF_ZERO : - case R_DIFF_IGNORE : - scale = stat_scale_sqrti(list1, list2); - break; - - case R_2 : - case R_1_I : - case R_DIFF_INTENSITY : - scale = stat_scale_intensity(list1, list2); - break; - - } - //STATUS("Initial scale factor estimate: %5.2e\n", scale); - - /* Probably within an order of magnitude either side */ - gsl_min_fminimizer_set(s, &F, scale, scale/10.0, scale*10.0); - - do { - - double lo, up; - - /* Iterate */ - if ( gsl_min_fminimizer_iterate(s) ) { - ERROR("Failed to find scale factor.\n"); - return NAN; - } - - /* Get the current estimate */ - scale = gsl_min_fminimizer_x_minimum(s); - lo = gsl_min_fminimizer_x_lower(s); - up = gsl_min_fminimizer_x_upper(s); - - /* Check for convergence */ - status = gsl_min_test_interval(lo, up, 0.001, 0.0); - - iter++; - - } while ( status == GSL_CONTINUE ); - - if ( status != GSL_SUCCESS ) { - ERROR("Scale factor minimisation failed.\n"); - } - - gsl_min_fminimizer_free(s); - - } - - //STATUS("Final scale factor: %5.2e\n", scale); - *scalep = scale; - return calc_r(scale, &rp); -} - - -double stat_r1_ignore(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_1_IGNORE, u); -} - - -double stat_r1_zero(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_1_ZERO, u); -} - - -double stat_r2(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_2, u); -} - - -double stat_r1_i(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_1_I, u); -} - - -double stat_rdiff_zero(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_DIFF_ZERO, u); -} - - -double stat_rdiff_ignore(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_DIFF_IGNORE, u); -} - - -double stat_rdiff_intensity(RefList *list1, RefList *list2, double *scalep, int u) -{ - return r_minimised(list1, list2, scalep, R_DIFF_INTENSITY, u); -} - - -double stat_pearson_i(RefList *list1, RefList *list2) -{ - double *vec1, *vec2; - int ni = num_reflections(list1); - double val; - int nacc = 0; - Reflection *refl1; - RefListIterator *iter; - - vec1 = malloc(ni*sizeof(double)); - vec2 = malloc(ni*sizeof(double)); - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - vec1[nacc] = i1; - vec2[nacc] = i2; - nacc++; - } - - val = gsl_stats_correlation(vec1, 1, vec2, 1, nacc); - free(vec1); - free(vec2); - - return val; -} - - -double stat_pearson_f_ignore(RefList *list1, RefList *list2) -{ - double *vec1, *vec2; - int ni = num_reflections(list1); - double val; - int nacc = 0; - Reflection *refl1; - RefListIterator *iter; - - vec1 = malloc(ni*sizeof(double)); - vec2 = malloc(ni*sizeof(double)); - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - if ( i1 < 0.0 ) continue; - if ( i2 < 0.0 ) continue; - - f1 = sqrt(i1); - f2 = sqrt(i2); - - vec1[nacc] = f1; - vec2[nacc] = f2; - nacc++; - - } - - val = gsl_stats_correlation(vec1, 1, vec2, 1, nacc); - free(vec1); - free(vec2); - - return val; -} - - -double stat_pearson_f_zero(RefList *list1, RefList *list2) -{ - double *vec1, *vec2; - int ni = num_reflections(list1); - double val; - int nacc = 0; - Reflection *refl1; - RefListIterator *iter; - - vec1 = malloc(ni*sizeof(double)); - vec2 = malloc(ni*sizeof(double)); - - for ( refl1 = first_refl(list1, &iter); - refl1 != NULL; - refl1 = next_refl(refl1, iter) ) - { - double i1, i2; - double f1, f2; - signed int h, k, l; - Reflection *refl2; - - get_indices(refl1, &h, &k, &l); - refl2 = find_refl(list2, h, k, l); - if ( refl2 == NULL ) continue; /* No common reflection */ - - i1 = get_intensity(refl1); - i2 = get_intensity(refl2); - - f1 = i1 > 0.0 ? sqrt(i1) : 0.0; - f2 = i2 > 0.0 ? sqrt(i2) : 0.0; - - vec1[nacc] = f1; - vec2[nacc] = f2; - nacc++; - - } - - val = gsl_stats_correlation(vec1, 1, vec2, 1, nacc); - free(vec1); - free(vec2); - - return val; -} diff --git a/libcrystfel/src/statistics.h b/libcrystfel/src/statistics.h deleted file mode 100644 index 4e2d2aff..00000000 --- a/libcrystfel/src/statistics.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * statistics.h - * - * Structure-factor statistics - * - * Copyright © 2012 Deutsches Elektronen-Synchrotron DESY, - * a research centre of the Helmholtz Association. - * - * Authors: - * 2009-2012 Thomas White - * - * This file is part of CrystFEL. - * - * CrystFEL is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * CrystFEL is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with CrystFEL. If not, see . - * - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#ifndef STATISTICS_H -#define STATISTICS_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "reflist.h" - -/** - * \file statistics.h - * Intensity statistic calculations - */ - -extern double stat_scale_intensity(RefList *list1, RefList *list2); - -extern double stat_r1_zero(RefList *list1, RefList *list2, - double *scalep, int u); -extern double stat_r1_ignore(RefList *list1, RefList *list2, - double *scalep, int u); - -extern double stat_r2(RefList *list1, RefList *list2, double *scalep, int u); - -extern double stat_r1_i(RefList *list1, RefList *list2, double *scalep, int u); - -extern double stat_rdiff_zero(RefList *list1, RefList *list2, - double *scalep, int u); -extern double stat_rdiff_ignore(RefList *list1, RefList *list2, - double *scalep, int u); -extern double stat_rdiff_intensity(RefList *list1, RefList *list2, - double *scalep, int u); - -extern double stat_pearson_i(RefList *list1, RefList *list2); -extern double stat_pearson_f_zero(RefList *list1, RefList *list2); -extern double stat_pearson_f_ignore(RefList *list1, RefList *list2); - -#ifdef __cplusplus -} -#endif - -#endif /* STATISTICS_H */ diff --git a/src/check_hkl.c b/src/check_hkl.c index 224ccc00..ca096ae8 100644 --- a/src/check_hkl.c +++ b/src/check_hkl.c @@ -41,7 +41,6 @@ #include #include "utils.h" -#include "statistics.h" #include "symmetry.h" #include "reflist.h" #include "reflist-utils.h" diff --git a/src/compare_hkl.c b/src/compare_hkl.c index 5812cad2..479d7332 100644 --- a/src/compare_hkl.c +++ b/src/compare_hkl.c @@ -44,7 +44,6 @@ #include #include "utils.h" -#include "statistics.h" #include "symmetry.h" #include "reflist-utils.h" #include "cell-utils.h" diff --git a/src/process_hkl.c b/src/process_hkl.c index ab5b9af1..393dc3e8 100644 --- a/src/process_hkl.c +++ b/src/process_hkl.c @@ -44,7 +44,6 @@ #include #include "utils.h" -#include "statistics.h" #include "reflist-utils.h" #include "symmetry.h" #include "stream.h" -- cgit v1.2.3