Use new Kabsch algorithm for faster scaling

author: Thomas White <taw@physics.org> 2011-07-11 16:55:35 +0200
committer: Thomas White <taw@physics.org> 2012-02-22 15:27:32 +0100
commit: 623939fb6bd9475935546b9539f404eaaff41a3f (patch)
tree: 8d8e77b07903bbb36df60646b4127dec18397d3f
parent: 1bd043203f25a26d5d806ed847f87a4fd7df5f22 (diff)
5 files changed, 183 insertions, 381 deletions
diff --git a/doc/reference/CrystFEL-sections.txt b/doc/reference/CrystFEL-sections.txt
index ac5392df..c032fe76 100644
--- a/doc/reference/CrystFEL-sections.txt
+++ b/doc/reference/CrystFEL-sections.txt
@@ -25,9 +25,10 @@ get_partial
 get_scalable
 get_refinable
 get_redundancy
-get_sum_squared_dev
 get_esd_intensity
 get_phase
+get_temp1
+get_temp2
 <SUBSECTION>
 set_detector_pos
 set_partial
@@ -35,10 +36,11 @@ set_int
 set_scalable
 set_refinable
 set_redundancy
-set_sum_squared_dev
 set_esd_intensity
 set_ph
 set_symmetric_indices
+set_temp1
+set_temp2
 <SUBSECTION>
 copy_data
 num_reflections
diff --git a/src/hrs-scaling.c b/src/hrs-scaling.c
index 45bbd591..cff0b90c 100644
--- a/src/hrs-scaling.c
+++ b/src/hrs-scaling.c
@@ -31,344 +31,142 @@
 #include "reflist.h"
 
 
-/* Maximum number of iterations of NLSq scaling per macrocycle. */
+/* Maximum number of iterations of scaling per macrocycle. */
 #define MAX_CYCLES (50)
 
+/* ESD of restraint driving scale factors to unity */
+#define SCALING_RESTRAINT (1.0)
 
-static char *find_common_reflections(struct image *images, int n)
+
+static double iterate_scale(struct image *images, int n, RefList *reference)
 {
-	int i;
-	char *cref;
+	double max_shift = 0.0;
+	int frame;
 
-	cref = calloc(n*n, sizeof(char));
+	assert(reference != NULL);
 
-	for ( i=0; i<n; i++ ) {
+	for ( frame=0; frame<n; frame++ ) {
 
+		struct image *image = &images[frame];
 		Reflection *refl;
 		RefListIterator *iter;
+		double num = 0.0;
+		double den = 0.0;
+		double corr;
 
-		for ( refl = first_refl(images[i].reflections, &iter);
+		for ( refl = first_refl(image->reflections, &iter);
 		      refl != NULL;
-		      refl = next_refl(refl, iter) ) {
-
+		      refl = next_refl(refl, iter) )
+		{
 			signed int h, k, l;
-			int j;
+			double Ih, Ihl, esd;
+			Reflection *r;
 
 			if ( !get_scalable(refl) ) continue;
 
-			get_indices(refl, &h, &k, &l);
-
-			for ( j=0; j<i; j++ ) {
-				Reflection *r2;
-				r2 = find_refl(images[j].reflections, h, k, l);
-				if ( r2 != NULL ) {
-					cref[i+n*j] = 1;
-					cref[j+n*i] = 1;
-					break;
-				}
-			}
-
-		}
-
-	}
-
-	return cref;
-}
-
-
-static void s_uhavha(signed int hat, signed int kat, signed int lat,
-                     struct image *image, double *uha, double *vha)
-{
-	double uha_val = 0.0;
-	double vha_val = 0.0;
-	RefList *reflections = image->reflections;
-	Reflection *refl;
-
-	for ( refl = find_refl(reflections, hat, kat, lat);
-	      refl != NULL;
-	      refl = next_found_refl(refl) )
-	{
-		double ic, sigi;
-
-		if ( !get_scalable(refl) ) continue;
-
-		ic = get_intensity(refl) / get_partiality(refl);
-
-		/* Get the error in the estimated full intensity */
-		sigi = get_esd_intensity(refl) / get_partiality(refl);
-
-		uha_val += 1.0 / pow(sigi, 2.0);
-		vha_val += ic / pow(sigi, 2.0);
-	}
-
-	*uha = uha_val;
-	*vha = vha_val;
-}
-
-
-static void s_uhvh(struct image *images, int n,
-                   signed int h, signed int k, signed int l,
-                   double *uhp, double *vhp)
-{
-	int a;
-	double uh = 0.0;
-	double vh = 0.0;
-
-	for ( a=0; a<n; a++ ) {
-
-		double uha, vha;
-
-		s_uhavha(h, k, l, &images[a], &uha, &vha);
-
-		uh += pow(images[a].osf, 2.0) * uha;
-		vh += images[a].osf * vha;
-
-	}
-
-	*uhp = uh;
-	*vhp = vh;
-}
-
-
-static gsl_vector *solve_diagonal(gsl_vector *v, gsl_vector *M)
-{
-	gsl_vector *shifts;
-	int n, frame;
-
-	n = v->size;
-	if ( v->size != M->size ) return NULL;
-
-	shifts = gsl_vector_alloc(n);
-	if ( shifts == NULL ) return NULL;
-
-	for ( frame=0; frame<n; frame++ ) {
-
-		double num, den, sh;
-
-		num = gsl_vector_get(v, frame);
-		den = gsl_vector_get(M, frame);
-		sh = num/den;
-
-		if ( isnan(sh) ) {
-			gsl_vector_set(shifts, frame, 0.0);
-		} else {
-			gsl_vector_set(shifts, frame, sh);
-		}
-
-	}
-
-	return shifts;
-}
-
-
-static double iterate_scale(struct image *images, int n, RefList *scalable,
-                            char *cref, RefList *reference)
-{
-	gsl_vector *M;
-	gsl_vector *v;
-	gsl_vector *shifts;
-	double max_shift;
-	int frame;
-	Reflection *refl;
-	RefListIterator *iter;
-
-	M = gsl_vector_calloc(n);
-	v = gsl_vector_calloc(n);
-
-	for ( refl = first_refl(scalable, &iter);
-	      refl != NULL;
-	      refl = next_refl(refl, iter) )
-	{
-		int a;
-		signed int h, k, l;
-		double uh, vh, Ih;
-
-		get_indices(refl, &h, &k, &l);
-
-		s_uhvh(images, n, h, k, l, &uh, &vh);
-
-		if ( !reference ) {
-			Ih = vh / uh;
-			/* 0 / 0 = 0, not NaN */
-			if ( isnan(Ih) ) Ih = 0.0;
-		} else {
 			/* Look up by asymmetric indices */
-			Reflection *r = find_refl(reference, h, k, l);
+			get_indices(refl, &h, &k, &l);
+			r = find_refl(reference, h, k, l);
 			if ( r == NULL ) {
 				ERROR("%3i %3i %3i isn't in the "
 				      "reference list, so why is it "
 				      "marked as scalable?\n", h, k, l);
 				Ih = 0.0;
 			} else {
+				if ( get_redundancy(r) < 2 ) continue;
 				Ih = get_intensity(r);
 			}
-		}
-
-		for ( a=0; a<n; a++ ) {
-
-			struct image *image = &images[a];
-			double vc, rha, vha, uha;
-			double vval;
 
-			/* Determine the "solution" vector component */
-			s_uhavha(h, k, l, image, &uha, &vha);
-			rha = vha - image->osf * uha * Ih;
-			vc = Ih * rha;
-			vval = gsl_vector_get(v, a);
-			gsl_vector_set(v, a, vval+vc);
+			Ihl = get_intensity(refl) / get_partiality(refl);
+			esd = get_esd_intensity(refl) / get_partiality(refl);
 
-			vval = gsl_vector_get(M, a);
-			gsl_vector_set(M, a, vval+(pow(Ih, 2.0) * uha));
+			num += Ih * (Ihl/image->osf) / pow(esd/image->osf, 2.0);
+			den += pow(Ih, 2.0)/pow(esd/image->osf, 2.0);
 
 		}
-	}
-
-	shifts = solve_diagonal(v, M);
-	gsl_vector_free(M);
-	gsl_vector_free(v);
 
-	if ( shifts == NULL ) return 0.0;
+		//num += image->osf / pow(SCALING_RESTRAINT, 2.0);
+		//den += pow(image->osf, 2.0)/pow(SCALING_RESTRAINT, 2.0);
 
-	/* Apply shifts */
-	max_shift = 0.0;
-	for ( frame=0; frame<n; frame++ ) {
-
-		double shift = gsl_vector_get(shifts, frame);
-
-		images[frame].osf += shift;
-
-		//STATUS("Shift %i: %5.2f: -> %5.2f\n",
-		//       frame, shift, images[frame].osf);
-
-		if ( fabs(shift) > fabs(max_shift) ) {
-			max_shift = fabs(shift);
+		corr = num / den;
+		if ( !isnan(corr) && !isinf(corr) ) {
+			image->osf *= corr;
 		}
+		if ( fabs(corr-1.0) > max_shift ) max_shift = fabs(corr-1.0);
 
 	}
 
-	gsl_vector_free(shifts);
-
 	return max_shift;
 }
 
 
-static void lsq_intensities(struct image *images, int n, RefList *full)
+static RefList *lsq_intensities(struct image *images, int n)
 {
+	RefList *full;
+	int frame;
 	Reflection *refl;
 	RefListIterator *iter;
 
-	for ( refl = first_refl(full, &iter);
-	      refl != NULL;
-	      refl = next_refl(refl, iter) )
-	{
-		double num = 0.0;
-		double den = 0.0;
-		int m;
-		int redundancy = 0;
-		signed int h, k, l;
-
-		get_indices(refl, &h, &k, &l);
-
-		/* For each frame */
-		for ( m=0; m<n; m++ ) {
+	full = reflist_new();
 
-			double G;
-			Reflection *f;
-
-			/* Don't scale intensities from this image if
-			 * post refinement failed on the last step. */
-			if ( images[m].pr_dud ) continue;
-
-			G = images[m].osf;
+	for ( frame=0; frame<n; frame++ ) {
 
-			for ( f = find_refl(images[m].reflections, h, k, l);
-			      f != NULL;  f = next_found_refl(refl) )
-			{
+		double G;
 
-				double p;
+		if ( images[frame].pr_dud ) continue;
+		G = images[frame].osf;
 
-				if ( !get_scalable(f) ) continue;
-				p = get_partiality(f);
+		for ( refl = first_refl(images[frame].reflections, &iter);
+		      refl != NULL;
+		      refl = next_refl(refl, iter) )
+		{
+			Reflection *f;
+			signed int h, k, l;
+			double num, den;
+			int red;
+			double Ihl, esd;
 
-				num += get_intensity(f) * p * G;
-				den += pow(p, 2.0) * pow(G, 2.0);
-				redundancy++;
+			if ( !get_scalable(refl) ) continue;
 
+			get_indices(refl, &h, &k, &l);
+			f = find_refl(full, h, k, l);
+			if ( f == NULL ) {
+				f = add_refl(full, h, k, l);
+				num = 0.0;
+				den = 0.0;
+				red = 0;
+			} else {
+				num = get_temp1(f);
+				den = get_temp2(f);
+				red = get_redundancy(f);
 			}
 
-		}
-
-		set_int(refl, num/den);
+			Ihl = get_intensity(refl) / get_partiality(refl);
+			esd = get_esd_intensity(refl) / get_partiality(refl);
 
-		//STATUS("%3i %3i %3i %i %i\n", h, k, l,
-		//        redundancy, get_redundancy(refl));
+			num += (Ihl/G) / pow(esd/G, 2.0);
+			den += 1.0 / pow(esd/G, 2.0);
+			red++;
 
-		if ( get_redundancy(refl) != redundancy ) {
-			ERROR("Didn't find all the expected parts for"
-			      " %3i %3i %3i\n", h, k, l);
+			set_temp1(f, num);
+			set_temp2(f, den);
+			set_redundancy(f, red);
 		}
 
 	}
-}
-
-
-static void normalise_osfs(struct image *images, int n)
-{
-	int m;
-	double tot = 0.0;
-	double norm;
-
-	for ( m=0; m<n; m++ ) {
-		tot += images[m].osf;
-	}
-	norm = n / tot;
-
-	for ( m=0; m<n; m++ ) {
-		images[m].osf *= norm;
-	}
-}
-
-
-static RefList *guess_scaled_reflections(struct image *images, int n)
-{
-	RefList *scalable;
-	int i;
-
-	scalable = reflist_new();
-	for ( i=0; i<n; i++ ) {
-
-		Reflection *refl;
-		RefListIterator *iter;
 
-		/* Don't scale intensities from this image if
-		 * post refinement failed on the last step. */
-		if ( images[i].pr_dud ) continue;
-
-		for ( refl = first_refl(images[i].reflections, &iter);
-		      refl != NULL;
-		      refl = next_refl(refl, iter) )
-		{
-			if ( get_scalable(refl) ) {
-
-				signed int h, k, l;
-				Reflection *f;
-				int red;
-
-				get_indices(refl, &h, &k, &l);
-				f = find_refl(scalable, h, k, l);
-				if ( f == NULL ) {
-					f = add_refl(scalable, h, k, l);
-				}
-
-				red = get_redundancy(f);
-				set_redundancy(f, red+1);
+	for ( refl = first_refl(full, &iter);
+	      refl != NULL;
+	      refl = next_refl(refl, iter) )
+	{
+		double Ih;
 
-			}
-		}
+		Ih = get_temp1(refl) / get_temp2(refl);
+		set_int(refl, Ih);
 
 	}
 
-	return scalable;
+	return full;
 }
 
 
@@ -438,80 +236,54 @@ static UNUSED void plot_graph(struct image *image, RefList *reference)
 }
 
 
-static void scale_matrix(struct image *images, int n, RefList *scalable,
-                         RefList *reference)
-{
-	int i, m;
-	double max_shift;
-	char *cref;
-
-	/* Start with all scale factors equal */
-	for ( m=0; m<n; m++ ) images[m].osf = 1.0;
-
-	cref = find_common_reflections(images, n);
-
-	/* Iterate */
-	i = 0;
-	do {
-
-		max_shift = iterate_scale(images, n, scalable, cref, reference);
-		STATUS("Scaling iteration %2i: max shift = %5.2f\n",
-		       i+1, max_shift);
-		i++;
-		if ( reference == NULL ) normalise_osfs(images, n);
-
-	} while ( (max_shift > 0.01) && (i < MAX_CYCLES) );
-
-	free(cref);
-}
-
-
-static void scale_megatron(struct image *images, int n, RefList *scalable)
+/* Scale the stack of images */
+RefList *scale_intensities(struct image *images, int n, RefList *gref)
 {
-	int i, m;
-	double max_shift;
-	double *old_osfs;
-
-	old_osfs = calloc(n, sizeof(double));
+	int i;
+	double max_corr;
+	RefList *full = NULL;
 
-	/* Start with all scale factors equal */
-	for ( m=0; m<n; m++ ) {
-		images[m].osf = 1.0;
-		old_osfs[m] = images[m].osf;
+	for ( i=0; i<n; i++ ) {
+		images[i].osf = 1.0;
 	}
 
-	lsq_intensities(images, n, scalable);
+	/* No reference -> create an initial list to refine against */
+	if ( gref == NULL ) {
+		full = lsq_intensities(images, n);
+	}
 
 	/* Iterate */
 	i = 0;
 	do {
 
-		max_shift = iterate_scale(images, n, scalable, NULL, scalable);
-		STATUS("Scaling iteration %2i: max shift = %5.2f\n",
-		       i+1, max_shift);
-		i++;
+		RefList *reference;
 
-		for ( m=0; m<n; m++ ) {
-			images[m].osf *= old_osfs[m];
+		/* Refine against reference or current "full" estimates */
+		if ( gref != NULL ) {
+			reference = gref;
+		} else {
+			reference = full;
 		}
 
-	} while ( (max_shift > 0.01) && (i < MAX_CYCLES) );
-}
+		max_corr = iterate_scale(images, n, reference);
+		STATUS("Scaling iteration %2i: max correction = %5.2f\n",
+		       i+1, max_corr);
 
+		/* No reference -> generate list for next iteration */
+		if ( gref == NULL ) {
+			reflist_free(full);
+			full = lsq_intensities(images, n);
+		}
 
-/* Scale the stack of images */
-RefList *scale_intensities(struct image *images, int n, RefList *reference)
-{
-	RefList *full;
+		//show_scale_factors(images, n);
 
-	full = guess_scaled_reflections(images, n);
+		i++;
 
-	if ( reference != NULL ) {
-		scale_matrix(images, n, full, reference);
-	} else {
-		scale_megatron(images, n, full);
-	}
+	} while ( (max_corr > 0.01) && (i < MAX_CYCLES) );
+
+	if ( gref != NULL ) {
+		full = lsq_intensities(images, n);
+	} /* else we already did it */
 
-	lsq_intensities(images, n, full);
 	return full;
 }
diff --git a/src/process_hkl.c b/src/process_hkl.c
index b3722ad9..731344cb 100644
--- a/src/process_hkl.c
+++ b/src/process_hkl.c
@@ -170,7 +170,7 @@ static void merge_pattern(RefList *model, RefList *new, int max_only,
 
 		} else if ( pass == 2 ) {
 
-			double dev = get_sum_squared_dev(model_version);
+			double dev = get_temp1(model_version);
 
 			/* Other ways of estimating the sigma are possible,
 			 * choose from:
@@ -180,7 +180,7 @@ static void merge_pattern(RefList *model, RefList *new, int max_only,
 			 * as well. */
 			dev += pow(intensity - model_int, 2.0);
 
-			set_sum_squared_dev(model_version, dev);
+			set_temp1(model_version, dev);
 
 			if ( hist_vals != NULL ) {
 				int p = *hist_n;
@@ -374,7 +374,7 @@ static void merge_all(FILE *fh, RefList *model,
 		      refl != NULL;
 		      refl = next_refl(refl, iter) ) {
 
-			double sum_squared_dev = get_sum_squared_dev(refl);
+			double sum_squared_dev = get_temp1(refl);
 			int red = get_redundancy(refl);
 			int h, k, l;
 			double esd;
diff --git a/src/reflist.c b/src/reflist.c
index b51af27c..5b594d76 100644
--- a/src/reflist.c
+++ b/src/reflist.c
@@ -81,9 +81,9 @@ struct _refldata {
 	/* Redundancy */
 	int redundancy;
 
-	/* Total squared difference between all estimates of this reflection
-	 * and the estimated mean value */
-	double sum_squared_dev;
+	/* User-specified temporary values */
+	double temp1;
+	double temp2;
 };
 
 
@@ -423,47 +423,61 @@ int get_redundancy(const Reflection *refl)
 
 
 /**
- * get_sum_squared_dev:
+ * get_esd_intensity:
  * @refl: A %Reflection
  *
- * The sum squared deviation is used to estimate the standard errors on the
- * intensities during 'Monte Carlo' merging.
+ * Returns: the standard error in the intensity measurement (as returned by
+ * get_intensity()) for this reflection.
+ *
+ **/
+double get_esd_intensity(const Reflection *refl)
+{
+	return refl->data.esd_i;
+}
+
+
+/**
+ * get_phase:
+ * @refl: A %Reflection
  *
- * Returns: the sum of the squared deviations between the intensities and the
- * mean intensity from all measurements of the reflection (and probably its
- * symmetry equivalents according to some point group).
+ * Returns: the phase for this reflection.
  *
  **/
-double get_sum_squared_dev(const Reflection *refl)
+double get_phase(const Reflection *refl)
 {
-	return refl->data.sum_squared_dev;
+	return refl->data.phase;
 }
 
 
 /**
- * get_esd_intensity:
+ * get_temp1:
  * @refl: A %Reflection
  *
- * Returns: the standard error in the intensity measurement (as returned by
- * get_intensity()) for this reflection.
+ * The temporary values can be used according to the needs of the calling
+ * program.
+ *
+ * Returns: the first temporary value for this reflection.
  *
  **/
-double get_esd_intensity(const Reflection *refl)
+double get_temp1(const Reflection *refl)
 {
-	return refl->data.esd_i;
+	return refl->data.temp1;
 }
 
 
 /**
- * get_phase:
+ * get_temp2:
  * @refl: A %Reflection
  *
- * Returns: the phase for this reflection.
+ * The temporary values can be used according to the needs of the calling
+ * program.
+ *
+ * Returns: the second temporary value for this reflection.
  *
  **/
-double get_phase(const Reflection *refl)
+double get_temp2(const Reflection *refl)
 {
-	return refl->data.phase;
+	return refl->data.temp2;
 }
 
 
@@ -585,24 +599,6 @@ void set_redundancy(Reflection *refl, int red)
 
 
 /**
- * set_sum_squared_dev:
- * @refl: A %Reflection
- * @dev: New sum squared deviation for the reflection
- *
- * The sum squared deviation is used to estimate the standard errors on the
- * intensities during 'Monte Carlo' merging.  It is defined as the sum of the
- * squared deviations between the intensities and the mean intensity from all
- * measurements of the reflection (and probably its symmetry equivalents
- * according to some point group).
- *
- **/
-void set_sum_squared_dev(Reflection *refl, double dev)
-{
-	refl->data.sum_squared_dev = dev;
-}
-
-
-/**
  * set_esd_intensity:
  * @refl: A %Reflection
  * @esd: New standard error for this reflection's intensity measurement
@@ -648,6 +644,36 @@ void set_symmetric_indices(Reflection *refl,
 }
 
 
+/**
+ * set_temp1
+ * @refl: A %Reflection
+ * @temp: New temporary value for the reflection
+ *
+ * The temporary values can be used according to the needs of the calling
+ * program.
+ *
+ **/
+void set_temp1(Reflection *refl, double temp)
+{
+	refl->data.temp1 = temp;
+}
+
+
+/**
+ * set_temp2
+ * @refl: A %Reflection
+ * @temp: New temporary value for the reflection
+ *
+ * The temporary values can be used according to the needs of the calling
+ * program.
+ *
+ **/
+void set_temp2(Reflection *refl, double temp)
+{
+	refl->data.temp2 = temp;
+}
+
+
 /********************************* Insertion **********************************/
 
 static Reflection *rotate_once(Reflection *refl, int dir)
diff --git a/src/reflist.h b/src/reflist.h
index 0d836edf..6c0285f1 100644
--- a/src/reflist.h
+++ b/src/reflist.h
@@ -70,7 +70,8 @@ extern void get_partial(const Reflection *refl, double *r1, double *r2,
 extern int get_scalable(const Reflection *refl);
 extern int get_refinable(const Reflection *refl);
 extern int get_redundancy(const Reflection *refl);
-extern double get_sum_squared_dev(const Reflection *refl);
+extern double get_temp1(const Reflection *refl);
+extern double get_temp2(const Reflection *refl);
 extern double get_esd_intensity(const Reflection *refl);
 extern double get_phase(const Reflection *refl);
 
@@ -84,7 +85,8 @@ extern void set_int(Reflection *refl, double intensity);
 extern void set_scalable(Reflection *refl, int scalable);
 extern void set_refinable(Reflection *refl, int refinable);
 extern void set_redundancy(Reflection *refl, int red);
-extern void set_sum_squared_dev(Reflection *refl, double dev);
+extern void set_temp1(Reflection *refl, double dev);
+extern void set_temp2(Reflection *refl, double dev);
 extern void set_esd_intensity(Reflection *refl, double esd);
 extern void set_ph(Reflection *refl, double phase);
 extern void set_symmetric_indices(Reflection *refl,
author	Thomas White <taw@physics.org>	2011-07-11 16:55:35 +0200
committer	Thomas White <taw@physics.org>	2012-02-22 15:27:32 +0100
commit	623939fb6bd9475935546b9539f404eaaff41a3f (patch)
tree	8d8e77b07903bbb36df60646b4127dec18397d3f
parent	1bd043203f25a26d5d806ed847f87a4fd7df5f22 (diff)