/* * taketwo.c * * Rewrite of TakeTwo algorithm (Acta D72 (8) 956-965) for CrystFEL * * Copyright © 2016 Helen Ginn * Copyright © 2016 Deutsches Elektronen-Synchrotron DESY, * a research centre of the Helmholtz Association. * * Authors: * 2016 Helen Ginn * 2016 Thomas White * * This file is part of CrystFEL. * * CrystFEL is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * CrystFEL is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with CrystFEL. If not, see . * */ #include #include #include #include #include #include "cell-utils.h" #include "index.h" #include "taketwo.h" #include "peaks.h" #include /** * spotvec * @obsvec: an observed vector between two spots * @matches: array of matching theoretical vectors from unit cell * @match_num: number of matches * @distance: length of obsvec (do I need this?) * @her_rlp: pointer to first rlp position for difference vec * @his_rlp: pointer to second rlp position for difference vec * * Structure representing 3D vector between two potential Bragg peaks * in reciprocal space, and an array of potential matching theoretical * vectors from unit cell/centering considerations. **/ struct SpotVec { struct rvec obsvec; struct rvec *matches; int match_num; double distance; struct rvec *her_rlp; struct rvec *his_rlp; }; struct taketwo_private { IndexingMethod indm; float *ltl; UnitCell *cell; }; /* Maximum distance between two rlp sizes to consider info for indexing */ #define MAX_RECIP_DISTANCE (0.12*1e10) /* Tolerance for two lengths in reciprocal space to be considered the same */ #define RECIP_TOLERANCE (0.001*1e10) /* Threshold for network members to consider a potential solution */ #define NETWORK_MEMBER_THRESHOLD (20) /* Maximum network members (obviously a solution so should stop) */ #define MAX_NETWORK_MEMBERS (NETWORK_MEMBER_THRESHOLD + 5) /* Maximum dead ends for a single branch extension during indexing */ #define MAX_DEAD_ENDS (5) /* Tolerance for two angles to be considered the same */ #define ANGLE_TOLERANCE (deg2rad(1.0)) /* Tolerance for rot_mats_are_similar */ #define TRACE_TOLERANCE (deg2rad(3.0)) /** TODO: * * - May need to be capable of playing with the tolerances/#defined stuff. * - Multiple lattices */ /* ------------------------------------------------------------------------ * apologetic function * ------------------------------------------------------------------------*/ void apologise() { printf("Error - could not allocate memory for indexing.\n"); } /* ------------------------------------------------------------------------ * functions concerning aspects of rvec which are very likely to be * incorporated somewhere else in CrystFEL and therefore may need to be * deleted and references connected to a pre-existing function. (Lowest level) * ------------------------------------------------------------------------*/ static struct rvec new_rvec(double new_u, double new_v, double new_w) { struct rvec new_rvector; new_rvector.u = new_u; new_rvector.v = new_v; new_rvector.w = new_w; return new_rvector; } static struct rvec diff_vec(struct rvec from, struct rvec to) { struct rvec diff = new_rvec(to.u - from.u, to.v - from.v, to.w - from.w); return diff; } static double sq_length(struct rvec vec) { double sqlength = (vec.u * vec.u + vec.v * vec.v + vec.w * vec.w); return sqlength; } static double rvec_length(struct rvec vec) { return sqrt(sq_length(vec)); } static void normalise_rvec(struct rvec *vec) { double length = rvec_length(*vec); vec->u /= length; vec->v /= length; vec->w /= length; } static double rvec_cosine(struct rvec v1, struct rvec v2) { double dot_prod = v1.u * v2.u + v1.v * v2.v + v1.w * v2.w; double v1_length = rvec_length(v1); double v2_length = rvec_length(v2); double cos_theta = dot_prod / (v1_length * v2_length); return cos_theta; } static double rvec_angle(struct rvec v1, struct rvec v2) { double cos_theta = rvec_cosine(v1, v2); double angle = acos(cos_theta); return angle; } static struct rvec rvec_cross(struct rvec a, struct rvec b) { struct rvec c; c.u = a.v*b.w - a.w*b.v; c.v = -(a.u*b.w - a.w*b.u); c.w = a.u*b.v - a.v*b.u; return c; } static void show_rvec(struct rvec r2) { struct rvec r = r2; normalise_rvec(&r); STATUS("[ %.3f %.3f %.3f ]\n", r.u, r.v, r.w); } /* ------------------------------------------------------------------------ * functions called under the core functions, still specialised (Level 3) * ------------------------------------------------------------------------*/ static gsl_matrix *rotation_around_axis(struct rvec c, double th) { double omc = 1.0 - cos(th); double s = sin(th); gsl_matrix *res = gsl_matrix_alloc(3, 3); gsl_matrix_set(res, 0, 0, cos(th) + c.u*c.u*omc); gsl_matrix_set(res, 0, 1, c.u*c.v*omc - c.w*s); gsl_matrix_set(res, 0, 2, c.u*c.w*omc + c.v*s); gsl_matrix_set(res, 1, 0, c.u*c.v*omc + c.w*s); gsl_matrix_set(res, 1, 1, cos(th) + c.v*c.v*omc); gsl_matrix_set(res, 1, 2, c.v*c.w*omc - c.u*s); gsl_matrix_set(res, 2, 0, c.w*c.u*omc - c.v*s); gsl_matrix_set(res, 2, 1, c.w*c.v*omc + c.u*s); gsl_matrix_set(res, 2, 2, cos(th) + c.w*c.w*omc); return res; } /* Rotate vector (vec1) around axis (axis) by angle theta. Find value of * theta for which the angle between (vec1) and (vec2) is minimised. * Behold! Finally an analytical solution for this one. Assuming * that @result has already been allocated. Will upload the maths to the * shared Google drive. */ static gsl_matrix *closest_rot_mat(struct rvec vec1, struct rvec vec2, struct rvec axis) { /* Let's have unit vectors */ normalise_rvec(&vec1); normalise_rvec(&vec2); normalise_rvec(&axis); /* Redeclaring these to try and maintain readability and * check-ability against the maths I wrote down */ double a = vec2.u; double b = vec2.v; double c = vec2.w; double p = vec1.u; double q = vec1.v; double r = vec1.w; double x = axis.u; double y = axis.v; double z = axis.w; /* Components in handwritten maths online when I upload it */ double A = a*(p*x*x - p + x*y*q + x*z*r) + b*(p*x*y + q*y*y - q + r*y*z) + c*(p*x*z + q*y*z + r*z*z - r); double B = a*(y*r - z*q) + b*(p*z - r*x) + c*(q*x - p*y); double tan_theta = - B / A; double theta = atan(tan_theta); /* Now we have two possible solutions, theta or theta+pi * and we need to work out which one. This could potentially be * simplified - do we really need so many cos/sins? maybe check * the 2nd derivative instead? */ double cc = cos(theta); double C = 1 - cc; double s = sin(theta); double occ = -cc; double oC = 1 - occ; double os = -s; double pPrime = (x*x*C+cc)*p + (x*y*C-z*s)*q + (x*z*C+y*s)*r; double qPrime = (y*x*C+z*s)*p + (y*y*C+cc)*q + (y*z*C-x*s)*r; double rPrime = (z*x*C-y*s)*p + (z*y*C+x*s)*q + (z*z*C+cc)*r; double pDbPrime = (x*x*oC+occ)*p + (x*y*oC-z*os)*q + (x*z*oC+y*os)*r; double qDbPrime = (y*x*oC+z*os)*p + (y*y*oC+occ)*q + (y*z*oC-x*os)*r; double rDbPrime = (z*x*oC-y*os)*p + (z*y*oC+x*os)*q + (z*z*oC+occ)*r; double cosAlpha = pPrime * a + qPrime * b + rPrime * c; double cosAlphaOther = pDbPrime * a + qDbPrime * b + rDbPrime * c; int addPi = (cosAlphaOther > cosAlpha); double bestAngle = theta + addPi * M_PI; /* Return an identity matrix which has been rotated by * theta around "axis" */ return rotation_around_axis(axis, bestAngle); } static double matrix_trace(gsl_matrix *a) { int i; double tr = 0.0; assert(a->size1 == a->size2); for ( i=0; isize1; i++ ) { tr += gsl_matrix_get(a, i, i); } return tr; } static int rot_mats_are_similar(gsl_matrix *rot1, gsl_matrix *rot2, double *score) { double tr; gsl_matrix *sub; gsl_matrix *mul; sub = gsl_matrix_calloc(3, 3); gsl_matrix_memcpy(sub, rot1); gsl_matrix_sub(sub, rot2); /* sub = rot1 - rot2 */ mul = gsl_matrix_calloc(3, 3); gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, sub, sub, 0.0, mul); tr = matrix_trace(mul); if (score != NULL) *score = tr; gsl_matrix_free(mul); gsl_matrix_free(sub); double max = sqrt(4.0*(1.0-cos(TRACE_TOLERANCE))); return (tr < max); } static gsl_matrix *rotation_between_vectors(struct rvec a, struct rvec b) { double th = rvec_angle(a, b); struct rvec c = rvec_cross(a, b); normalise_rvec(&c); return rotation_around_axis(c, th); } static gsl_vector *rvec_to_gsl(struct rvec v) { gsl_vector *a = gsl_vector_alloc(3); gsl_vector_set(a, 0, v.u); gsl_vector_set(a, 1, v.v); gsl_vector_set(a, 2, v.w); return a; } struct rvec gsl_to_rvec(gsl_vector *a) { struct rvec v; v.u = gsl_vector_get(a, 0); v.v = gsl_vector_get(a, 1); v.w = gsl_vector_get(a, 2); return v; } /* Code me in gsl_matrix language to copy the contents of the function * in cppxfel (IndexingSolution::createSolution). This function is quite * intensive on the number crunching side so simple angle checks are used * to 'pre-scan' vectors beforehand. */ static gsl_matrix *generate_rot_mat(struct rvec obs1, struct rvec obs2, struct rvec cell1, struct rvec cell2) { gsl_matrix *rotateSpotDiffMatrix; gsl_matrix *secondTwizzleMatrix; gsl_matrix *fullMat; gsl_vector *cell2v = rvec_to_gsl(cell2); gsl_vector *cell2vr = gsl_vector_calloc(3); normalise_rvec(&obs1); normalise_rvec(&obs2); normalise_rvec(&cell1); normalise_rvec(&cell2); /* Rotate reciprocal space so that the first simulated vector lines up * with the observed vector. */ rotateSpotDiffMatrix = rotation_between_vectors(cell1, obs1); normalise_rvec(&obs1); /* Multiply cell2 by rotateSpotDiffMatrix --> cell2vr */ gsl_blas_dgemv(CblasNoTrans, 1.0, rotateSpotDiffMatrix, cell2v, 0.0, cell2vr); /* Now we twirl around the firstAxisUnit until the rotated simulated * vector matches the second observed vector as closely as possible. */ secondTwizzleMatrix = closest_rot_mat(gsl_to_rvec(cell2vr), obs2, obs1); /* We want to apply the first matrix and then the second matrix, * so we multiply these. */ fullMat = gsl_matrix_calloc(3, 3); gsl_blas_dgemm(CblasTrans, CblasTrans, 1.0, rotateSpotDiffMatrix, secondTwizzleMatrix, 0.0, fullMat); gsl_matrix_transpose(fullMat); gsl_vector_free(cell2v); gsl_vector_free(cell2vr); gsl_matrix_free(secondTwizzleMatrix); gsl_matrix_free(rotateSpotDiffMatrix); return fullMat; } static int obs_vecs_share_spot(struct SpotVec *her_obs, struct SpotVec *his_obs) { /* FIXME: Disgusting... can I tone this down a bit? */ if ( (her_obs->her_rlp == his_obs->her_rlp) || (her_obs->her_rlp == his_obs->his_rlp) || (her_obs->his_rlp == his_obs->her_rlp) || (her_obs->his_rlp == his_obs->his_rlp) ) { return 1; } return 0; } static int obs_shares_spot_w_array(struct SpotVec *obs_vecs, int test_idx, int *members, int num) { int i; struct SpotVec *her_obs = &obs_vecs[test_idx]; for ( i=0; iobsvec, his_obs->obsvec); /* calculate angle between all potential theoretical vectors */ for ( i=0; imatch_num; i++ ) { for ( j=0; jmatch_num; j++ ) { struct rvec *her_match = &her_obs->matches[i]; struct rvec *his_match = &his_obs->matches[j]; double theory_angle = rvec_angle(*her_match, *his_match); /* is this angle a match? */ double angle_diff = fabs(theory_angle - obs_angle); if ( angle_diff < ANGLE_TOLERANCE ) { size_t new_size = (*match_count + 1) * sizeof(int); if (her_match_idxs && his_match_idxs) { /* Reallocate the array to fit in another match */ int *temp_hers; temp_hers = realloc(*her_match_idxs, new_size); int *temp_his; temp_his = realloc(*his_match_idxs, new_size); if ( temp_hers == NULL || temp_his == NULL ) { apologise(); } (*her_match_idxs) = temp_hers; (*his_match_idxs) = temp_his; (*her_match_idxs)[*match_count] = i; (*his_match_idxs)[*match_count] = j; } (*match_count)++; } } } return (*match_count > 0); } static int obs_angles_match_array(struct SpotVec *obs_vecs, int test_idx, int *obs_members, int *match_members, int num) { /* note: this is just a preliminary check to reduce unnecessary * computation later down the line, but is not entirely accurate. * For example, I have not checked that the 'matching cell vector' * is identical - too much faff. **/ int i, j; struct SpotVec *her_obs = &obs_vecs[test_idx]; for ( i=0; i obs_vec_count) { return 0; } int match_found = -1; signed int next_index = find_next_index(rot, obs_vecs, obs_vec_count, obs_members, match_members, start, member_num, &match_found, 0); if ( member_num < 2 ) { return 0; } if ( next_index < 0 ) { /* If there have been too many dead ends, give up * on indexing altogether. **/ if ( dead_ends > MAX_DEAD_ENDS ) { break; } /* We have not had too many dead ends. Try removing the last member and continue. */ start++; member_num--; dead_ends++; continue; } /* we have elongated membership - so reset dead_ends counter */ dead_ends = 0; obs_members[member_num] = next_index; match_members[member_num] = match_found; start = next_index + 1; member_num++; if (member_num > *max_members) { *max_members = member_num; } /* If member_num is high enough, we want to return a yes */ if ( member_num > NETWORK_MEMBER_THRESHOLD ) break; } finalise_solution(rot, obs_vecs, obs_members, match_members, member_num); return ( member_num > NETWORK_MEMBER_THRESHOLD ); } static int start_seed(struct SpotVec *obs_vecs, int obs_vec_count, int i, int j, int i_match, int j_match, gsl_matrix **rotation, int *max_members) { gsl_matrix *rot_mat; rot_mat = generate_rot_mat(obs_vecs[i].obsvec, obs_vecs[j].obsvec, obs_vecs[i].matches[i_match], obs_vecs[j].matches[j_match]); /* Try to expand this rotation matrix to a larger network */ int success = grow_network(rot_mat, obs_vecs, obs_vec_count, i, j, i_match, j_match, max_members); /* return this matrix and if it was immediately successful */ *rotation = rot_mat; return success; } static int find_seed(struct SpotVec *obs_vecs, int obs_vec_count, gsl_matrix **rotation) { /* META: Maximum achieved maximum network membership */ int max_max_members = 0; gsl_matrix *best_rotation = NULL; unsigned long start_time = time(NULL); /* loop round pairs of vectors to try and find a suitable * seed to start building a self-consistent network of vectors */ int i, j; for ( i=0; i max_max_members) { max_max_members = max_members; gsl_matrix_free(best_rotation); best_rotation = *rotation; *rotation = NULL; } else { gsl_matrix_free(*rotation); *rotation = NULL; } } unsigned long now_time = time(NULL); unsigned int seconds = now_time - start_time; if (seconds > 30) { /* Heading towards CrystFEL cutoff so return your best guess and run */ free(i_idx); free(j_idx); *rotation = best_rotation; return (best_rotation != NULL); } } free(i_idx); free(j_idx); } } /* yes this } is meant to be here */ *rotation = best_rotation; return (best_rotation != NULL); } struct sortme { struct rvec v; double dist; }; static int sort_func(const void *av, const void *bv) { struct sortme *a = (struct sortme *)av; struct sortme *b = (struct sortme *)bv; return a->dist > b->dist; } static int match_obs_to_cell_vecs(struct rvec *cell_vecs, int cell_vec_count, struct SpotVec *obs_vecs, int obs_vec_count) { int i, j; for ( i=0; i RECIP_TOLERANCE ) continue; /* we have a match, add to array! */ size_t new_size = (count+1)*sizeof(struct sortme); for_sort = realloc(for_sort, new_size); if ( for_sort == NULL ) return 0; for_sort[count].v = cell_vecs[j]; for_sort[count].dist = dist_diff; count++; } qsort(for_sort, count, sizeof(struct sortme), sort_func); obs_vecs[i].matches = malloc(count*sizeof(struct rvec)); obs_vecs[i].match_num = count; for ( j=0; j max_sq_length ) continue; count++; struct SpotVec *temp_obs_vecs; temp_obs_vecs = realloc(*obs_vecs, count*sizeof(struct SpotVec)); if ( temp_obs_vecs == NULL ) { return 0; } else { *obs_vecs = temp_obs_vecs; /* initialise all SpotVec struct members */ struct SpotVec spot_vec; spot_vec.obsvec = diff; spot_vec.distance = sqrt(sqlength); spot_vec.matches = NULL; spot_vec.match_num = 0; spot_vec.her_rlp = &rlps[i]; spot_vec.his_rlp = &rlps[j]; (*obs_vecs)[count - 1] = spot_vec; } } } *obs_vec_count = count; return 1; } static int gen_theoretical_vecs(UnitCell *cell, struct rvec **cell_vecs, int *vec_count) { double a, b, c, alpha, beta, gamma; int h_max, k_max, l_max; double asx, asy, asz; double bsx, bsy, bsz; double csx, csy, csz; cell_get_parameters(cell, &a, &b, &c, &alpha, &beta, &gamma); /* find maximum Miller (h, k, l) indices for a given resolution */ h_max = MAX_RECIP_DISTANCE * a; k_max = MAX_RECIP_DISTANCE * b; l_max = MAX_RECIP_DISTANCE * c; int h, k, l; int count = 0; cell_get_reciprocal(cell, &asx, &asy, &asz, &bsx, &bsy, &bsz, &csx, &csy, &csz); for ( h=-h_max; h<=+h_max; h++ ) { for ( k=-k_max; k<=+k_max; k++ ) { for ( l=-l_max; l<=+l_max; l++ ) { struct rvec cell_vec; /* Exclude systematic absences from centering concerns */ if ( forbidden_reflection(cell, h, k, l) ) continue; cell_vec.u = h*asx + k*bsx + l*csx; cell_vec.v = h*asy + k*bsy + l*csy; cell_vec.w = h*asz + k*bsz + l*csz; /* add this to our array - which may require expanding */ count++; struct rvec *temp_cell_vecs; temp_cell_vecs = realloc(*cell_vecs, count*sizeof(struct rvec)); if ( temp_cell_vecs == NULL ) { return 0; } else { *cell_vecs = temp_cell_vecs; (*cell_vecs)[count - 1] = cell_vec; } } } } *vec_count = count; return 1; } /* ------------------------------------------------------------------------ * cleanup functions - called from run_taketwo(). * ------------------------------------------------------------------------*/ static void cleanup_taketwo_cell_vecs(struct rvec *cell_vecs) { free(cell_vecs); } static void cleanup_taketwo_obs_vecs(struct SpotVec *obs_vecs, int obs_vec_count) { int i; for ( i=0; ifeatures)+1)*sizeof(struct rvec)); for ( i=0; ifeatures); i++ ) { struct imagefeature *pk = image_get_feature(image->features, i); if ( pk == NULL ) continue; rlps[n_rlps].u = pk->rx; rlps[n_rlps].v = pk->ry; rlps[n_rlps].w = pk->rz; n_rlps++; } rlps[n_rlps].u = 0.0; rlps[n_rlps].v = 0.0; rlps[n_rlps++].w = 0.0; cell = run_taketwo(tp->cell, rlps, n_rlps); free(rlps); if ( cell == NULL ) return 0; cr = crystal_new(); if ( cr == NULL ) { ERROR("Failed to allocate crystal.\n"); return 0; } crystal_set_cell(cr, cell); if ( tp->indm & INDEXING_CHECK_PEAKS ) { if ( !peak_sanity_check(image, &cr, 1) ) { cell_free(cell); crystal_free(cr); return 0; } } image_add_crystal(image, cr); return 1; } IndexingPrivate *taketwo_prepare(IndexingMethod *indm, UnitCell *cell, struct detector *det, float *ltl) { struct taketwo_private *tp; /* Flags that TakeTwo knows about */ *indm &= INDEXING_METHOD_MASK | INDEXING_CHECK_PEAKS | INDEXING_USE_LATTICE_TYPE | INDEXING_USE_CELL_PARAMETERS | INDEXING_CONTROL_FLAGS; if ( !( (*indm & INDEXING_USE_LATTICE_TYPE) && (*indm & INDEXING_USE_CELL_PARAMETERS)) ) { ERROR("TakeTwo indexing requires cell and lattice type " "information.\n"); return NULL; } if ( cell == NULL ) { ERROR("TakeTwo indexing requires a unit cell.\n"); return NULL; } STATUS("Welcome to TakeTwo\n"); STATUS("If you use these indexing results, please cite:\n"); STATUS("Ginn et al., Acta Cryst. (2016). D72, 956-965\n"); tp = malloc(sizeof(struct taketwo_private)); if ( tp == NULL ) return NULL; tp->ltl = ltl; tp->cell = cell; tp->indm = *indm; return (IndexingPrivate *)tp; } void taketwo_cleanup(IndexingPrivate *pp) { struct taketwo_private *tp = (struct taketwo_private *)pp; free(tp); }