aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2015-08-31 12:10:01 +0200
committerThomas White <taw@physics.org>2015-08-31 12:10:01 +0200
commit9b9fa1f6d8d3155357f03db0670fae01e45d4339 (patch)
tree7d8ffa2cf990a63097385963ed0eea85f325b157
parent87a90f04ce9db14d7f097955468e9e4f2d2f06f3 (diff)
partialator: Add --custom-split
-rw-r--r--src/partialator.c352
1 files changed, 328 insertions, 24 deletions
diff --git a/src/partialator.c b/src/partialator.c
index 24f8a6d1..591478af 100644
--- a/src/partialator.c
+++ b/src/partialator.c
@@ -59,6 +59,234 @@
#include "rejection.h"
+struct csplit_hash_entry
+{
+ int n_events;
+ char **events;
+ int *datasets;
+};
+
+#define CSPLIT_HASH_MAX (65536)
+
+struct custom_split
+{
+ int n_events_total;
+ int n_datasets;
+ char **dataset_names;
+ struct csplit_hash_entry hashes[CSPLIT_HASH_MAX];
+};
+
+
+static int csplit_hash(const char *id)
+{
+ int i;
+ size_t len = strlen(id);
+ int h = 0;
+
+ for ( i=0; i<len; i++ ) {
+ h += id[i] * i;
+ h = h % CSPLIT_HASH_MAX;
+ }
+ assert(h < CSPLIT_HASH_MAX);
+
+ return h;
+}
+
+
+static void add_to_hash_entry(struct csplit_hash_entry *he, const char *id,
+ int dsn)
+{
+ he->events = realloc(he->events, (1+he->n_events)*sizeof(char *));
+ he->datasets = realloc(he->datasets, (1+he->n_events)*sizeof(int));
+ if ( (he->events == NULL) || (he->datasets == NULL) ) {
+ ERROR("Failed to grow csplit hash entry.\n");
+ abort();
+ }
+
+ he->events[he->n_events] = strdup(id);
+ he->datasets[he->n_events] = dsn;
+ he->n_events++;
+}
+
+
+static signed int find_dsn_for_id(struct custom_split *csplit, const char *id)
+{
+ int hash = csplit_hash(id);
+ int i;
+ struct csplit_hash_entry *he = &csplit->hashes[hash];
+
+ for ( i=0; i<he->n_events; i++ ) {
+ if ( strcmp(he->events[i], id) == 0 ) {
+ return he->datasets[i];
+ }
+ }
+
+ return -1;
+}
+
+
+/* Find dataset number */
+static int find_dsn(struct custom_split *csplit, const char *ds)
+{
+ int i;
+
+ for ( i=0; i<csplit->n_datasets; i++ ) {
+ if ( strcmp(csplit->dataset_names[i], ds) == 0 ) {
+ return i;
+ }
+ }
+
+ csplit->dataset_names = realloc(csplit->dataset_names,
+ (1+csplit->n_datasets)*sizeof(char *));
+ if ( csplit->dataset_names == NULL ) {
+ ERROR("Failed to grow list of dataset names\n");
+ abort();
+ }
+
+ csplit->n_datasets++;
+ csplit->dataset_names[csplit->n_datasets-1] = strdup(ds);
+ return csplit->n_datasets-1;
+}
+
+
+/* Add arbitrary ID 'id' to dataset table with name 'ds' */
+static void add_to_csplit(struct custom_split *csplit, const char *id,
+ const char *ds)
+{
+ int dsn;
+ int hash;
+ struct csplit_hash_entry *he;
+
+ dsn = find_dsn(csplit, ds);
+
+ hash = csplit_hash(id);
+ he = &csplit->hashes[hash];
+ add_to_hash_entry(he, id, dsn);
+ csplit->n_events_total++;
+}
+
+
+/* Write two-way split results (i.e. for CC1/2 etc) for this list of crystals */
+static void write_split(Crystal **crystals, int n_crystals, const char *outfile,
+ int nthreads, PartialityModel pmodel,
+ int min_measurements, SymOpList *sym, double push_res)
+{
+ char tmp[1024];
+ RefList *split;
+ Crystal *crystals1[n_crystals];
+ Crystal *crystals2[n_crystals];
+ int n_crystals1 = 0;
+ int n_crystals2 = 0;
+ int i;
+
+ for ( i=0; i<n_crystals; i++ ) {
+ if ( i % 2 ) {
+ crystals1[n_crystals1] = crystals[i];
+ n_crystals1++;
+ } else {
+ crystals2[n_crystals2] = crystals[i];
+ n_crystals2++;
+ }
+ }
+ snprintf(tmp, 1024, "%s1", outfile);
+ split = merge_intensities(crystals1, n_crystals1, nthreads,
+ pmodel, min_measurements, push_res);
+ STATUS("Writing two-way split to %s ", tmp);
+ write_reflist_2(tmp, split, sym);
+ reflist_free(split);
+ snprintf(tmp, 1024, "%s2", outfile);
+ split = merge_intensities(crystals2, n_crystals2, nthreads,
+ pmodel, min_measurements, push_res);
+ STATUS("and %s\n", tmp);
+ write_reflist_2(tmp, split, sym);
+ reflist_free(split);
+}
+
+
+static char *insert_into_filename(const char *fn, const char *add)
+{
+ int i;
+ char *out;
+
+ out = malloc(strlen(fn) + strlen(add) + 2);
+ if ( out == NULL ) return NULL;
+
+ for ( i=strlen(fn); i>0; i-- ) {
+ if ( fn[i] == '.' ) {
+ strncpy(out, fn, i);
+ out[i] = '\0';
+ strcat(out, "-");
+ strcat(out, add);
+ strcat(out, fn+i);
+ return out;
+ }
+ }
+
+ /* Fallback if fn does not contain a dot */
+ strcpy(out, fn);
+ strcat(out, "-");
+ strcat(out, add);
+ return out;
+}
+
+
+/* Write custom split results (including a two-way split) */
+static void write_custom_split(struct custom_split *csplit, int dsn,
+ Crystal **crystals, int n_crystals,
+ PartialityModel pmodel, int min_measurements,
+ double push_res, SymOpList *sym, int nthreads,
+ const char *outfile)
+{
+ char *tmp;
+ RefList *split;
+ Crystal *crystalsn[n_crystals];
+ int n_crystalsn = 0;
+ int i;
+
+ for ( i=0; i<n_crystals; i++ ) {
+
+ const char *fn;
+ struct event *ev;
+ char *evs;
+ char *id;
+ int dsn_crystal;
+
+ fn = crystal_get_image(crystals[i])->filename;
+ ev = crystal_get_image(crystals[i])->event;
+ evs = get_event_string(ev);
+
+ id = malloc(strlen(evs)+strlen(fn)+2);
+ if ( id == NULL ) {
+ ERROR("Failed to allocate ID\n");
+ return;
+ }
+ strcpy(id, fn);
+ strcat(id, " ");
+ strcat(id, evs);
+ dsn_crystal = find_dsn_for_id(csplit, id);
+ free(id);
+ if ( dsn == dsn_crystal ) {
+ crystalsn[n_crystalsn] = crystals[i];
+ n_crystalsn++;
+ }
+
+ }
+
+ tmp = insert_into_filename(outfile, csplit->dataset_names[dsn]);
+
+ STATUS("Writing dataset '%s' to %s (%i crystals)\n",
+ csplit->dataset_names[dsn], tmp, n_crystalsn);
+ split = merge_intensities(crystalsn, n_crystalsn, nthreads,
+ pmodel, min_measurements, push_res);
+ write_reflist_2(tmp, split, sym);
+ reflist_free(split);
+
+ write_split(crystalsn, n_crystalsn, tmp, nthreads, pmodel,
+ min_measurements, sym, push_res);
+ free(tmp);
+}
+
+
static void show_help(const char *s)
{
printf("Syntax: %s [options]\n\n", s);
@@ -82,7 +310,79 @@ static void show_help(const char *s)
" --max-adu=<n> Saturation value of detector.\n"
" --push-res=<n> Merge higher than apparent resolution cutoff.\n"
" -j <n> Run <n> analyses in parallel.\n"
-" --no-free Disable cross-validation (testing only).\n");
+" --no-free Disable cross-validation (testing only).\n"
+" --custom-split List of files for custom dataset splitting.\n");
+}
+
+
+static struct custom_split *load_custom_split(const char *filename)
+{
+ struct custom_split *csplit;
+ FILE *fh;
+ int i;
+
+ csplit = malloc(sizeof(struct custom_split));
+ if ( csplit == NULL ) return NULL;
+ csplit->n_datasets = 0;
+ csplit->n_events_total = 0;
+ csplit->dataset_names = NULL;
+ for ( i=0; i<CSPLIT_HASH_MAX; i++ ) {
+ csplit->hashes[i].n_events = 0;
+ csplit->hashes[i].events = NULL;
+ csplit->hashes[i].datasets = NULL;
+ }
+
+ fh = fopen(filename, "r");
+ if ( fh == NULL ) {
+ ERROR("Failed to open '%s'\n", filename);
+ free(csplit);
+ return NULL;
+ }
+
+ do {
+
+ char *rval;
+ char line[1024];
+ int i;
+ char *fn;
+ struct event *ev;
+ char *evs;
+ char *id;
+
+ rval = fgets(line, 1023, fh);
+ if ( rval == NULL ) break;
+
+ chomp(line);
+ notrail(line);
+
+ for ( i=strlen(line); i>0; i-- ) {
+ if ( (line[i] == ' ') || (line[i]=='\t') ) break;
+ }
+ if ( i == 0 ) {
+ ERROR("Badly formatted line '%s'\n", line);
+ return NULL;
+ }
+ i++;
+
+ fn = strndup(line, i);
+ ev = get_event_from_event_string(line);
+ evs = get_event_string(ev);
+ notrail(fn);
+ id = malloc(strlen(fn) + strlen(evs) + 2);
+ strcpy(id, fn);
+ strcat(id, " ");
+ strcat(id, evs);
+ add_to_csplit(csplit, id, line+i);
+ free(id);
+
+ } while ( 1 );
+
+ fclose(fh);
+
+ STATUS("Hash table load factor = %.2f\n",
+ (double)csplit->n_events_total / CSPLIT_HASH_MAX);
+
+ return csplit;
}
@@ -304,6 +604,8 @@ int main(int argc, char *argv[])
double push_res = 0.0;
gsl_rng *rng;
int no_free = 0;
+ char *csplit_fn = NULL;
+ struct custom_split *csplit = NULL;
/* Long options */
const struct option longopts[] = {
@@ -323,6 +625,7 @@ int main(int argc, char *argv[])
{"start-params", 1, NULL, 4},
{"push-res", 1, NULL, 5},
{"res-push", 1, NULL, 5}, /* compat */
+ {"custom-split", 1, NULL, 6},
{"no-scale", 0, &no_scale, 1},
{"no-pr", 0, &no_pr, 1},
@@ -433,6 +736,10 @@ int main(int argc, char *argv[])
push_res = push_res*1e9;
break;
+ case 6 :
+ csplit_fn = strdup(optarg);
+ break;
+
case 0 :
break;
@@ -491,6 +798,16 @@ int main(int argc, char *argv[])
STATUS("Not going to refine the unity model.\n");
}
+ /* Read the custom split list (if applicable) */
+ if ( csplit_fn != NULL ) {
+ csplit = load_custom_split(csplit_fn);
+ if ( csplit == NULL ) {
+ ERROR("Failed to load custom split list.\n");
+ return 1;
+ }
+ free(csplit_fn);
+ }
+
gsl_set_error_handler_off();
rng = gsl_rng_alloc(gsl_rng_mt19937);
@@ -674,34 +991,21 @@ int main(int argc, char *argv[])
}
/* Output results */
+ STATUS("Writing overall results to %s\n", outfile);
write_reflist_2(outfile, full, sym);
/* Output split results */
- char tmp[1024];
- RefList *split;
- Crystal *crystals1[n_crystals];
- Crystal *crystals2[n_crystals];
- int n_crystals1 = 0;
- int n_crystals2 = 0;
- for ( i=0; i<n_crystals; i++ ) {
- if ( i % 2 ) {
- crystals1[n_crystals1] = crystals[i];
- n_crystals1++;
- } else {
- crystals2[n_crystals2] = crystals[i];
- n_crystals2++;
+ write_split(crystals, n_crystals, outfile, nthreads, pmodel,
+ min_measurements, sym, push_res);
+
+ /* Output custom split results */
+ if ( csplit != NULL ) {
+ for ( i=0; i<csplit->n_datasets; i++ ) {
+ write_custom_split(csplit, i, crystals, n_crystals,
+ pmodel, min_measurements, push_res,
+ sym, nthreads, outfile);
}
}
- snprintf(tmp, 1024, "%s1", outfile);
- split = merge_intensities(crystals1, n_crystals1, nthreads,
- pmodel, min_measurements, push_res);
- write_reflist_2(tmp, split, sym);
- reflist_free(split);
- snprintf(tmp, 1024, "%s2", outfile);
- split = merge_intensities(crystals2, n_crystals2, nthreads,
- pmodel, min_measurements, push_res);
- write_reflist_2(tmp, split, sym);
- reflist_free(split);
/* Dump parameters */
FILE *fh;