aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Thomas White <taw@physics.org> 2021-11-25 15:50:36 +0100
committer: Thomas White <taw@physics.org> 2021-11-25 15:50:36 +0100
commit: f5d198b468d3e2c1bdece65ff5106c1989aee3c0 (patch)
tree: 51170cde751b812fd4e2403ff8d74a016b022be8
parent: 652872f38e4f1738366b16c350f27efd8cf7c99d (diff)
partialator: Handle spaces in custom split file
The csplit format is ambiguous when the filenames contain spaces. To make things a bit clearer, the file now requires the fields to be separated by exactly one space rather than any number of tabs/spaces. Fixes: https://gitlab.desy.de/thomas.white/crystfel/-/issues/55
-rw-r--r-- doc/man/partialator.1 2
-rw-r--r-- src/partialator.c 57
2 files changed, 44 insertions, 15 deletions
diff --git a/doc/man/partialator.1 b/doc/man/partialator.1
index 817fd7ed..6fa67593 100644
--- a/doc/man/partialator.1
+++ b/doc/man/partialator.1
@@ -251,7 +251,7 @@ partialator -i \fImy.stream \fR-o \fImy.hkl\fR -y \fImypointgroup \fB--model=xsp
.SH CUSTOM DATASET SPLITTING
When performing a time-resolved experiment (for example), it is preferable to ensure that the data for all time points has been processed identically. Rather than processing each time point independently with separate runs of partialator, it is better to process them all together and do the splitting into time points just before the final output. Consider, for example, the case of simple scaling (without a B factor): when merging independently, the resulting datasets would probably end up with different overall scaling factors. When comparing the results, you would need to take this difference into account. In practice, most programs can do that job easily, but what about if a B factor is included? And what if partialities are included - how unique is the solution?
-With \fBpartialator --custom-split\fR, you can provide a separate text file containing a list of filenames, event numbers and \fIdataset names\fR, one event (detector frame) per line, with the fields separated by any number of spaces, commas or tabs. For each unique \fIdataset name\fR, a separate reflection list will be output. All crystals will be refined together, but they will be merged according to the dataset names you give. The parameters (scaling factors, partialities etc) determined during the joint refinement will be applied. For each dataset, a separate pair of split half-datasets will also be written, allowing you to calculate figures of merit such as Rsplit and CC1/2 for each one.
+With \fBpartialator --custom-split\fR, you can provide a separate text file containing a list of filenames, event numbers and \fIdataset names\fR, one event (detector frame) per line, with each field separated by exactly one space. For each unique \fIdataset name\fR, a separate reflection list will be output. All crystals will be refined together, but they will be merged according to the dataset names you give. The parameters (scaling factors, partialities etc) determined during the joint refinement will be applied. For each dataset, a separate pair of split half-datasets will also be written, allowing you to calculate figures of merit such as Rsplit and CC1/2 for each one.
If the overall output filename (given with \fB-o\fR or \fB--output\fR) were \fBmerged.hkl\fR, then a dataset named \fIdataset\fR would be written to \fBmerged-\fIdataset\fB.hkl\fR. The corresponding half-datasets would be written to \fBmerged-\fIdataset\fB.hkl1\fR and \fBmerged-\fIdataset\fB.hkl2\fR.
diff --git a/src/partialator.c b/src/partialator.c
index 20d46abb..c6f1c085 100644
--- a/src/partialator.c
+++ b/src/partialator.c
@@ -451,11 +451,22 @@ static void check_csplit(Crystal **crystals, int n_crystals,
}
+/* Returns 1 if str contains the substring "//", otherwise 0 */
+static int looks_like_event(const char *str)
+{
+ const char *sep = strstr(str, "//");
+
+ if ( sep != NULL ) return 1;
+ return 0;
+}
+
+
static struct custom_split *load_custom_split(const char *filename)
{
struct custom_split *csplit;
FILE *fh;
int i;
+ int lno = 0;
csplit = malloc(sizeof(struct custom_split));
if ( csplit == NULL ) return NULL;
@@ -483,31 +494,49 @@ static struct custom_split *load_custom_split(const char *filename)
char *evs;
char *ds;
char *id;
- int n;
- char **bits;
+ size_t n, ev_start, ds_start;
+ lno++;
rval = fgets(line, 1023, fh);
if ( rval == NULL ) break;
chomp(line);
notrail(line);
- n = assplode(line, " \t,", &bits, ASSPLODE_NONE);
- if ( n < 2 ) {
- ERROR("Badly formatted line '%s'\n", line);
+
+ /* Look for start of dataset */
+ n = strlen(line);
+ while ( line[n] != ' ' && n > 0 ) n--;
+ if ( n == 0 ) {
+ ERROR("Custom split file line %i has too few (only 1) "
+ "fields.\n", lno);
+ free(csplit);
return NULL;
}
+ ds_start = n+1;
+ ds = strdup(&line[ds_start]);
+
+ n--;
+ while ( line[n] != ' ' && n > 0 ) n--;
+ if ( n == 0 ) {
+ ev_start = 0;
+ } else {
+ ev_start = n+1;
+ }
+
+ evs = strndup(&line[ev_start], ds_start-ev_start-1);
+ if ( !looks_like_event(evs) || (ev_start == 0) ) {
+ /* It doesn't look like an event ID - assume it's part
+ * of the filename (which contains spaces) */
+ ev_start = 0;
+ }
- if ( n == 3 ) {
- /* Filename, event, dataset */
- fn = bits[0];
- evs = bits[1];
- ds = bits[2];
+ if ( ev_start > 0 ) {
+ evs = strndup(&line[ev_start], ds_start-ev_start-1);
+ fn = strndup(line, ev_start-1);
} else {
- fn = bits[0];
- evs = strdup("(none)");
- ds = bits[1];
+ evs = strdup("//");
+ fn = strndup(line, ds_start-1);
}
- free(bits);
id = malloc(strlen(fn) + strlen(evs) + 2);
strcpy(id, fn);