2 files changed, 44 insertions, 15 deletions
diff --git a/doc/man/partialator.1 b/doc/man/partialator.1
index 817fd7ed..6fa67593 100644
--- a/doc/man/partialator.1
+++ b/doc/man/partialator.1
@@ -251,7 +251,7 @@ partialator -i \fImy.stream \fR-o \fImy.hkl\fR -y \fImypointgroup \fB--model=xsp
 .SH CUSTOM DATASET SPLITTING
 When performing a time-resolved experiment (for example), it is preferable to ensure that the data for all time points has been processed identically.  Rather than processing each time point independently with separate runs of partialator, it is better to process them all together and do the splitting into time points just before the final output.  Consider, for example, the case of simple scaling (without a B factor): when merging independently, the resulting datasets would probably end up with different overall scaling factors.  When comparing the results, you would need to take this difference into account.  In practice, most programs can do that job easily, but what about if a B factor is included?  And what if partialities are included - how unique is the solution?
 
-With \fBpartialator --custom-split\fR, you can provide a separate text file containing a list of filenames, event numbers and \fIdataset names\fR, one event (detector frame) per line, with the fields separated by any number of spaces, commas or tabs.  For each unique \fIdataset name\fR, a separate reflection list will be output.  All crystals will be refined together, but they will be merged according to the dataset names you give.  The parameters (scaling factors, partialities etc) determined during the joint refinement will be applied.  For each dataset, a separate pair of split half-datasets will also be written, allowing you to calculate figures of merit such as Rsplit and CC1/2 for each one.
+With \fBpartialator --custom-split\fR, you can provide a separate text file containing a list of filenames, event numbers and \fIdataset names\fR, one event (detector frame) per line, with adjacent fields separated by exactly one space.  For each unique \fIdataset name\fR, a separate reflection list will be output.  All crystals will be refined together, but they will be merged according to the dataset names you give.  The parameters (scaling factors, partialities, etc.) determined during the joint refinement will be applied.  For each dataset, a separate pair of split half-datasets will also be written, allowing you to calculate figures of merit such as Rsplit and CC1/2 for each one.
 
 If the overall output filename (given with \fB-o\fR or \fB--output\fR) were \fBmerged.hkl\fR, then a dataset named \fIdataset\fR would be written to \fBmerged-\fIdataset\fB.hkl\fR.  The corresponding half-datasets would be written to \fBmerged-\fIdataset\fB.hkl1\fR and \fBmerged-\fIdataset\fB.hkl2\fR.
 
diff --git a/src/partialator.c b/src/partialator.c
index 20d46abb..c6f1c085 100644
--- a/src/partialator.c
+++ b/src/partialator.c
@@ -451,11 +451,22 @@ static void check_csplit(Crystal **crystals, int n_crystals,
 }
 
 
+/* Returns 1 if str contains "//", which is taken as the mark of an event ID,
+ * or 0 otherwise. */
+static int looks_like_event(const char *str)
+{
+	const char *marker = strstr(str, "//");
+
+	return marker != NULL;
+}
+
+
 static struct custom_split *load_custom_split(const char *filename)
 {
 	struct custom_split *csplit;
 	FILE *fh;
 	int i;
+	int lno = 0;
 
 	csplit = malloc(sizeof(struct custom_split));
 	if ( csplit == NULL ) return NULL;
@@ -483,31 +494,49 @@ static struct custom_split *load_custom_split(const char *filename)
 		char *evs;
 		char *ds;
 		char *id;
-		int n;
-		char **bits;
+		size_t n, ev_start, ds_start;
 
+		lno++;
 		rval = fgets(line, 1023, fh);
 		if ( rval == NULL ) break;
 
 		chomp(line);
 		notrail(line);
-		n = assplode(line, " \t,", &bits, ASSPLODE_NONE);
-		if ( n < 2 ) {
-			ERROR("Badly formatted line '%s'\n", line);
+
+		/* Look for start of dataset */
+		n = strlen(line);
+		while ( line[n] != ' ' && n > 0 ) n--;
+		if ( n == 0 ) {
+			ERROR("Custom split file line %i has too few (only 1) "
+			      "fields.\n", lno);
+			free(csplit);
 			return NULL;
 		}
+		ds_start = n+1;
+		ds = strdup(&line[ds_start]);
+
+		n--;
+		while ( line[n] != ' ' && n > 0 ) n--;
+		if ( n == 0 ) {
+			ev_start = 0;
+		} else {
+			ev_start = n+1;
+		}
+
+		evs = strndup(&line[ev_start], ds_start-ev_start-1);
+		if ( !looks_like_event(evs) || (ev_start == 0) ) {
+			/* It doesn't look like an event ID - assume it's part
+			 * of the filename (which contains spaces) */
+			ev_start = 0;
+		}
 
-		if ( n == 3 ) {
-			/* Filename, event, dataset */
-			fn = bits[0];
-			evs = bits[1];
-			ds = bits[2];
+		if ( ev_start > 0 ) {
+			evs = strndup(&line[ev_start], ds_start-ev_start-1);
+			fn = strndup(line, ev_start-1);
 		} else {
-			fn = bits[0];
-			evs = strdup("(none)");
-			ds = bits[1];
+			evs = strdup("//");
+			fn = strndup(line, ds_start-1);
 		}
-		free(bits);
 
 		id = malloc(strlen(fn) + strlen(evs) + 2);
 		strcpy(id, fn);