aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2010-03-25 11:53:48 +0100
committerThomas White <taw@physics.org>2010-03-25 11:53:48 +0100
commitb372c5d77caa1bc9bd4c8f69de76d064f8d93ccd (patch)
treed1571f069f33730bef0d76964f9e8ce95d4bdb95
parentf93bd1a5a69ad5061a1289bf77c701c13275842f (diff)
Improved PDB parser
-rw-r--r--src/sfac.c66
-rw-r--r--src/sfac.h13
-rw-r--r--src/utils.c88
-rw-r--r--src/utils.h8
4 files changed, 153 insertions, 22 deletions
diff --git a/src/sfac.c b/src/sfac.c
index 81e381b3..3668217f 100644
--- a/src/sfac.c
+++ b/src/sfac.c
@@ -318,6 +318,8 @@ struct molecule *load_molecule()
char line[1024];
char *rval;
int i;
+ int nbits;
+ char **bits;
mol = malloc(sizeof(struct molecule));
if ( mol == NULL ) return NULL;
@@ -333,12 +335,11 @@ struct molecule *load_molecule()
do {
- char el[4];
+ char *el;
int j, r;
int done = 0;
float xf, yf, zf, occf, Bf;
double x, y, z, occ, B;
- char *coords;
rval = fgets(line, 1023, fh);
@@ -360,23 +361,49 @@ struct molecule *load_molecule()
/* Only interested in atoms */
if ( strncmp(line, "HETATM", 6) != 0 ) continue;
- /* The following crimes against programming style
- * were brought to you by Wizbit Enterprises, Inc. */
- if ( line[76] == ' ' ) {
- el[0] = line[77];
- el[1] = '\0';
- } else {
- el[0] = line[76];
- el[1] = line[77];
- el[2] = '\0';
- }
+ chomp(line);
+ nbits = assplode(line, " ", &bits, ASSPLODE_NONE);
+ if ( nbits == 0 ) continue;
+
+ if ( nbits == 11 ) {
+
+ /* Normal case- HETATM<space+>number<space+>stuff */
+ r = sscanf(bits[5], "%f", &xf);
+ r += sscanf(bits[6], "%f", &yf);
+ r += sscanf(bits[7], "%f", &zf);
+ r += sscanf(bits[8], "%f", &occf);
+ r += sscanf(bits[9], "%f", &Bf);
+ if ( r != 5 ) {
+ STATUS("PDB read failed (%i)\n", r);
+ for ( i=0; i<nbits; i++ ) free(bits[i]);
+ continue;
+ }
+ el = strdup(bits[10]);
+
+ } else if ( nbits == 10 ) {
+
+ /* Squished case- HETATMnumber<space+>stuff */
+ r = sscanf(bits[4], "%f", &xf);
+ r += sscanf(bits[5], "%f", &yf);
+ r += sscanf(bits[6], "%f", &zf);
+ r += sscanf(bits[7], "%f", &occf);
+ r += sscanf(bits[8], "%f", &Bf);
+ if ( r != 5 ) {
+ STATUS("PDB read failed (%i)\n", r);
+ for ( i=0; i<nbits; i++ ) free(bits[i]);
+ continue;
+ }
+ el = strdup(bits[9]);
- coords = line + 29;
- r = sscanf(coords, "%f %f %f %f %f", &xf, &yf, &zf, &occf, &Bf);
- if ( r != 5 ) {
- STATUS("I didn't understand a line in the PDB file.\n");
+ } else {
+ STATUS("Unrecognised line in the PDB file (%i)\n",
+ nbits);
+ STATUS("'%s'\n", line);
+ for ( i=0; i<nbits; i++ ) free(bits[i]);
continue;
}
+ for ( i=0; i<nbits; i++ ) free(bits[i]);
+
/* Promote to double precision */
x = xf; y = yf; z = zf; occ = occf; B = Bf;
@@ -397,6 +424,10 @@ struct molecule *load_molecule()
spec->occ[n] = occ;
spec->B[n] = B*1.0e-20; /* Convert to m^2 */
mol->species[j]->n_atoms++;
+ if ( mol->species[j]->n_atoms == MAX_ATOMS ) {
+ ERROR("Too many atoms of type '%s'!\n", el);
+ exit(1);
+ }
done = 1;
@@ -409,7 +440,7 @@ struct molecule *load_molecule()
spec = malloc(sizeof(struct mol_species));
- memcpy(spec->species, el, 4);
+ memcpy(spec->species, el, 1+strlen(el));
spec->x[0] = x*1.0e-10; /* Convert to m */
spec->y[0] = y*1.0e-10;
spec->z[0] = z*1.0e-10;
@@ -422,6 +453,7 @@ struct molecule *load_molecule()
}
+ free(el);
} while ( rval != NULL );
diff --git a/src/sfac.h b/src/sfac.h
index 000b859f..fea97022 100644
--- a/src/sfac.h
+++ b/src/sfac.h
@@ -21,16 +21,19 @@
#include "cell.h"
+#define MAX_ATOMS (128*1024)
+
+
struct mol_species
{
char species[4]; /* Species name */
int n_atoms; /* Number of atoms of this species */
- double x[32*1024];
- double y[32*1024];
- double z[32*1024];
- double occ[32*1024];
- double B[32*1024];
+ double x[MAX_ATOMS];
+ double y[MAX_ATOMS];
+ double z[MAX_ATOMS];
+ double occ[MAX_ATOMS];
+ double B[MAX_ATOMS];
};
diff --git a/src/utils.c b/src/utils.c
index 64423c2f..e3b8b667 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -195,3 +195,91 @@ struct rvec quat_rot(struct rvec q, struct quaternion z)
return res;
}
+
+
+/* Return non-zero if c is in delims */
+static int assplode_isdelim(const char c, const char *delims)
+{
+ size_t i;
+ for ( i=0; i<strlen(delims); i++ ) {
+ if ( c == delims[i] ) return 1;
+ }
+ return 0;
+}
+
+
+static int assplode_extract(char ***pbits, int n, size_t n_captured,
+ size_t start, const char *a)
+{
+ char **bits = *pbits;
+ bits = realloc(bits, sizeof(char *)*(n+1));
+ bits[n] = malloc(n_captured+1);
+ memcpy(bits[n], a+start, n_captured);
+ bits[n][n_captured] = '\0';
+ n++;
+ *pbits = bits;
+ return n;
+}
+
+
+/* Split the string 'a' using 'delims' as a zero-terminated list of
+ * deliminators.
+ * Store each segment in bits[0...n] where n is the number of segments and is
+ * the return value. pbits = &bits
+ * Each segment needs to be freed with free() when finished with.
+ * The array of bits also needs to be freed with free() when finished with,
+ * unless n=0 in which case bits==NULL
+ */
+int assplode(const char *a, const char *delims, char ***pbits,
+ AssplodeFlag flags)
+{
+ size_t i, start, n_captured;
+ int n, last_was_delim;
+ char **bits;
+
+ n = 0;
+ i = 0;
+ n_captured = 0;
+ start = 0;
+ last_was_delim = 0;
+ bits = NULL;
+ while ( i < strlen(a) ) {
+
+ if ( assplode_isdelim(a[i], delims) ) {
+
+ if ( n_captured > 0 ) {
+ /* This is a deliminator after a sequence of
+ * non-deliminator chars */
+ n = assplode_extract(&bits, n, n_captured,
+ start, a);
+ }
+
+ n_captured = 0;
+ if ( (flags & ASSPLODE_DUPS) && last_was_delim ) {
+ n = assplode_extract(&bits, n, 0, start, a);
+ }
+ last_was_delim = 1;
+
+ } else {
+
+ if ( n_captured == 0 ) {
+ /* No characters currently found, so this is the
+ * start */
+ start = i;
+ }
+ n_captured++;
+ last_was_delim = 0;
+
+ }
+
+ i++;
+
+ }
+ /* Left over characters at the end? */
+ if ( n_captured > 0 ) {
+ n = assplode_extract(&bits, n, n_captured, start, a);
+ }
+
+ *pbits = bits;
+ return n;
+}
diff --git a/src/utils.h b/src/utils.h
index 7d8274bd..441fa51d 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -67,6 +67,14 @@ extern struct rvec quat_rot(struct rvec q, struct quaternion z);
extern size_t skipspace(const char *s);
extern void chomp(char *s);
+
+typedef enum {
+ ASSPLODE_NONE = 0,
+ ASSPLODE_DUPS = 1<<0
+} AssplodeFlag;
+extern int assplode(const char *a, const char *delims, char ***pbits,
+ AssplodeFlag flags);
+
extern void progress_bar(int val, int total, const char *text);
extern int poisson_noise(double expected);