aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2010-12-03 16:43:19 +0100
committerThomas White <taw@physics.org>2012-02-22 15:27:07 +0100
commit9aafea1bdb0255ad7d2491d96174ac3407a6ca69 (patch)
tree69bb5865124beea2285917fa4eaa62e092896525
parentc25120f4b71da8b82476c8a14b1617c8f7b72d57 (diff)
Use symmetry when simulating on the GPU
-rw-r--r--data/diffraction.cl116
-rw-r--r--src/cl-utils.c12
-rw-r--r--src/cl-utils.h3
-rw-r--r--src/diffraction-gpu.c56
-rw-r--r--src/diffraction-gpu.h6
-rw-r--r--src/indexamajig.c2
-rw-r--r--src/pattern_sim.c3
7 files changed, 170 insertions, 28 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl
index ff07edff..9eae1b4c 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -70,21 +70,12 @@ float lattice_factor(float16 cell, float4 q,
}
-float get_intensity(global float *intensities, float16 cell, float4 q)
+float lookup_intensity(global float *intensities,
+ signed int h, signed int k, signed int l)
{
- float hf, kf, lf;
- int h, k, l;
int idx;
- hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */
- kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */
- lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */
-
- h = round(hf);
- k = round(kf);
- l = round(lf);
-
- /* Return a silly value if indices are out of range */
+ /* Out of range? */
if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
return 0.0;
}
@@ -93,14 +84,106 @@ float get_intensity(global float *intensities, float16 cell, float4 q)
k = (k>=0) ? k : k+IDIM;
l = (l>=0) ? l : l+IDIM;
- if ( (h>=IDIM) || (k>=IDIM) || (l>=IDIM) ) return 0.0;
-
idx = h + (IDIM*k) + (IDIM*IDIM*l);
return intensities[idx];
}
+float lookup_flagged_intensity(global float *intensities, global float *flags,
+ signed int h, signed int k, signed int l)
+{
+ return lookup_intensity(intensities, h, k, l)
+ * lookup_intensity(flags, h, k, l);
+}
+
+
+float molecule_factor(global float *intensities, global float *flags,
+ float16 cell, float4 q)
+{
+ float hf, kf, lf;
+ int h, k, l;
+ float val = 0.0;
+ signed int i;
+
+ #ifdef FLAT_INTENSITIES
+ return 1.0e5;
+ #else
+
+ hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */
+ kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */
+ lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */
+
+ h = round(hf);
+ k = round(kf);
+ l = round(lf);
+ i = -h-k;
+
+ #ifdef PG1
+ val += lookup_flagged_intensity(intensities, flags, h, k, l);
+ #endif /* PG1 */
+
+ #ifdef PG1BAR
+ val += lookup_flagged_intensity(intensities, flags, h, k, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
+ #endif /* PG1BAR */
+
+ #ifdef PG6
+ val += lookup_flagged_intensity(intensities, flags, h, k, l);
+ val += lookup_flagged_intensity(intensities, flags, i, h, l);
+ val += lookup_flagged_intensity(intensities, flags, k, i, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
+ #endif /* PG6 */
+
+ #ifdef PG6M
+ val += lookup_flagged_intensity(intensities, flags, h, k, l);
+ val += lookup_flagged_intensity(intensities, flags, i, h, l);
+ val += lookup_flagged_intensity(intensities, flags, k, i, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -h, -l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -i, -l);
+ val += lookup_flagged_intensity(intensities, flags, h, k, -l);
+ val += lookup_flagged_intensity(intensities, flags, i, h, -l);
+ val += lookup_flagged_intensity(intensities, flags, k, i, -l);
+ #endif /* PG6M */
+
+ #ifdef PG6MMM
+ val += lookup_flagged_intensity(intensities, flags, h, k, l);
+ val += lookup_flagged_intensity(intensities, flags, i, h, l);
+ val += lookup_flagged_intensity(intensities, flags, k, i, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -h, l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -i, l);
+ val += lookup_flagged_intensity(intensities, flags, k, h, -l);
+ val += lookup_flagged_intensity(intensities, flags, h, i, -l);
+ val += lookup_flagged_intensity(intensities, flags, i, k, -l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -h, -l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -i, -l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -k, -l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -k, -l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -h, -l);
+ val += lookup_flagged_intensity(intensities, flags, -k, i, -l);
+ val += lookup_flagged_intensity(intensities, flags, h, k, -l);
+ val += lookup_flagged_intensity(intensities, flags, i, h, -l);
+ val += lookup_flagged_intensity(intensities, flags, k, i, -l);
+ val += lookup_flagged_intensity(intensities, flags, -k, -h, l);
+ val += lookup_flagged_intensity(intensities, flags, -h, -i, l);
+ val += lookup_flagged_intensity(intensities, flags, -i, -k, l);
+ val += lookup_flagged_intensity(intensities, flags, k, h, l);
+ val += lookup_flagged_intensity(intensities, flags, h, i, l);
+ val += lookup_flagged_intensity(intensities, flags, i, k, l);
+ #endif /* PG6MMM */
+
+ return val;
+ #endif /* FLAT_INTENSITIIES */
+}
+
+
kernel void diffraction(global float *diff, global float *tt, float klow,
int w, float cx, float cy,
float res, float clen, float16 cell,
@@ -109,7 +192,8 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
float kstep,
read_only image2d_t func_a,
read_only image2d_t func_b,
- read_only image2d_t func_c)
+ read_only image2d_t func_c,
+ global float *flags)
{
float ttv;
const int x = get_global_id(0) + (xmin*sampling);
@@ -128,7 +212,7 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
/* Calculate value */
q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling);
f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
- I_molecule = get_intensity(intensities, cell, q);
+ I_molecule = molecule_factor(intensities, flags, cell, q);
I_lattice = pow(f_lattice, 2.0f);
/* Write the value to local memory */
diff --git a/src/cl-utils.c b/src/cl-utils.c
index 1b7b9811..0c6dce27 100644
--- a/src/cl-utils.c
+++ b/src/cl-utils.c
@@ -146,13 +146,14 @@ static void show_build_log(cl_program prog, cl_device_id dev)
cl_program load_program(const char *filename, cl_context ctx,
- cl_device_id dev, cl_int *err)
+ cl_device_id dev, cl_int *err, const char *extra_cflags)
{
FILE *fh;
cl_program prog;
char *source;
size_t len;
cl_int r;
+ char cflags[1024] = "";
fh = fopen(filename, "r");
if ( fh == NULL ) {
@@ -172,9 +173,12 @@ cl_program load_program(const char *filename, cl_context ctx,
return 0;
}
- r = clBuildProgram(prog, 0, NULL,
- "-Werror -I"DATADIR"/crystfel/ -cl-no-signed-zeros",
- NULL, NULL);
+ snprintf(cflags, 1023, "-Werror ");
+ strncat(cflags, "-I"DATADIR"/crystfel/ ", 1023-strlen(cflags));
+ strncat(cflags, "-cl-no-signed-zeros ", 1023-strlen(cflags));
+ strncat(cflags, extra_cflags, 1023-strlen(cflags));
+
+ r = clBuildProgram(prog, 0, NULL, cflags, NULL, NULL);
if ( r != CL_SUCCESS ) {
ERROR("Couldn't build program '%s'\n", filename);
show_build_log(prog, dev);
diff --git a/src/cl-utils.h b/src/cl-utils.h
index 790a149a..21a7ecd2 100644
--- a/src/cl-utils.h
+++ b/src/cl-utils.h
@@ -20,7 +20,8 @@
extern const char *clError(cl_int err);
extern cl_device_id get_cl_dev(cl_context ctx, int n);
extern cl_program load_program(const char *filename, cl_context ctx,
- cl_device_id dev, cl_int *err);
+ cl_device_id dev, cl_int *err,
+ const char *extra_cflags);
#endif /* CLUTILS_H */
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 2d8eda95..dd382ede 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -39,6 +39,7 @@ struct gpu_context
cl_program prog;
cl_kernel kern;
cl_mem intensities;
+ cl_mem flags;
cl_mem tt;
size_t tt_size;
@@ -219,6 +220,13 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
+ /* Flag array */
+ clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set flag array: %s\n", clError(err));
+ return;
+ }
+
/* Iterate over panels */
event = malloc(image->det->n_panels * sizeof(cl_event));
for ( p=0; p<image->det->n_panels; p++ ) {
@@ -331,7 +339,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* Setup the OpenCL stuff, create buffers, load the structure factor table */
struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities, unsigned char *flags,
- int dev_num)
+ const char *sym, int dev_num)
{
struct gpu_context *gctx;
cl_uint nplat;
@@ -341,8 +349,11 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
cl_device_id dev;
size_t intensities_size;
float *intensities_ptr;
+ size_t flags_size;
+ float *flags_ptr;
size_t maxwgsize;
int i;
+ char cflags[512] = "";
STATUS("Setting up GPU...\n");
@@ -396,8 +407,9 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
} else {
for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- intensities_ptr[i] = 1e10;
+ intensities_ptr[i] = 1e5;
}
+ strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags));
}
gctx->intensities = clCreateBuffer(gctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
@@ -409,6 +421,44 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
free(intensities_ptr);
+ if ( strcmp(sym, "1") == 0 ) {
+ strncat(cflags, "-DPG1 ", 511-strlen(cflags));
+ } else if ( strcmp(sym, "-1") == 0 ) {
+ strncat(cflags, "-DPG1BAR ", 511-strlen(cflags));
+ } else if ( strcmp(sym, "6/mmm") == 0 ) {
+ strncat(cflags, "-DPG6MMM ", 511-strlen(cflags));
+ } else if ( strcmp(sym, "6") == 0 ) {
+ strncat(cflags, "-DPG6 ", 511-strlen(cflags));
+ } else if ( strcmp(sym, "6/m") == 0 ) {
+ strncat(cflags, "-DPG6M ", 511-strlen(cflags));
+ } else {
+ ERROR("Sorry! Point group '%s' is not currently supported"
+ " on the GPU. I'm using '1' instead.\n", sym);
+ strncat(cflags, "-DPG1 ", 511-strlen(cflags));
+ }
+
+ /* Create a flag array */
+ flags_size = IDIM*IDIM*IDIM*sizeof(cl_float);
+ flags_ptr = malloc(flags_size);
+ if ( flags != NULL ) {
+ for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+ flags_ptr[i] = flags[i];
+ }
+ } else {
+ for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+ flags_ptr[i] = 1.0;
+ }
+ }
+ gctx->flags = clCreateBuffer(gctx->ctx,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ flags_size, flags_ptr, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate flag buffer\n");
+ free(gctx);
+ return NULL;
+ }
+ free(flags_ptr);
+
gctx->tt_size = image->width*image->height*sizeof(cl_float);
gctx->tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, gctx->tt_size,
NULL, &err);
@@ -419,7 +469,7 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
}
gctx->prog = load_program(DATADIR"/crystfel/diffraction.cl", gctx->ctx,
- dev, &err);
+ dev, &err, cflags);
if ( err != CL_SUCCESS ) {
free(gctx);
return NULL;
diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h
index 7e6048b3..02115317 100644
--- a/src/diffraction-gpu.h
+++ b/src/diffraction-gpu.h
@@ -27,7 +27,8 @@ extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, UnitCell *ucell);
extern struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities,
- const unsigned char *flags, int dev_num);
+ const unsigned char *flags,
+ const char *sym, int dev_num);
extern void cleanup_gpu(struct gpu_context *gctx);
#else
@@ -41,7 +42,8 @@ static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
static struct gpu_context *setup_gpu(int no_sfac, struct image *image,
const double *intensities,
- const unsigned char *flags, int dev_num)
+ const unsigned char *flags,
+ const char *sym, int dev_num)
{
return NULL;
}
diff --git a/src/indexamajig.c b/src/indexamajig.c
index e99b1bd3..2fb6dd85 100644
--- a/src/indexamajig.c
+++ b/src/indexamajig.c
@@ -286,7 +286,7 @@ static void simulate_and_write(struct image *simage, struct gpu_context **gctx,
* Unfortunately, setup has to go here since until now we don't know
* enough about the situation. */
if ( (gctx != NULL) && (*gctx == NULL) ) {
- *gctx = setup_gpu(0, simage, intensities, flags, gpu_dev);
+ *gctx = setup_gpu(0, simage, intensities, flags, sym, gpu_dev);
}
if ( (gctx != NULL) && (*gctx != NULL) ) {
diff --git a/src/pattern_sim.c b/src/pattern_sim.c
index 41dd3a26..92a0765a 100644
--- a/src/pattern_sim.c
+++ b/src/pattern_sim.c
@@ -539,7 +539,8 @@ int main(int argc, char *argv[])
if ( config_gpu ) {
if ( gctx == NULL ) {
gctx = setup_gpu(config_nosfac, &image,
- intensities, flags, gpu_dev);
+ intensities, flags, sym,
+ gpu_dev);
}
get_diffraction_gpu(gctx, &image, na, nb, nc, cell);
} else {