From b04ea39a8bf04b12765b73da3ff9eda7f2143bde Mon Sep 17 00:00:00 2001 From: Thomas White Date: Wed, 17 Feb 2010 15:40:00 +0100 Subject: GPU: Do rotation, other fixes --- data/diffraction.cl | 44 +++++++++++++++++++++++++++++++++++++++----- src/diffraction-gpu.c | 19 ++++++++++++++----- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/data/diffraction.cl b/data/diffraction.cl index 5c75d70b..bcdaa726 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -14,11 +14,43 @@ #define IDIM (INDMAX*2 +1) +float4 quat_rot(float4 q, float4 z) +{ + float4 res; + float t01, t02, t03, t11, t12, t13, t22, t23, t33; + + t01 = z.x*z.y; + t02 = z.x*z.z; + t03 = z.x*z.w; + t11 = z.y*z.y; + t12 = z.y*z.z; + t13 = z.y*z.w; + t22 = z.z*z.z; + t23 = z.z*z.w; + t33 = z.w*z.w; + + res.x = (1.0 - 2.0 * (t22 + t33)) * q.x + + (2.0 * (t12 + t03)) * q.y + + (2.0 * (t13 - t02)) * q.z; + + res.y = (2.0 * (t12 - t03)) * q.x + + (1.0 - 2.0 * (t11 + t33)) * q.y + + (2.0 * (t01 + t23)) * q.z; + + res.z = (2.0 * (t02 + t13)) * q.x + + (2.0 * (t23 - t01)) * q.y + + (1.0 - 2.0 * (t11 + t22)) * q.z; + + return res; +} + + float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k, - float *ttp) + float *ttp, float4 z) { float rx, ry, r; float ttx, tty, tt; + float4 q; rx = ((float)x - cx)/res; ry = ((float)y - cy)/res; @@ -31,7 +63,9 @@ float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k, *ttp = tt; - return (float4)(k*sin(ttx), k*sin(tty), k-k*cos(tt), 0.0); + q = (float4)(k*sin(ttx), k*sin(tty), k-k*cos(tt), 0.0); + + return quat_rot(q, z); } @@ -88,7 +122,7 @@ float2 get_sfac(global float2 *sfacs, float16 cell, float4 q) kernel void diffraction(global float2 *diff, global float *tt, float k, int w, float cx, float cy, float res, float clen, float16 cell, - global float2 *sfacs) + global float2 *sfacs, float4 z) { float ttv; const int x = get_global_id(0); @@ -96,11 +130,11 @@ kernel void diffraction(global float2 *diff, global float *tt, float k, float f_lattice; float2 f_molecule; - float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv); + float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv, z); f_lattice = lattice_factor(cell, q); f_molecule = get_sfac(sfacs, cell, q); - diff[x+w*y] = f_molecule*f_lattice; + diff[x+w*y] = f_molecule * f_lattice; tt[x+w*y] = ttv; } diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 4172da3f..a6db1980 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -128,6 +128,7 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc, size_t diff_size; float *diff_ptr; int i; + cl_float4 orientation; if ( image->molecule == NULL ) return; @@ -156,7 +157,10 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc, ERROR("Couldn't get platform IDs: %i\n", err); return; } - STATUS("%i platforms\n", nplat); + if ( nplat == 0 ) { + ERROR("Couldn't find at least one platform!\n"); + return; + } prop[0] = CL_CONTEXT_PLATFORM; prop[1] = (cl_context_properties)platforms[0]; prop[2] = 0; @@ -226,18 +230,24 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc, /* Calculate wavelength */ kc = 1.0/image->lambda; /* Centre value */ + /* Orientation */ + orientation[0] = image->orientation.w; + orientation[1] = image->orientation.x; + orientation[2] = image->orientation.y; + orientation[3] = image->orientation.z; + clSetKernelArg(kern, 0, sizeof(cl_mem), &diff); clSetKernelArg(kern, 1, sizeof(cl_mem), &tt); clSetKernelArg(kern, 2, sizeof(cl_float), &kc); clSetKernelArg(kern, 3, sizeof(cl_int), &image->width); clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx); clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy); - clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution); - clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len); + clSetKernelArg(kern, 6, sizeof(cl_float), &image->det.panels[0].res); + clSetKernelArg(kern, 7, sizeof(cl_float), &image->det.panels[0].clen); clSetKernelArg(kern, 8, sizeof(cl_float16), &cell); clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs); + clSetKernelArg(kern, 10, sizeof(cl_float4), &orientation); - STATUS("Running...\n"); err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL, 0, NULL, NULL); if ( err != CL_SUCCESS ) { @@ -257,7 +267,6 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc, ERROR("Couldn't map tt buffer\n"); return; } - STATUS("Done!\n"); image->sfacs = calloc(image->width * image->height, sizeof(double complex)); -- cgit v1.2.3