aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2011-03-02 18:40:28 +0100
committerThomas White <taw@physics.org>2012-02-22 15:27:16 +0100
commitc708b2162f76a228235983f183f6250dc68522c4 (patch)
tree223e0f62003f7529f073ba35001c1ec4712f668c
parent356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff)
Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)
-rw-r--r--config.h.in7
-rwxr-xr-xconfigure57
-rw-r--r--configure.ac37
-rw-r--r--data/diffraction.cl49
-rw-r--r--src/cl-utils.c10
-rw-r--r--src/detector.c41
-rw-r--r--src/diffraction-gpu.c208
7 files changed, 234 insertions, 175 deletions
diff --git a/config.h.in b/config.h.in
index 80cbca26..48575a27 100644
--- a/config.h.in
+++ b/config.h.in
@@ -18,6 +18,9 @@
/* Define to 1 if Cairo is available */
#undef HAVE_CAIRO
+/* Define to 1 if CL/cl.h should be used */
+#undef HAVE_CL_CL_H
+
/* Define to 1 if you have the declaration of `strndup', and to 0 if you
don't. */
#undef HAVE_DECL_STRNDUP
@@ -157,10 +160,6 @@
/* Define if you have the 'wchar_t' type. */
#undef HAVE_WCHAR_T
-/* Define to 1 if you have the
- </System/Library/Frameworks/OpenCL.framework/Headers/cl.h> header file. */
-#undef HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H
-
/* Define to a substitute value for mmap()'s MAP_ANONYMOUS flag. */
#undef MAP_ANONYMOUS
diff --git a/configure b/configure
index e30aa7c8..c060e2de 100755
--- a/configure
+++ b/configure
@@ -809,7 +809,7 @@ enable_silent_rules
with_hdf5
with_gsl
enable_gsl_fudge
-enable_opencl
+with_opencl
enable_gtk
enable_gtktest
enable_png
@@ -1464,7 +1464,6 @@ Optional Features:
--disable-silent-rules verbose build output (undo: `make V=0')
--enable-gsl-fudge Use this option if you get lots of linker errors
concerning GSL and CBLAS
- --enable-opencl Enable the use of OpenCL
--disable-gtk Disable GTK+/GLib
--disable-gtktest do not try to compile and run a test GTK+ program
--disable-png Disable the use of libPNG
@@ -1477,6 +1476,7 @@ Optional Packages:
--with-hdf5 specify location of HDF5 library
--with-gsl specify location of GSL (instead of using
pkg-config)
+ --with-opencl Use OpenCL
--with-libtiff specify location of libTIFF library
Some influential environment variables:
@@ -6442,37 +6442,56 @@ $as_echo "no" >&6; }
fi
-# Check whether --enable-opencl was given.
-if test "${enable_opencl+set}" = set; then :
- enableval=$enable_opencl;
+
+
+# Check whether --with-opencl was given.
+if test "${with_opencl+set}" = set; then :
+ withval=$with_opencl;
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use OpenCL" >&5
$as_echo_n "checking whether to use OpenCL... " >&6; }
-if test "x$enable_opencl" == "xyes"; then :
+if test "x$with_opencl" == "xyes"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
- for ac_header in /System/Library/Frameworks/OpenCL.framework/Headers/cl.h
-do :
- ac_fn_c_check_header_mongrel "$LINENO" "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" "ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" "$ac_includes_default"
-if test "x$ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" = x""yes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H 1
-_ACEOF
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking cl.h" >&5
+$as_echo_n "checking cl.h... " >&6; }
+ if test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: /System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&5
+$as_echo "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&6; }
OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
OPENCL_LIBS="-framework OpenCL"
-else
+fi
+ if test -f /usr/local/cuda/cuda/include/CL/cl.h; then :
- OPENCL_CFLAGS="-I/usr/include/CL"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/cuda/include/CL/cl.h" >&5
+$as_echo "/usr/local/cuda/cuda/include/CL/cl.h" >&6; }
+ OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
fi
+ if test -f /usr/local/cuda/include/CL/cl.h; then :
-done
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/include/CL/cl.h" >&5
+$as_echo "/usr/local/cuda/include/CL/cl.h" >&6; }
+ OPENCL_CFLAGS="-I/usr/local/cuda/include"
+ OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found, assuming /usr/include/CL/cl.h" >&5
+$as_echo "not found, assuming /usr/include/CL/cl.h" >&6; }
+ OPENCL_CFLAGS=""
+ OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
+
+fi
$as_echo "#define HAVE_OPENCL 1" >>confdefs.h
@@ -6492,8 +6511,14 @@ else
HAVE_OPENCL_FALSE=
fi
+if test "x$cl_cl_h" == "xtrue"; then :
+$as_echo "#define HAVE_CL_CL_H 1" >>confdefs.h
+
+
+fi
+
# Check whether --enable-gtk was given.
if test "${enable_gtk+set}" = set; then :
enableval=$enable_gtk;
diff --git a/configure.ac b/configure.ac
index f7eac7e6..3674d190 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,28 +53,49 @@ AS_IF([test "x$enable_gsl_fudge" == "xyes"],
AC_MSG_RESULT([no])
])
-AC_ARG_ENABLE(opencl, AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL]))
+
+AC_ARG_WITH(opencl, AS_HELP_STRING([--with-opencl], [Use OpenCL]))
AC_MSG_CHECKING([whether to use OpenCL])
-AS_IF([test "x$enable_opencl" == "xyes"],
+AS_IF([test "x$with_opencl" == "xyes"],
[
AC_MSG_RESULT([yes])
- AC_CHECK_HEADERS([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
+
+ AC_MSG_CHECKING([cl.h])
+ AS_IF([test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
[
+ AC_MSG_RESULT([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h])
OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
OPENCL_LIBS="-framework OpenCL"
- ],
+ ])
+ AS_IF([test -f /usr/local/cuda/cuda/include/CL/cl.h],
[
- OPENCL_CFLAGS="-I/usr/include/CL"
+ AC_MSG_RESULT([/usr/local/cuda/cuda/include/CL/cl.h])
+ OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
+ ])
+ AS_IF([test -f /usr/local/cuda/include/CL/cl.h],
+ [
+ AC_MSG_RESULT([/usr/local/cuda/include/CL/cl.h])
+ OPENCL_CFLAGS="-I/usr/local/cuda/include"
+ OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
+ ], [
+ AC_MSG_RESULT([not found, assuming /usr/include/CL/cl.h])
+ OPENCL_CFLAGS=""
+ OPENCL_LIBS="-lOpenCL"
+ cl_cl_h=true
])
AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available])
have_opencl=true
-],
-[
+], [
AC_MSG_RESULT([no])
])
AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue)
-
+AS_IF([test "x$cl_cl_h" == "xtrue"],
+[
+ AC_DEFINE([HAVE_CL_CL_H], [1], [Define to 1 if CL/cl.h should be used])
+])
AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [Disable GTK+/GLib]))
havegtk=false
diff --git a/data/diffraction.cl b/data/diffraction.cl
index 9eae1b4c..dbbad265 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -24,15 +24,20 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT
| CLK_FILTER_LINEAR;
-float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
- float *ttp, int sampling)
+float4 get_q(int fs, int ss, float res, float clen, float k,
+ float *ttp, float corner_x, float corner_y,
+ float fsx, float fsy, float ssx, float ssy)
{
float rx, ry, r;
float az, tt;
float4 q;
+ float xs, ys;
- rx = ((float)x - sampling*cx)/(res*sampling);
- ry = ((float)y - sampling*cy)/(res*sampling);
+ xs = fs*fsx + ss*ssx;
+ ys = fs*fsy + ss*ssy;
+
+ rx = (xs + corner_x) / res;
+ ry = (ys + corner_y) / res;
r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f));
@@ -185,19 +190,19 @@ float molecule_factor(global float *intensities, global float *flags,
kernel void diffraction(global float *diff, global float *tt, float klow,
- int w, float cx, float cy,
+ int w, float corner_x, float corner_y,
float res, float clen, float16 cell,
global float *intensities,
- int xmin, int ymin, int sampling, local float *tmp,
+ int min_fs, int min_ss, int sampling, local float *tmp,
float kstep,
read_only image2d_t func_a,
read_only image2d_t func_b,
read_only image2d_t func_c,
- global float *flags)
+ global float *flags,
+ float fsx, float fsy, float ssx, float ssy)
{
float ttv;
- const int x = get_global_id(0) + (xmin*sampling);
- const int y = get_global_id(1) + (ymin*sampling);
+ float fs, ss;
float f_lattice, I_lattice;
float I_molecule;
float4 q;
@@ -205,12 +210,21 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
const int ly = get_local_id(1);
const int lb = get_local_id(2);
float k = klow + kstep * get_local_id(2);
- const int ax = x / sampling;
- const int ay = y / sampling;
+ const int afs = fs / sampling;
+ const int ass = ss / sampling; /* Array index of target pixel */
float intensity;
- /* Calculate value */
- q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling);
+ /* Calculate fractional coordinates in fs/ss */
+ fs = convert_float(get_global_id(0) + (min_fs*sampling))
+ / convert_float(sampling);
+ ss = convert_float(get_global_id(1) + (min_ss*sampling))
+ / convert_float(sampling);
+
+ /* Get the scattering vector */
+ q = get_q(fs, ss, res, clen, k, &ttv,
+ corner_x, corner_y, fsx, fsy, ssx, ssy);
+
+ /* Calculate the diffraction */
f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
I_molecule = molecule_factor(intensities, flags, cell, q);
I_lattice = pow(f_lattice, 2.0f);
@@ -228,16 +242,19 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
int i;
float sum = 0.0;
float val;
+ int idx;
+
+ idx = convert_int_rtz(fs)+w*convert_int_rtz(ss);
for ( i=0; i<sampling*sampling*get_local_size(2); i++ )
sum += tmp[i];
- val = sum / (float)(sampling*sampling*get_local_size(2));
- diff[ax+w*ay] = val;
+ val = sum / convert_float(sampling*sampling*get_local_size(2));
+ diff[idx] = val;
/* Leader thread also records the 2theta value.
* This should really be averaged across all pixels, but
* I strongly suspect this would be a waste of time. */
- tt[ax+w*ay] = ttv;
+ tt[idx] = ttv;
}
}
diff --git a/src/cl-utils.c b/src/cl-utils.c
index 0c6dce27..7c1b23a3 100644
--- a/src/cl-utils.c
+++ b/src/cl-utils.c
@@ -3,17 +3,25 @@
*
* OpenCL utility functions
*
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
#include <cl.h>
+#endif
#include "utils.h"
diff --git a/src/detector.c b/src/detector.c
index eb5d1083..e53d5331 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson)
double ph_per_e;
double area;
double max_tt = 0.0;
+ int n_inf1 = 0;
+ int n_neg1 = 0;
+ int n_nan1 = 0;
+ int n_inf2 = 0;
+ int n_neg2 = 0;
+ int n_nan2 = 0;
/* How many photons are scattered per electron? */
area = M_PI*pow(image->beam->beam_radius, 2.0);
@@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson)
struct panel *p;
intensity = (double)image->data[x + image->width*y];
- if ( isinf(intensity) ) {
- ERROR("Infinity at %i,%i\n", x, y);
- }
- if ( intensity < 0.0 ) {
- ERROR("Negative at %i,%i\n", x, y);
- }
- if ( isnan(intensity) ) {
- ERROR("NaN at %i,%i\n", x, y);
- }
+ if ( isinf(intensity) ) n_inf1++;
+ if ( intensity < 0.0 ) n_neg1++;
+ if ( isnan(intensity) ) n_nan1++;
p = find_panel(image->det, x, y);
@@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson)
image->data[x + image->width*y] = counts
* image->beam->adu_per_photon;
- if ( isinf(image->data[x+image->width*y]) ) {
- ERROR("Processed infinity at %i,%i\n", x, y);
- }
- if ( isnan(image->data[x+image->width*y]) ) {
- ERROR("Processed NaN at %i,%i\n", x, y);
- }
- if ( image->data[x+image->width*y] < 0.0 ) {
- ERROR("Processed negative at %i,%i %f\n", x, y, counts);
- }
+
+ /* Sanity checks */
+ if ( isinf(image->data[x+image->width*y]) ) n_inf2++;
+ if ( isnan(image->data[x+image->width*y]) ) n_nan2++;
+ if ( image->data[x+image->width*y] < 0.0 ) n_neg2++;
if ( image->twotheta[x + image->width*y] > max_tt ) {
max_tt = image->twotheta[x + image->width*y];
@@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson)
rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9);
STATUS("Halve the d values to get the voxel size for a synthesis.\n");
+
+ if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) {
+ ERROR("WARNING: The raw calculation produced %i negative"
+ " values, %i infinities and %i NaNs.\n",
+ n_neg1, n_inf1, n_nan1);
+ ERROR("WARNING: After processing, there were %i negative"
+ " values, %i infinities and %i NaNs.\n",
+ n_neg2, n_inf2, n_nan2);
+ }
}
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index dd382ede..2f4fee77 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -3,19 +3,27 @@
*
* Calculate diffraction patterns by Fourier methods (GPU version)
*
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
*
* Part of CrystFEL - crystallography with a FEL
*
*/
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdlib.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <complex.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
#include <cl.h>
+#endif
#include "image.h"
#include "utils.h"
@@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n)
}
+static int sfloat(struct gpu_context *gctx, int idx, float val)
+{
+ cl_int err;
+ err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set kernel argument %i: %s\n",
+ idx, clError(err));
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int setint(struct gpu_context *gctx, int idx, int val)
+{
+ cl_int err;
+
+ err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set kernel argument %i: %s\n",
+ idx, clError(err));
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int setmem(struct gpu_context *gctx, int idx, cl_mem val)
+{
+ cl_int err;
+
+ err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set kernel argument %i: %s\n",
+ idx, clError(err));
+ return 1;
+ }
+
+ return 0;
+}
+
+
void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, UnitCell *ucell)
{
@@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
cl_int4 ncells;
const int sampling = SAMPLING;
cl_float bwstep;
+ int n_inf = 0;
+ int n_neg = 0;
+ int n_nan = 0;
+
if ( gctx == NULL ) {
ERROR("GPU setup failed.\n");
@@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
check_sinc_lut(gctx, nb);
check_sinc_lut(gctx, nc);
- err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 0: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 1: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 2: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 3: %s\n", clError(err));
- return;
- }
+ if ( setmem(gctx, 0, gctx->diff) ) return;
+ if ( setmem(gctx, 1, gctx->tt) ) return;
+ if ( setmem(gctx, 9, gctx->intensities) ) return;
+ if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
+ if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
+ if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
+ if ( setmem(gctx, 18, gctx->flags) ) return;
+
+ /* Unit cell */
clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 8: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 9: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 12: %s\n", clError(err));
+ ERROR("Couldn't set unit cell: %s\n", clError(err));
return;
}
+
/* Local memory for reduction */
clSetKernelArg(gctx->kern, 13,
BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL);
if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 13: %s\n", clError(err));
- return;
- }
- /* Bandwidth sampling step */
- clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 14: %s\n", clError(err));
- return;
- }
-
- /* LUT in 'a' direction */
- clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 15: %s\n", clError(err));
+ ERROR("Couldn't set local memory: %s\n", clError(err));
return;
}
- /* LUT in 'b' direction */
- clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 16: %s\n", clError(err));
- return;
- }
- /* LUT in 'c' direction */
- clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 17: %s\n", clError(err));
- return;
- }
-
- /* Flag array */
- clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set flag array: %s\n", clError(err));
- return;
- }
+ if ( sfloat(gctx, 2, klow) ) return;
+ if ( setint(gctx, 3, image->width) ) return;
+ if ( setint(gctx, 12, sampling) ) return;
+ if ( sfloat(gctx, 14, bwstep) ) return;
/* Iterate over panels */
event = malloc(image->det->n_panels * sizeof(cl_event));
@@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* In a future version of OpenCL, this could be done
* with a global work offset. But not yet... */
- dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x;
- dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y;
+ dims[0] = 1+image->det->panels[p].max_fs
+ -image->det->panels[p].min_fs;
+ dims[1] = 1+image->det->panels[p].max_ss
+ -image->det->panels[p].min_ss;
dims[0] *= SAMPLING;
dims[1] *= SAMPLING;
dims[2] = BWSAMPLING;
- clSetKernelArg(gctx->kern, 4, sizeof(cl_float),
- &image->det->panels[p].cx);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 4: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 5, sizeof(cl_float),
- &image->det->panels[p].cy);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 5: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 6, sizeof(cl_float),
- &image->det->panels[p].res);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 6: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 7, sizeof(cl_float),
- &image->det->panels[p].clen);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 7: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 10, sizeof(cl_int),
- &image->det->panels[p].min_x);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 10: %s\n", clError(err));
- return;
- }
- clSetKernelArg(gctx->kern, 11, sizeof(cl_int),
- &image->det->panels[p].min_y);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 11: %s\n", clError(err));
- return;
- }
+ if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return;
+ if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return;
+ if ( sfloat(gctx, 6, image->det->panels[p].res) ) return;
+ if ( sfloat(gctx, 7, image->det->panels[p].clen) ) return;
+ if ( setint(gctx, 10, image->det->panels[p].min_fs) ) return;
+ if ( setint(gctx, 11, image->det->panels[p].min_ss) ) return;
+ if ( sfloat(gctx, 19, image->det->panels[p].fsx) ) return;
+ if ( sfloat(gctx, 19, image->det->panels[p].fsy) ) return;
+ if ( sfloat(gctx, 20, image->det->panels[p].ssx) ) return;
+ if ( sfloat(gctx, 21, image->det->panels[p].ssy) ) return;
err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
dims, ldims, 0, NULL, &event[p]);
@@ -314,15 +298,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
float val, tt;
val = diff_ptr[x + image->width*y];
- if ( isinf(val) ) {
- ERROR("Extracting infinity at %i,%i\n", x, y);
- }
- if ( val < 0.0 ) {
- ERROR("Extracting negative at %i,%i\n", x, y);
- }
- if ( isnan(val) ) {
- ERROR("Extracting NaN at %i,%i\n", x, y);
- }
+ if ( isinf(val) ) n_inf++;
+ if ( val < 0.0 ) n_neg++;
+ if ( isnan(val) ) n_nan++;
tt = tt_ptr[x + image->width*y];
image->data[x + image->width*y] = val;
@@ -331,6 +309,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
}
}
+ if ( n_neg + n_inf + n_nan ) {
+ ERROR("WARNING: The GPU calculation produced %i negative"
+ " values, %i infinities and %i NaNs.\n",
+ n_neg, n_inf, n_nan);
+ }
+
clEnqueueUnmapMemObject(gctx->cq, gctx->diff, diff_ptr, 0, NULL, NULL);
clEnqueueUnmapMemObject(gctx->cq, gctx->tt, tt_ptr, 0, NULL, NULL);
}