[cig-commits] [commit] devel, master: remove texture definition when not used (92e5c5e)

Thu Nov 6 08:32:51 PST 2014

Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit 92e5c5ee0cf0fba6b17a75c2fab5ec023804ba7a
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date:   Thu Oct 23 14:22:30 2014 +0200

    remove texture definition when not used


>---------------------------------------------------------------

92e5c5ee0cf0fba6b17a75c2fab5ec023804ba7a
 src/gpu/compute_forces_crust_mantle_gpu.c |   6 +-
 src/gpu/compute_forces_inner_core_gpu.c   |   4 +-
 src/gpu/compute_forces_outer_core_gpu.c   |   3 +-
 src/gpu/initialize_gpu.c                  | 558 ++++++++++--------------------
 src/gpu/mesh_constants_gpu.h              |   6 +-
 src/gpu/prepare_mesh_constants_gpu.c      |  20 +-
 6 files changed, 204 insertions(+), 393 deletions(-)

diff --git a/src/gpu/compute_forces_crust_mantle_gpu.c b/src/gpu/compute_forces_crust_mantle_gpu.c
index b265b7a..ad29707 100644
--- a/src/gpu/compute_forces_crust_mantle_gpu.c
+++ b/src/gpu/compute_forces_crust_mantle_gpu.c
@@ -262,7 +262,7 @@ void crust_mantle (int nb_blocks_to_compute, Mesh *mp,
     clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_density_table.ocl));
     clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_wgll_cube.ocl));
     clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY));
-
+#ifdef USE_TEXTURES_FIELDS
     if (FORWARD_OR_ADJOINT == 1) {
       clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_cm_tex));
       clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_cm_tex));
@@ -270,8 +270,10 @@ void crust_mantle (int nb_blocks_to_compute, Mesh *mp,
       clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_cm_tex));
       clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_cm_tex));
     }
+#endif
+#ifdef USE_TEXTURES_CONSTANTS
     clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_hprime_xx_cm_tex));
-
+#endif
     local_work_size[0] = blocksize;
     local_work_size[1] = 1;
     global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/compute_forces_inner_core_gpu.c b/src/gpu/compute_forces_inner_core_gpu.c
index 6df30ba..448159c 100644
--- a/src/gpu/compute_forces_inner_core_gpu.c
+++ b/src/gpu/compute_forces_inner_core_gpu.c
@@ -221,7 +221,7 @@ void inner_core (int nb_blocks_to_compute, Mesh *mp,
     clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_wgll_cube.ocl));
     clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_INNER_CORE_STRAIN_ONLY));
     clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_INNER_CORE));
-
+#ifdef USE_TEXTURES_FIELDS
     if (FORWARD_OR_ADJOINT == 1) {
       clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_ic_tex));
       clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_ic_tex));
@@ -229,7 +229,7 @@ void inner_core (int nb_blocks_to_compute, Mesh *mp,
       clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_ic_tex));
       clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_ic_tex));
     }
-
+#endif
     local_work_size[0] = blocksize;
     local_work_size[1] = 1;
     global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/compute_forces_outer_core_gpu.c b/src/gpu/compute_forces_outer_core_gpu.c
index 14acdca..06d1985 100644
--- a/src/gpu/compute_forces_outer_core_gpu.c
+++ b/src/gpu/compute_forces_outer_core_gpu.c
@@ -142,6 +142,7 @@ void outer_core (int nb_blocks_to_compute, Mesh *mp,
       clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &d_b_B_array_rotation.ocl));
     }
     clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_OUTER_CORE));
+#ifdef USE_TEXTURES_FIELDS
     if (FORWARD_OR_ADJOINT == 1) {
       clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_oc_tex));
       clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_oc_tex));
@@ -149,7 +150,7 @@ void outer_core (int nb_blocks_to_compute, Mesh *mp,
       clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_oc_tex));
       clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_oc_tex));
     }
-
+#endif
     local_work_size[0] = blocksize;
     local_work_size[1] = 1;
     global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/initialize_gpu.c b/src/gpu/initialize_gpu.c
index 95ee58a..c6ffe8c 100644
--- a/src/gpu/initialize_gpu.c
+++ b/src/gpu/initialize_gpu.c
@@ -32,8 +32,8 @@
 
 // GPU initialization
 
+/* macro definitions used in GPU kernels */
 #ifdef USE_OPENCL
-// macro definitions used in GPU kernels
 
 #define STR(x) #x
 #define PASS(x) {#x, STR(x)}
@@ -42,7 +42,7 @@ static struct {
   const char *name;
   const char *value;
 } _macro_to_kernel[] = {
-  // macro values
+  /* macro values */
   PASS(NDIM),
   PASS(NGLLX), PASS(NGLL2), PASS(NGLL3), PASS(NGLL3_PADDED),
   PASS(N_SLS),
@@ -51,7 +51,7 @@ static struct {
   PASS(COLORING_MIN_NSPEC_OUTER_CORE), PASS(COLORING_MIN_NSPEC_INNER_CORE),
   PASS(R_EARTH_KM),
 
-  // macro functions: not working yet, spaces not allowed in OCL compiler
+  /* macro functions: not working yet, spaces not allowed in OCL compiler*/
 
 /* PASS(INDEX2(xsize, x, y)),
    PASS(INDEX3(xsize, ysize, x, y, z)),
@@ -60,7 +60,7 @@ static struct {
    PASS(INDEX5(xsize, ysize, zsize, isize, x, y, z, i, j)),
    PASS(INDEX6(xsize, ysize, zsize, isize, jsize, x, y, z, i, j, k)), */
 
-  // macro flags, passed only ifdefed
+  /* macro flags, passed only ifdefed */
   PASS(MANUALLY_UNROLLED_LOOPS), PASS(USE_TEXTURES_CONSTANTS), PASS(USE_TEXTURES_FIELDS),
 
   PASS(USE_LAUNCH_BOUNDS),
@@ -72,18 +72,13 @@ static struct {
 
 /* ----------------------------------------------------------------------------------------------- */
 
-// gpu runtime flags
 int run_cuda = 0;
 int run_opencl = 0;
 
 /* ----------------------------------------------------------------------------------------------- */
-// CUDA initialization
-/* ----------------------------------------------------------------------------------------------- */
 
 #ifdef USE_CUDA
 
-// initializes CUDA devices
-
 static void initialize_cuda_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
   int device_count = 0;
 
@@ -96,12 +91,12 @@ static void initialize_cuda_device(const char *platform_filter, const char *devi
   //
   // being verbose and catches error from first call to CUDA runtime function, without synchronize call
   cudaError_t err = cudaGetLastError();
-  if (err != cudaSuccess) {
+  if (err != cudaSuccess){
     fprintf(stderr,"Error after cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
-    exit_on_error("\
-CUDA runtime error: cudaGetDeviceCount failed\n\n\
+    exit_on_error("CUDA runtime error: cudaGetDeviceCount failed\n\n\
 please check if driver and runtime libraries work together\n\
-or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n");
+or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n\
+exiting...\n");
   }
 
   // returns device count to fortran
@@ -133,7 +128,7 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
   }
 
   if (nbMatchingDevices == 0) {
-    printf("Error: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
+    printf("ERROR: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
     exit(1);
   }
 
@@ -144,7 +139,7 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
   cudaGetDeviceProperties(&deviceProp, myDevice);
 
   // exit if the machine has no CUDA-enabled device
-  if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
+  if (deviceProp.major == 9999 && deviceProp.minor == 9999){
     fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
     exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
   }
@@ -160,15 +155,15 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
     sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
   }
   // debugging
-  if (DEBUG) {
+  if (DEBUG){
     do_output_info = 1;
     sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
   }
 
   // output to file
-  if (do_output_info) {
+  if( do_output_info ){
     fp = fopen(filename,"w");
-    if (fp != NULL) {
+    if (fp != NULL){
       // display device properties
       fprintf(fp,"Device Name = %s\n",deviceProp.name);
       fprintf(fp,"memory:\n");
@@ -187,17 +182,17 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
       fprintf(fp,"features:\n");
       fprintf(fp,"  Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
       fprintf(fp,"  multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
-      if (deviceProp.canMapHostMemory) {
+      if(deviceProp.canMapHostMemory){
         fprintf(fp,"  canMapHostMemory: TRUE\n");
       }else{
         fprintf(fp,"  canMapHostMemory: FALSE\n");
       }
-      if (deviceProp.deviceOverlap) {
+      if(deviceProp.deviceOverlap){
         fprintf(fp,"  deviceOverlap: TRUE\n");
       }else{
         fprintf(fp,"  deviceOverlap: FALSE\n");
       }
-      if (deviceProp.concurrentKernels) {
+      if(deviceProp.concurrentKernels){
         fprintf(fp,"  concurrentKernels: TRUE\n");
       }else{
         fprintf(fp,"  concurrentKernels: FALSE\n");
@@ -215,17 +210,17 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
   }
 
   // make sure that the device has compute capability >= 1.3
-  if (deviceProp.major < 1) {
+  if (deviceProp.major < 1){
     fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
     exit_on_error("CUDA Compute capability major number should be at least 1\n");
   }
-  if (deviceProp.major == 1 && deviceProp.minor < 3) {
+  if (deviceProp.major == 1 && deviceProp.minor < 3){
     fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
     exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
   }
   // we use pinned memory for asynchronous copy
   if (GPU_ASYNC_COPY) {
-    if (! deviceProp.canMapHostMemory) {
+    if (! deviceProp.canMapHostMemory){
       fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
       exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
     }
@@ -258,91 +253,50 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
 }
 #endif
 
-/* ----------------------------------------------------------------------------------------------- */
-// OpenCL initialization
-/* ----------------------------------------------------------------------------------------------- */
-
 #ifdef USE_OPENCL
-
-// OpenCL mesh
 struct _mesh_opencl mocl;
 
-// function definitions
 cl_device_id oclGetMyDevice(int rank);
 void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices);
-void build_kernels (void);
 
-// initializes OpenCL devices
+void build_kernels (void);
 
 static void initialize_ocl_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
-
-  // selects device
   ocl_select_device(platform_filter, device_filter, myrank, nb_devices);
 
   // outputs device info to file
   char filename[BUFSIZ];
   FILE *fp;
-  int do_output_info = 0;
-
-  // by default, only master process outputs device info to avoid file cluttering
-  if (myrank == 0) {
-    do_output_info = 1;
-    sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
+  sprintf (filename, "OUTPUT_FILES/gpu_device_info_proc_%06d.txt", myrank);
+  fp = fopen (filename, "a+");
+  if (fp) {
+    cl_device_type device_type;
+    size_t max_work_group_size;
+    cl_ulong local_mem_size;
+    cl_uint max_compute_units;
+    char name[1024];
+    size_t image2d_max_size[2];
+    // display device properties
+    clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem_size), &local_mem_size, NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
+    clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
+
+    fprintf (fp, "Device Name = %s\n", name);
+    fprintf (fp, "Type: %d\n", (int) device_type);
+    fprintf (fp, "local_mem_size: %zu\n", local_mem_size);
+    fprintf (fp, "max_compute_units: %u\n", max_compute_units);
+    fprintf (fp, "max_work_group_size: %lu\n", max_work_group_size);
+    fprintf (fp, "image2d_max_size: %zux%zu\n", image2d_max_size[0], image2d_max_size[1]);
+
+    fclose (fp);
   }
-  // debugging
-  if (DEBUG) {
-    do_output_info = 1;
-    sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
-  }
-
-  // output to file
-  if (do_output_info) {
-    fp = fopen(filename,"w");
-    if (fp != NULL) {
-      cl_device_type device_type;
-      size_t max_work_group_size;
-      cl_ulong mem_size;
-      cl_uint units;
-      char name[1024];
-      size_t image2d_max_size[2];
 
-      // display device properties
-      clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
-      fprintf (fp, "Device Name = %s\n", name);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
-      fprintf (fp, "Device Vendor = %s\n", name);
-      fprintf (fp, "Memory:\n");
-      clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
-      fprintf (fp, "  local_mem_size (in KB) : %f\n", mem_size / 1024.f);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
-      fprintf (fp, "  global_mem_size (in MB): %f\n", mem_size / (1024.f * 1024.f));
-      clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
-      fprintf (fp, "  image2d_max_size: %zu x %zu\n", image2d_max_size[0], image2d_max_size[1]);
-      fprintf(fp,"blocks:\n");
-      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
-      fprintf (fp, "  max_compute_units: %u\n", units);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
-      fprintf (fp, "  max_work_group_size: %lu\n", max_work_group_size);
-      fprintf(fp,"features:\n");
-      clGetDeviceInfo(mocl.device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
-      fprintf (fp, "  device version : %s\n", name);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
-      fprintf (fp, "  device type: %d\n", (int) device_type);
-      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(units), &units, NULL);
-      fprintf (fp, "  device max_clock_frequency: %u\n", units);
-      clGetDeviceInfo(mocl.device, CL_DRIVER_VERSION, sizeof(name), name, NULL);
-      fprintf (fp, "  driver version : %s\n", name);
-
-      fclose (fp);
-    }
-  }
-
-  // builds OpenCL kernels
   build_kernels();
-
 }
-
 #define xQUOTE(str) #str
 #define QUOTE(str)  xQUOTE(str)
 
@@ -352,62 +306,62 @@ static void initialize_ocl_device(const char *platform_filter, const char *devic
 #define _OCL_GPU_CFLAGS ""
 #endif
 
-/* ----------------------------------------------------------------------------------------------- */
-
 #define PARAMETER_STR_SIZE 1024
-
 void build_kernels (void) {
-
   static char parameters[PARAMETER_STR_SIZE] = _OCL_GPU_CFLAGS " ";
   cl_int errcode;
   char *pos = parameters + strlen(_OCL_GPU_CFLAGS) + 1;
   int len = PARAMETER_STR_SIZE;
   int i;
 
-  // adds preprocessor definitions
-  // e.g. -DNDIM=3 -DNGLLX=5 ..
   for(i = 0; _macro_to_kernel[i].name != NULL; i++) {
     if (!strcmp(_macro_to_kernel[i].name, _macro_to_kernel[i].value)) {
       continue;
     }
     if (!len) {
-      printf("Error: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
-      exit(1);
+      printf("ERROR: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
     }
     int written = snprintf(pos, len, "-D%s=%s ", _macro_to_kernel[i].name, _macro_to_kernel[i].value);
     pos += written;
     len -= written;
   }
 
-  // debug
-  //printf("building OpenCL kernels: parameters = %s \n",parameters);
-
-  // adds kernels as const char definitions
   #include "kernel_inc_cl.c"
 
-  // defines OpenCL build program macro
 #undef BOAST_KERNEL
 #define BOAST_KERNEL(__kern_name__)                                     \
-  mocl.programs.__kern_name__##_program = clCreateProgramWithSource( mocl.context, 1, \
-                                                                     &__kern_name__##_program, NULL, clck_(&errcode));\
-  mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, 0, NULL, parameters, NULL, NULL);\
+  mocl.programs.__kern_name__##_program = clCreateProgramWithSource(    \
+                       mocl.context, 1,                                 \
+                       &__kern_name__##_program, NULL, clck_(&errcode));\
+  mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program,  \
+                                0, NULL, parameters, NULL, NULL);       \
   if (mocl_errcode != CL_SUCCESS) {                                     \
-    fprintf(stderr,"OpenCL Error: Failed to build program "#__kern_name__": %s\n", clewErrorString(mocl_errcode)); \
+    fprintf(stderr,"Error: Failed to build program "#__kern_name__": %s\n", \
+            clewErrorString(mocl_errcode));                             \
     char cBuildLog[10240];                                              \
-    clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, mocl.device, CL_PROGRAM_BUILD_LOG, \
-                          sizeof(cBuildLog), cBuildLog, NULL ); \
-    fprintf(stderr,"OpenCL Log: %s\n",cBuildLog);                                   \
+    clGetProgramBuildInfo(mocl.programs.__kern_name__##_program,        \
+                          mocl.device,                                  \
+                          CL_PROGRAM_BUILD_LOG,                         \
+                          sizeof(cBuildLog), cBuildLog, NULL );         \
+    fprintf(stderr,"%s\n",cBuildLog);                                   \
     exit(1);                                                            \
   }                                                                     \
-  mocl.kernels.__kern_name__ = clCreateKernel (mocl.programs.__kern_name__ ## _program, #__kern_name__ , clck_(&errcode));
+  mocl.kernels.__kern_name__ = clCreateKernel (                         \
+                               mocl.programs.__kern_name__ ## _program, \
+                               #__kern_name__ , clck_(&errcode));
 
-  // builds each OpenCL kernel
   #include "kernel_list.h"
-
 }
 
+void release_kernels (void) {
+#undef BOAST_KERNEL
+#define BOAST_KERNEL(__kern_name__)                                     \
+  clCheck (clReleaseKernel (mocl.kernels.__kern_name__));               \
+  clCheck (clReleaseProgram (mocl.programs.__kern_name__ ## _program));
+
+  #include "kernel_list.h"
+}
 
-/* ----------------------------------------------------------------------------------------------- */
 
 struct _opencl_version {
   cl_uint minor;
@@ -417,305 +371,188 @@ struct _opencl_version opencl_version_1_0 = {1,0};
 struct _opencl_version opencl_version_1_1 = {1,1};
 struct _opencl_version opencl_version_1_2 = {1,2};
 
-/* ----------------------------------------------------------------------------------------------- */
-
 cl_int compare_opencl_version(struct _opencl_version v1, struct _opencl_version v2) {
-  if (v1.major > v2.major)
+  if(v1.major > v2.major)
     return 1;
-  if (v1.major < v2.major)
+  if(v1.major < v2.major)
     return -1;
-  if (v1.minor > v2.minor)
+  if(v1.minor > v2.minor)
     return 1;
-  if (v1.minor < v2.minor)
+  if(v1.minor < v2.minor)
     return -1;
   return 0;
 }
 
-/* ----------------------------------------------------------------------------------------------- */
-
 static void get_platform_version(cl_platform_id platform_id, struct _opencl_version *version) {
+    size_t cl_platform_version_size;
+    clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
 
-  size_t cl_platform_version_size;
-  clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
+    char *cl_platform_version;
+    cl_platform_version = (char *) malloc(cl_platform_version_size);
 
-  char *cl_platform_version;
-  cl_platform_version = (char *) malloc(cl_platform_version_size);
-
-  if (cl_platform_version == NULL) {
-    fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
-    exit(1);
-  }
+    if (cl_platform_version == NULL) {
+      fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
+      exit(1);
+    }
 
-  clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
-
-  //OpenCL<space><major_version.minor_version><space><platform-specific information>
-  char minor[2], major[2];
-  major[0] = cl_platform_version[7];
-  major[1] = 0;
-  minor[0] = cl_platform_version[9];
-  minor[1] = 0;
-  version->major = atoi(major);
-  version->major = atoi(minor);
-  free(cl_platform_version);
+    clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
+    //OpenCL<space><major_version.minor_version><space><platform-specific information>
+    char minor[2], major[2];
+    major[0] = cl_platform_version[7];
+    major[1] = 0;
+    minor[0] = cl_platform_version[9];
+    minor[1] = 0;
+    version->major = atoi(major);
+    version->major = atoi(minor);
+    free(cl_platform_version);
 }
 
-/* ----------------------------------------------------------------------------------------------- */
-
 #define OCL_DEV_TYPE CL_DEVICE_TYPE_ALL
-
 void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
+    cl_int errcode = CL_SUCCESS;
+    cl_platform_id *platform_ids;
+    cl_uint num_platforms;
 
-  cl_int errcode = CL_SUCCESS;
-  cl_platform_id *platform_ids;
-  cl_uint num_platforms;
-
-  // first OpenCL call
-  // only gets number of platforms
-  clCheck( clGetPlatformIDs(0, NULL, &num_platforms) );
-
-  // checks if OpenCL platforms available
-  if (num_platforms == 0) {
-    fprintf(stderr,"OpenCL error: No OpenCL platform available!\n");
-    exit(1);
-  }
+    clGetPlatformIDs(0, NULL, &num_platforms);
 
-  platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
+    if (num_platforms == 0) {
+      fprintf(stderr,"No OpenCL platform available!\n");
+      exit(1);
+    }
 
-  // gets platform infos
-  clCheck( clGetPlatformIDs(num_platforms, platform_ids, NULL));
+    platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
 
-  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
+    clGetPlatformIDs(num_platforms, platform_ids, NULL);
 
-  // temporary array to store infos
-  int i,j;
-  char *info_all[num_platforms][2];
-  // initializes pointers
-  for (i = 0; i < num_platforms; i++) {
-    info_all[i][0] = NULL;
-    info_all[i][1] = NULL;
-  }
+    cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
+    if (strlen(platform_filter)) {
+      cl_uint found = 0;
+      cl_uint i;
 
-  // looks for platform matching GPU_PLATFORM string given in Par_file
-  if (strlen(platform_filter)) {
-    cl_uint found = 0;
+      for (i = 0; i < num_platforms && !found; i++) {
+        size_t info_length;
+        char *info;
 
-    for (i = 0; i < num_platforms && !found; i++) {
-      size_t info_length;
-      char *info;
+        int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
+        int j;
 
-      // checks vendor and platform names for matching with GPU_PLATFORM
-      int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
-      for (j = 0; j < 2 && !found; j++) {
-        // gets property info length
-        clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length));
+        for (j = 0; j < 2 && !found; j++) {
+          clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length);
 
-        // checks info
-        if (info_length == 0) {
-          fprintf(stderr,"OpenCL error: No OpenCL platform info available!\n");
-          exit(1);
-        }
+          info = (char *) malloc(info_length * sizeof(char));
 
-        // allocates info buffer and gets info string
-        info = (char *) malloc(info_length * sizeof(char));
-        clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL));
+          clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL);
 
-        // stores info
-        info_all[i][j] = malloc( strlen(info) + 1);
-        strcpy(info_all[i][j],info);
+          if (strcasestr(info, platform_filter)) {
+            properties[1] = (cl_context_properties) platform_ids[i];
+            found = 1;
+          }
 
-        // sets matching platform id
-        if (strcasestr(info, platform_filter)) {
-          properties[1] = (cl_context_properties) platform_ids[i];
-          found = 1;
+          free(info);
         }
-        // frees temporary array
-        free(info);
       }
-    }
 
-    // checks if platform found
-    if (!found) {
-      if (myrank == 0) {
-        fprintf(stderr, "\nAvailable platforms are:\n");
-        for (i = 0; i < num_platforms; i++) {
-          if (info_all[i][0]) { fprintf(stderr, "  platform %i: vendor = %s , name = %s\n",i,info_all[i][0],info_all[i][1]);}
-        }
-        fprintf(stderr, "Please check your parameter GPU_PLATFORM in Par_file\n\n");
+      if (!found) {
+        fprintf(stderr, "No matching OpenCL platform available : %s!\n", platform_filter);
+        exit(1);
       }
-      // frees info array
-      for (i = 0; i < num_platforms; i++) {
-        if (info_all[i][0]) { free(info_all[i][0]); }
-        if (info_all[i][1]) { free(info_all[i][1]); }
-      }
-      // exits
-      fprintf(stderr, "No matching OpenCL platform available : %s\n", platform_filter);
-      exit(1);
+    } else {
+      properties[1] = (cl_context_properties) platform_ids[0];
     }
 
-    // frees info array
-    for (i = 0; i < num_platforms; i++) {
-      if (info_all[i][0]) { free(info_all[i][0]); }
-      if (info_all[i][1]) { free(info_all[i][1]); }
-    }
-
-  } else {
-    // wild-card platform filter given (GPU_PLATFORM set to '*'), takes first platform
-    properties[1] = (cl_context_properties) platform_ids[0];
-  }
-
-  // searches for device
-  if (strlen(device_filter)) {
-    cl_uint found = 0;
-    cl_uint i;
-    cl_uint num_devices;
-    cl_device_id *device_ids;
-    cl_device_id *matching_device_ids;
-
-    // only gets number of devices for this platform
-    clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices));
-
-    // checks
-    if (num_devices == 0) {
-      fprintf(stderr,"No OpenCL device of type %d!\n", (int) OCL_DEV_TYPE);
-      exit(1);
-    }
-
-    device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
-
-    matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
-
-    // gets device infos
-    clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL));
+    if (strlen(device_filter)) {
+      cl_uint found = 0;
+      cl_uint i;
+      cl_uint num_devices;
+      cl_device_id *device_ids;
+      cl_device_id *matching_device_ids;
+
+      clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices);
+      if (num_devices == 0) {
+        fprintf(stderr,"No device of type %d!\n", (int) OCL_DEV_TYPE);
+        exit(1);
+      }
 
-    // temporary array to store device infos
-    char *info_device_all[num_devices];
-    // initializes pointers
-    for (i = 0; i < num_devices; i++) {
-      info_device_all[i] = NULL;
-    }
+      device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
 
-    // searches device matching GPU_DEVICE string
-    for (i = 0; i < num_devices; i++) {
-      size_t info_length;
-      char *info;
+      matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
 
-      clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length));
+      clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL);
+      for (i = 0; i < num_devices; i++) {
+        size_t info_length;
+        char *info;
 
-      info = (char *) malloc(info_length * sizeof(char));
+        clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length);
 
-      clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL));
+        info = (char *) malloc(info_length * sizeof(char));
 
-      // stores info
-      info_device_all[i] = malloc( strlen(info) + 1);
-      strcpy(info_device_all[i],info);
+        clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL);
+        if (strcasestr(info, device_filter)) {
+          matching_device_ids[found] = device_ids[i];
+          found++;
+        }
 
-      // sets matching device id
-      if (strcasestr(info, device_filter)) {
-        matching_device_ids[found] = device_ids[i];
-        found++;
+        free(info);
       }
 
-      free(info);
-    }
-
-    if (!found) {
-      // user output
-      if (myrank == 0) {
-        fprintf(stderr, "\nAvailable devices are:\n");
-        for (i = 0; i < num_devices; i++) {
-          if (info_device_all[i]) { fprintf(stderr, "  device %i: name = %s\n",i,info_device_all[i]);}
-        }
-        fprintf(stderr, "Please check your parameter GPU_DEVICE in Par_file\n\n");
+      if (!found) {
+        fprintf(stderr, "No matching OpenCL device available : %s!\n", device_filter);
+        exit(1);
       }
-      // frees info array
-      for (i = 0; i < num_devices; i++) {
-        if (info_device_all[i]) { free(info_device_all[i]); }
-      }
-      // exits
-      fprintf(stderr, "No matching OpenCL device available : %s\n", device_filter);
-      exit(1);
-    }
 
-    // creates an OpenCL context
-    mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
-
-    // frees temporary arrays
-    free (matching_device_ids);
-    free (device_ids);
-    // frees info array
-    for (i = 0; i < num_devices; i++) {
-      if (info_device_all[i]) { free(info_device_all[i]); }
+      mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
+      free (matching_device_ids);
+      free (device_ids);
+    } else {
+      mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
     }
 
-  } else {
-    // wild-card GPU_DEVICE set to '*'
-    mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
-  }
-
-  //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
-  struct _opencl_version  platform_version;
-  get_platform_version((cl_platform_id) properties[1], &platform_version);
-
+    //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
+    struct _opencl_version  platform_version;
+    get_platform_version((cl_platform_id) properties[1], &platform_version);
 #ifdef CL_VERSION_1_1
-  if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
-    clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
-  } else
+   if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
+      clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
+   } else
 #endif
-  {
-    size_t nContextDescriptorSize;
-    clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
-    *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
-  }
-
-  // stores info in mesh opencl structure
-  mocl.nb_devices = *nb_devices;
-  free(platform_ids);
+    {
+      size_t nContextDescriptorSize;
+      clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
+      *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
+    }
+   mocl.nb_devices = *nb_devices;
+   free(platform_ids);
 
-  size_t szParmDataBytes;
-  cl_device_id* cdDevices;
+   size_t szParmDataBytes;
+   cl_device_id* cdDevices;
 
-  // get the list of GPU devices associated with this context
-  clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
-  cdDevices = (cl_device_id *) malloc(szParmDataBytes);
+   // get the list of GPU devices associated with context
+   clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
+   cdDevices = (cl_device_id *) malloc(szParmDataBytes);
 
-  clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
+   clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
 
-  mocl.device = cdDevices[myrank % mocl.nb_devices];
-  free(cdDevices);
+   mocl.device = cdDevices[myrank % mocl.nb_devices];
+   free(cdDevices);
 
-  // command kernel queues
-  mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
-  if (GPU_ASYNC_COPY) {
-    mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
-  }
+   mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+   mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
 }
 #endif
 
-/* ----------------------------------------------------------------------------------------------- */
-// GPU initialization
-/* ----------------------------------------------------------------------------------------------- */
-
 #define isspace(c) ((c) == ' ')
 
 static char *trim_and_default(char *s)
 {
   // trim before
-  while (*s != '\0' && isspace(*s)) { s++; }
+  while (*s != '\0' && isspace(*s)) s++;
 
   if (*s == '\0') {
     return s;
   }
 
-  // note: the platform_filter argument acts weird on apple platforms, giving a string "NVIDIA   Geforce", instead of just "NVIDIA" and "Geforce"
-  //       here we assume that maximum length of GPU_PLATFORM is 11 characters
-  //       todo - find better way to avoid this?
-  // debug
-  //printf("string: %s has length %i \n",s,strlen(s));
-  int len = strlen(s);
-  if (len > 11 ) len = 11;
-
   // trim after
-  char *back = s + len;
+  char *back = s + strlen(s);
   while (isspace(*--back));
   *(back + 1) = '\0';
 
@@ -727,57 +564,38 @@ static char *trim_and_default(char *s)
   return s;
 }
 
-/* ----------------------------------------------------------------------------------------------- */
-
 enum gpu_runtime_e {COMPILE, CUDA, OPENCL};
-
 extern EXTERN_LANG
 void FC_FUNC_ (initialize_gpu_device,
                INITIALIZE_GPU_DEVICE) (int *runtime_f, char *platform_filter, char *device_filter, int *myrank_f, int *nb_devices) {
-
   TRACE ("initialize_device");
 
   enum gpu_runtime_e runtime_type = (enum gpu_runtime_e) *runtime_f;
 
-  // trims GPU_PLATFORM and GPU_DEVICE strings
   platform_filter = trim_and_default(platform_filter);
   device_filter = trim_and_default(device_filter);
 
-  // sets and checks gpu runtime flags
 #if defined(USE_OPENCL) && defined(USE_CUDA)
   run_cuda = runtime_type == CUDA;
   run_opencl = runtime_type == OPENCL;
   if (runtime_type == COMPILE) {
-    if (*myrank_f == 0) {
-      printf("\
-Error: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled.\n\
-Please set Par_file accordingly...\n\n", COMPILE, OPENCL, CUDA);
-    }
+    printf("ERROR: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled ...\n", COMPILE, OPENCL, CUDA);
     exit(1);
   }
 #elif defined(USE_OPENCL)
   run_opencl = 1;
   if (runtime_type != COMPILE && runtime_type != OPENCL) {
-    if (*myrank_f == 0) {
-      printf("\
-Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d).\n\
-This simulation will continue using the OpenCL runtime...\n\n", runtime_type, OPENCL, COMPILE);
-    }
+    printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d). Defaulting to OpenCL.\n", runtime_type, OPENCL, COMPILE);
   }
 #elif defined(USE_CUDA)
   run_cuda = 1;
   if (runtime_type != COMPILE && runtime_type != CUDA) {
-    if (*myrank_f == 0) {
-      printf("\
-Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d).\n\
-This simulation will continue using the Cuda runtime...\n", runtime_type, CUDA, COMPILE);
-    }
+    printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d). Defaulting to Cuda.\n", runtime_type, CUDA, COMPILE);
   }
 #else
-  #error "GPU code compiled but neither CUDA nor OpenCL are enabled"
+  #error "GPU code compiled but neither Cuda nor OpenCL are enabled"
 #endif
 
-  // initializes gpu cards
 #ifdef USE_OPENCL
   if (run_opencl) {
     initialize_ocl_device(platform_filter, device_filter, *myrank_f, nb_devices);
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index fa93199..6e51c5d 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -928,7 +928,7 @@ typedef struct mesh_ {
   // specific OpenCL texture arrays
 #ifdef USE_OPENCL
 // note: need to be defined as they are passed as function arguments
-  // USE_TEXTURES_FIELDS
+#ifdef USE_TEXTURES_FIELDS
   // forward
   cl_mem d_displ_cm_tex;
   cl_mem d_accel_cm_tex;
@@ -948,12 +948,14 @@ typedef struct mesh_ {
 
   cl_mem d_b_displ_ic_tex;
   cl_mem d_b_accel_ic_tex;
-  // USE_TEXTURES_CONSTANTS
+#endif
+#ifdef USE_TEXTURES_CONSTANTS
   // hprime
   cl_mem d_hprime_xx_cm_tex;
   // weighted hprime
   cl_mem d_hprimewgll_xx_cm_tex;
 #endif
+#endif
 
   // ------------------------------------------------------------------ //
   // LDDRK
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index abde0fc..2dd64a7 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -159,9 +159,6 @@ void FC_FUNC_ (prepare_constants_device,
 
     mp->d_hprime_xx_cm_tex = clCreateImage2D (mocl.context, CL_MEM_READ_ONLY, &format, NGLL2, 1, 0, mp->d_hprime_xx.ocl, clck_(&errcode));
     mp->d_hprimewgll_xx_cm_tex = clCreateImage2D (mocl.context, CL_MEM_READ_ONLY, &format, NGLL2, 1, 0, mp->d_hprimewgll_xx.ocl, clck_(&errcode));
-#else //USE_TEXTURES_CONSTANTS
-    mp->d_hprime_xx_cm_tex = moclGetDummyImage2D(mp);
-    mp->d_hprimewgll_xx_cm_tex = moclGetDummyImage2D(mp);
 #endif //USE_TEXTURES_CONSTANTS
   }
 #endif
@@ -1622,12 +1619,6 @@ void FC_FUNC_ (prepare_crust_mantle_device,
       mp->d_b_displ_cm_tex = moclGetDummyImage2D(mp);
       mp->d_b_accel_cm_tex = moclGetDummyImage2D(mp);
     }
-#else
-    mp->d_displ_cm_tex = moclGetDummyImage2D(mp);
-    mp->d_accel_cm_tex = moclGetDummyImage2D(mp);
-    // backward/reconstructed fields
-    mp->d_b_displ_cm_tex = moclGetDummyImage2D(mp);
-    mp->d_b_accel_cm_tex = moclGetDummyImage2D(mp);
 #endif
   }
 #endif
@@ -2307,12 +2298,6 @@ void FC_FUNC_ (prepare_inner_core_device,
       mp->d_b_displ_ic_tex = moclGetDummyImage2D(mp);
       mp->d_b_accel_ic_tex = moclGetDummyImage2D(mp);
     }
-#else
-    mp->d_displ_ic_tex = moclGetDummyImage2D(mp);
-    mp->d_accel_ic_tex = moclGetDummyImage2D(mp);
-    // backward/reconstructed fields
-    mp->d_b_displ_ic_tex = moclGetDummyImage2D(mp);
-    mp->d_b_accel_ic_tex = moclGetDummyImage2D(mp);
 #endif
   }
 #endif
@@ -2570,8 +2555,10 @@ void FC_FUNC_ (prepare_cleanup_device,
   //------------------------------------------
 #ifdef USE_OPENCL
   if (run_opencl) {
+#ifdef USE_TEXTURES_CONSTANTS
     clReleaseMemObject (mp->d_hprime_xx.ocl);
     clReleaseMemObject (mp->d_hprimewgll_xx.ocl);
+#endif
 
     clReleaseMemObject (mp->d_wgllwgll_xy.ocl);
     clReleaseMemObject (mp->d_wgllwgll_xz.ocl);
@@ -3046,9 +3033,9 @@ void FC_FUNC_ (prepare_cleanup_device,
     gpuFree (&mp->d_normal_ocean_load);
   }
 
+#ifdef USE_TEXTURES_FIELDS
 #ifdef USE_OPENCL
   if (run_opencl) {
-    // note: texture arrays in OpenCL are always allocated (either dummy or valid ones)
     clReleaseMemObject (mp->d_displ_cm_tex);
     clReleaseMemObject (mp->d_accel_cm_tex);
     clReleaseMemObject (mp->d_b_displ_cm_tex);
@@ -3068,6 +3055,7 @@ void FC_FUNC_ (prepare_cleanup_device,
     clReleaseMemObject (mp->d_hprimewgll_xx_cm_tex);
   }
 #endif
+#endif
   
   // synchronizes device
   gpuSynchronize();