[cig-commits] [commit] devel, master: revert src/gpu/initialize_gpu.c to original state (c1bfb4d)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Thu Nov 6 08:33:00 PST 2014


Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit c1bfb4d4a1cbf81ada884582f5888f9f9fffc1b2
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date:   Thu Oct 23 16:12:56 2014 +0200

    revert src/gpu/initialize_gpu.c to original state


>---------------------------------------------------------------

c1bfb4d4a1cbf81ada884582f5888f9f9fffc1b2
 src/gpu/initialize_gpu.c | 551 +++++++++++++++++++++++++++++++----------------
 1 file changed, 371 insertions(+), 180 deletions(-)

diff --git a/src/gpu/initialize_gpu.c b/src/gpu/initialize_gpu.c
index cc40d3e..95ee58a 100644
--- a/src/gpu/initialize_gpu.c
+++ b/src/gpu/initialize_gpu.c
@@ -32,8 +32,8 @@
 
 // GPU initialization
 
-/* macro definitions used in GPU kernels */
 #ifdef USE_OPENCL
+// macro definitions used in GPU kernels
 
 #define STR(x) #x
 #define PASS(x) {#x, STR(x)}
@@ -42,7 +42,7 @@ static struct {
   const char *name;
   const char *value;
 } _macro_to_kernel[] = {
-  /* macro values */
+  // macro values
   PASS(NDIM),
   PASS(NGLLX), PASS(NGLL2), PASS(NGLL3), PASS(NGLL3_PADDED),
   PASS(N_SLS),
@@ -51,7 +51,7 @@ static struct {
   PASS(COLORING_MIN_NSPEC_OUTER_CORE), PASS(COLORING_MIN_NSPEC_INNER_CORE),
   PASS(R_EARTH_KM),
 
-  /* macro functions: not working yet, spaces not allowed in OCL compiler*/
+  // macro functions: not working yet, spaces not allowed in OCL compiler
 
 /* PASS(INDEX2(xsize, x, y)),
    PASS(INDEX3(xsize, ysize, x, y, z)),
@@ -60,7 +60,7 @@ static struct {
    PASS(INDEX5(xsize, ysize, zsize, isize, x, y, z, i, j)),
    PASS(INDEX6(xsize, ysize, zsize, isize, jsize, x, y, z, i, j, k)), */
 
-  /* macro flags, passed only ifdefed */
+  // macro flags, passed only ifdefed
   PASS(MANUALLY_UNROLLED_LOOPS), PASS(USE_TEXTURES_CONSTANTS), PASS(USE_TEXTURES_FIELDS),
 
   PASS(USE_LAUNCH_BOUNDS),
@@ -72,13 +72,18 @@ static struct {
 
 /* ----------------------------------------------------------------------------------------------- */
 
+// gpu runtime flags
 int run_cuda = 0;
 int run_opencl = 0;
 
 /* ----------------------------------------------------------------------------------------------- */
+// CUDA initialization
+/* ----------------------------------------------------------------------------------------------- */
 
 #ifdef USE_CUDA
 
+// initializes CUDA devices
+
 static void initialize_cuda_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
   int device_count = 0;
 
@@ -91,12 +96,12 @@ static void initialize_cuda_device(const char *platform_filter, const char *devi
   //
   // being verbose and catches error from first call to CUDA runtime function, without synchronize call
   cudaError_t err = cudaGetLastError();
-  if (err != cudaSuccess){
+  if (err != cudaSuccess) {
     fprintf(stderr,"Error after cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
-    exit_on_error("CUDA runtime error: cudaGetDeviceCount failed\n\n\
+    exit_on_error("\
+CUDA runtime error: cudaGetDeviceCount failed\n\n\
 please check if driver and runtime libraries work together\n\
-or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n\
-exiting...\n");
+or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n");
   }
 
   // returns device count to fortran
@@ -128,7 +133,7 @@ exiting...\n");
   }
 
   if (nbMatchingDevices == 0) {
-    printf("ERROR: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
+    printf("Error: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
     exit(1);
   }
 
@@ -139,7 +144,7 @@ exiting...\n");
   cudaGetDeviceProperties(&deviceProp, myDevice);
 
   // exit if the machine has no CUDA-enabled device
-  if (deviceProp.major == 9999 && deviceProp.minor == 9999){
+  if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
     fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
     exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
   }
@@ -155,15 +160,15 @@ exiting...\n");
     sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
   }
   // debugging
-  if (DEBUG){
+  if (DEBUG) {
     do_output_info = 1;
     sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
   }
 
   // output to file
-  if( do_output_info ){
+  if (do_output_info) {
     fp = fopen(filename,"w");
-    if (fp != NULL){
+    if (fp != NULL) {
       // display device properties
       fprintf(fp,"Device Name = %s\n",deviceProp.name);
       fprintf(fp,"memory:\n");
@@ -182,17 +187,17 @@ exiting...\n");
       fprintf(fp,"features:\n");
       fprintf(fp,"  Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
       fprintf(fp,"  multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
-      if(deviceProp.canMapHostMemory){
+      if (deviceProp.canMapHostMemory) {
         fprintf(fp,"  canMapHostMemory: TRUE\n");
       }else{
         fprintf(fp,"  canMapHostMemory: FALSE\n");
       }
-      if(deviceProp.deviceOverlap){
+      if (deviceProp.deviceOverlap) {
         fprintf(fp,"  deviceOverlap: TRUE\n");
       }else{
         fprintf(fp,"  deviceOverlap: FALSE\n");
       }
-      if(deviceProp.concurrentKernels){
+      if (deviceProp.concurrentKernels) {
         fprintf(fp,"  concurrentKernels: TRUE\n");
       }else{
         fprintf(fp,"  concurrentKernels: FALSE\n");
@@ -210,17 +215,17 @@ exiting...\n");
   }
 
   // make sure that the device has compute capability >= 1.3
-  if (deviceProp.major < 1){
+  if (deviceProp.major < 1) {
     fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
     exit_on_error("CUDA Compute capability major number should be at least 1\n");
   }
-  if (deviceProp.major == 1 && deviceProp.minor < 3){
+  if (deviceProp.major == 1 && deviceProp.minor < 3) {
     fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
     exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
   }
   // we use pinned memory for asynchronous copy
   if (GPU_ASYNC_COPY) {
-    if (! deviceProp.canMapHostMemory){
+    if (! deviceProp.canMapHostMemory) {
       fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
       exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
     }
@@ -253,50 +258,91 @@ exiting...\n");
 }
 #endif
 
+/* ----------------------------------------------------------------------------------------------- */
+// OpenCL initialization
+/* ----------------------------------------------------------------------------------------------- */
+
 #ifdef USE_OPENCL
+
+// OpenCL mesh
 struct _mesh_opencl mocl;
 
+// function definitions
 cl_device_id oclGetMyDevice(int rank);
 void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices);
-
 void build_kernels (void);
 
+// initializes OpenCL devices
+
 static void initialize_ocl_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
+
+  // selects device
   ocl_select_device(platform_filter, device_filter, myrank, nb_devices);
 
   // outputs device info to file
   char filename[BUFSIZ];
   FILE *fp;
-  sprintf (filename, "OUTPUT_FILES/gpu_device_info_proc_%06d.txt", myrank);
-  fp = fopen (filename, "a+");
-  if (fp) {
-    cl_device_type device_type;
-    size_t max_work_group_size;
-    cl_ulong local_mem_size;
-    cl_uint max_compute_units;
-    char name[1024];
-    size_t image2d_max_size[2];
-    // display device properties
-    clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem_size), &local_mem_size, NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
-    clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
-
-    fprintf (fp, "Device Name = %s\n", name);
-    fprintf (fp, "Type: %d\n", (int) device_type);
-    fprintf (fp, "local_mem_size: %zu\n", local_mem_size);
-    fprintf (fp, "max_compute_units: %u\n", max_compute_units);
-    fprintf (fp, "max_work_group_size: %lu\n", max_work_group_size);
-    fprintf (fp, "image2d_max_size: %zux%zu\n", image2d_max_size[0], image2d_max_size[1]);
-
-    fclose (fp);
+  int do_output_info = 0;
+
+  // by default, only master process outputs device info to avoid file cluttering
+  if (myrank == 0) {
+    do_output_info = 1;
+    sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
+  }
+  // debugging
+  if (DEBUG) {
+    do_output_info = 1;
+    sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
+  }
+
+  // output to file
+  if (do_output_info) {
+    fp = fopen(filename,"w");
+    if (fp != NULL) {
+      cl_device_type device_type;
+      size_t max_work_group_size;
+      cl_ulong mem_size;
+      cl_uint units;
+      char name[1024];
+      size_t image2d_max_size[2];
+
+      // display device properties
+      clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+      fprintf (fp, "Device Name = %s\n", name);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
+      fprintf (fp, "Device Vendor = %s\n", name);
+      fprintf (fp, "Memory:\n");
+      clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
+      fprintf (fp, "  local_mem_size (in KB) : %f\n", mem_size / 1024.f);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
+      fprintf (fp, "  global_mem_size (in MB): %f\n", mem_size / (1024.f * 1024.f));
+      clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
+      fprintf (fp, "  image2d_max_size: %zu x %zu\n", image2d_max_size[0], image2d_max_size[1]);
+      fprintf(fp,"blocks:\n");
+      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
+      fprintf (fp, "  max_compute_units: %u\n", units);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+      fprintf (fp, "  max_work_group_size: %lu\n", max_work_group_size);
+      fprintf(fp,"features:\n");
+      clGetDeviceInfo(mocl.device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
+      fprintf (fp, "  device version : %s\n", name);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
+      fprintf (fp, "  device type: %d\n", (int) device_type);
+      clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(units), &units, NULL);
+      fprintf (fp, "  device max_clock_frequency: %u\n", units);
+      clGetDeviceInfo(mocl.device, CL_DRIVER_VERSION, sizeof(name), name, NULL);
+      fprintf (fp, "  driver version : %s\n", name);
+
+      fclose (fp);
+    }
   }
 
+  // builds OpenCL kernels
   build_kernels();
+
 }
+
 #define xQUOTE(str) #str
 #define QUOTE(str)  xQUOTE(str)
 
@@ -306,54 +352,63 @@ static void initialize_ocl_device(const char *platform_filter, const char *devic
 #define _OCL_GPU_CFLAGS ""
 #endif
 
+/* ----------------------------------------------------------------------------------------------- */
+
 #define PARAMETER_STR_SIZE 1024
+
 void build_kernels (void) {
+
   static char parameters[PARAMETER_STR_SIZE] = _OCL_GPU_CFLAGS " ";
   cl_int errcode;
   char *pos = parameters + strlen(_OCL_GPU_CFLAGS) + 1;
   int len = PARAMETER_STR_SIZE;
   int i;
 
+  // adds preprocessor definitions
+  // e.g. -DNDIM=3 -DNGLLX=5 ..
   for(i = 0; _macro_to_kernel[i].name != NULL; i++) {
     if (!strcmp(_macro_to_kernel[i].name, _macro_to_kernel[i].value)) {
       continue;
     }
     if (!len) {
-      printf("ERROR: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
+      printf("Error: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
+      exit(1);
     }
     int written = snprintf(pos, len, "-D%s=%s ", _macro_to_kernel[i].name, _macro_to_kernel[i].value);
     pos += written;
     len -= written;
   }
 
+  // debug
+  //printf("building OpenCL kernels: parameters = %s \n",parameters);
+
+  // adds kernels as const char definitions
   #include "kernel_inc_cl.c"
 
+  // defines OpenCL build program macro
 #undef BOAST_KERNEL
 #define BOAST_KERNEL(__kern_name__)                                     \
-  mocl.programs.__kern_name__##_program = clCreateProgramWithSource(    \
-                       mocl.context, 1,                                 \
-                       &__kern_name__##_program, NULL, clck_(&errcode));\
-  mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program,  \
-                                0, NULL, parameters, NULL, NULL);       \
+  mocl.programs.__kern_name__##_program = clCreateProgramWithSource( mocl.context, 1, \
+                                                                     &__kern_name__##_program, NULL, clck_(&errcode));\
+  mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, 0, NULL, parameters, NULL, NULL);\
   if (mocl_errcode != CL_SUCCESS) {                                     \
-    fprintf(stderr,"Error: Failed to build program "#__kern_name__": %s\n", \
-            clewErrorString(mocl_errcode));                             \
+    fprintf(stderr,"OpenCL Error: Failed to build program "#__kern_name__": %s\n", clewErrorString(mocl_errcode)); \
     char cBuildLog[10240];                                              \
-    clGetProgramBuildInfo(mocl.programs.__kern_name__##_program,        \
-                          mocl.device,                                  \
-                          CL_PROGRAM_BUILD_LOG,                         \
-                          sizeof(cBuildLog), cBuildLog, NULL );         \
-    fprintf(stderr,"%s\n",cBuildLog);                                   \
+    clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, mocl.device, CL_PROGRAM_BUILD_LOG, \
+                          sizeof(cBuildLog), cBuildLog, NULL ); \
+    fprintf(stderr,"OpenCL Log: %s\n",cBuildLog);                                   \
     exit(1);                                                            \
   }                                                                     \
-  mocl.kernels.__kern_name__ = clCreateKernel (                         \
-                               mocl.programs.__kern_name__ ## _program, \
-                               #__kern_name__ , clck_(&errcode));
+  mocl.kernels.__kern_name__ = clCreateKernel (mocl.programs.__kern_name__ ## _program, #__kern_name__ , clck_(&errcode));
 
+  // builds each OpenCL kernel
   #include "kernel_list.h"
+
 }
 
 
+/* ----------------------------------------------------------------------------------------------- */
+
 struct _opencl_version {
   cl_uint minor;
   cl_uint major;
@@ -362,188 +417,305 @@ struct _opencl_version opencl_version_1_0 = {1,0};
 struct _opencl_version opencl_version_1_1 = {1,1};
 struct _opencl_version opencl_version_1_2 = {1,2};
 
+/* ----------------------------------------------------------------------------------------------- */
+
 cl_int compare_opencl_version(struct _opencl_version v1, struct _opencl_version v2) {
-  if(v1.major > v2.major)
+  if (v1.major > v2.major)
     return 1;
-  if(v1.major < v2.major)
+  if (v1.major < v2.major)
     return -1;
-  if(v1.minor > v2.minor)
+  if (v1.minor > v2.minor)
     return 1;
-  if(v1.minor < v2.minor)
+  if (v1.minor < v2.minor)
     return -1;
   return 0;
 }
 
+/* ----------------------------------------------------------------------------------------------- */
+
 static void get_platform_version(cl_platform_id platform_id, struct _opencl_version *version) {
-    size_t cl_platform_version_size;
-    clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
 
-    char *cl_platform_version;
-    cl_platform_version = (char *) malloc(cl_platform_version_size);
+  size_t cl_platform_version_size;
+  clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
 
-    if (cl_platform_version == NULL) {
-      fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
-      exit(1);
-    }
+  char *cl_platform_version;
+  cl_platform_version = (char *) malloc(cl_platform_version_size);
 
-    clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
-    //OpenCL<space><major_version.minor_version><space><platform-specific information>
-    char minor[2], major[2];
-    major[0] = cl_platform_version[7];
-    major[1] = 0;
-    minor[0] = cl_platform_version[9];
-    minor[1] = 0;
-    version->major = atoi(major);
-    version->major = atoi(minor);
-    free(cl_platform_version);
+  if (cl_platform_version == NULL) {
+    fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
+    exit(1);
+  }
+
+  clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
+
+  //OpenCL<space><major_version.minor_version><space><platform-specific information>
+  char minor[2], major[2];
+  major[0] = cl_platform_version[7];
+  major[1] = 0;
+  minor[0] = cl_platform_version[9];
+  minor[1] = 0;
+  version->major = atoi(major);
+  version->major = atoi(minor);
+  free(cl_platform_version);
 }
 
+/* ----------------------------------------------------------------------------------------------- */
+
 #define OCL_DEV_TYPE CL_DEVICE_TYPE_ALL
+
 void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
-    cl_int errcode = CL_SUCCESS;
-    cl_platform_id *platform_ids;
-    cl_uint num_platforms;
 
-    clGetPlatformIDs(0, NULL, &num_platforms);
+  cl_int errcode = CL_SUCCESS;
+  cl_platform_id *platform_ids;
+  cl_uint num_platforms;
 
-    if (num_platforms == 0) {
-      fprintf(stderr,"No OpenCL platform available!\n");
-      exit(1);
-    }
+  // first OpenCL call
+  // only gets number of platforms
+  clCheck( clGetPlatformIDs(0, NULL, &num_platforms) );
+
+  // checks if OpenCL platforms available
+  if (num_platforms == 0) {
+    fprintf(stderr,"OpenCL error: No OpenCL platform available!\n");
+    exit(1);
+  }
 
-    platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
+  platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
 
-    clGetPlatformIDs(num_platforms, platform_ids, NULL);
+  // gets platform infos
+  clCheck( clGetPlatformIDs(num_platforms, platform_ids, NULL));
 
-    cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
-    if (strlen(platform_filter)) {
-      cl_uint found = 0;
-      cl_uint i;
+  cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
 
-      for (i = 0; i < num_platforms && !found; i++) {
-        size_t info_length;
-        char *info;
+  // temporary array to store infos
+  int i,j;
+  char *info_all[num_platforms][2];
+  // initializes pointers
+  for (i = 0; i < num_platforms; i++) {
+    info_all[i][0] = NULL;
+    info_all[i][1] = NULL;
+  }
 
-        int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
-        int j;
+  // looks for platform matching GPU_PLATFORM string given in Par_file
+  if (strlen(platform_filter)) {
+    cl_uint found = 0;
 
-        for (j = 0; j < 2 && !found; j++) {
-          clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length);
+    for (i = 0; i < num_platforms && !found; i++) {
+      size_t info_length;
+      char *info;
 
-          info = (char *) malloc(info_length * sizeof(char));
+      // checks vendor and platform names for matching with GPU_PLATFORM
+      int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
+      for (j = 0; j < 2 && !found; j++) {
+        // gets property info length
+        clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length));
 
-          clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL);
+        // checks info
+        if (info_length == 0) {
+          fprintf(stderr,"OpenCL error: No OpenCL platform info available!\n");
+          exit(1);
+        }
 
-          if (strcasestr(info, platform_filter)) {
-            properties[1] = (cl_context_properties) platform_ids[i];
-            found = 1;
-          }
+        // allocates info buffer and gets info string
+        info = (char *) malloc(info_length * sizeof(char));
+        clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL));
 
-          free(info);
+        // stores info
+        info_all[i][j] = malloc( strlen(info) + 1);
+        strcpy(info_all[i][j],info);
+
+        // sets matching platform id
+        if (strcasestr(info, platform_filter)) {
+          properties[1] = (cl_context_properties) platform_ids[i];
+          found = 1;
         }
+        // frees temporary array
+        free(info);
       }
+    }
 
-      if (!found) {
-        fprintf(stderr, "No matching OpenCL platform available : %s!\n", platform_filter);
-        exit(1);
+    // checks if platform found
+    if (!found) {
+      if (myrank == 0) {
+        fprintf(stderr, "\nAvailable platforms are:\n");
+        for (i = 0; i < num_platforms; i++) {
+          if (info_all[i][0]) { fprintf(stderr, "  platform %i: vendor = %s , name = %s\n",i,info_all[i][0],info_all[i][1]);}
+        }
+        fprintf(stderr, "Please check your parameter GPU_PLATFORM in Par_file\n\n");
+      }
+      // frees info array
+      for (i = 0; i < num_platforms; i++) {
+        if (info_all[i][0]) { free(info_all[i][0]); }
+        if (info_all[i][1]) { free(info_all[i][1]); }
       }
-    } else {
-      properties[1] = (cl_context_properties) platform_ids[0];
+      // exits
+      fprintf(stderr, "No matching OpenCL platform available : %s\n", platform_filter);
+      exit(1);
     }
 
-    if (strlen(device_filter)) {
-      cl_uint found = 0;
-      cl_uint i;
-      cl_uint num_devices;
-      cl_device_id *device_ids;
-      cl_device_id *matching_device_ids;
-
-      clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices);
-      if (num_devices == 0) {
-        fprintf(stderr,"No device of type %d!\n", (int) OCL_DEV_TYPE);
-        exit(1);
-      }
+    // frees info array
+    for (i = 0; i < num_platforms; i++) {
+      if (info_all[i][0]) { free(info_all[i][0]); }
+      if (info_all[i][1]) { free(info_all[i][1]); }
+    }
 
-      device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
+  } else {
+    // wild-card platform filter given (GPU_PLATFORM set to '*'), takes first platform
+    properties[1] = (cl_context_properties) platform_ids[0];
+  }
 
-      matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
+  // searches for device
+  if (strlen(device_filter)) {
+    cl_uint found = 0;
+    cl_uint i;
+    cl_uint num_devices;
+    cl_device_id *device_ids;
+    cl_device_id *matching_device_ids;
 
-      clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL);
-      for (i = 0; i < num_devices; i++) {
-        size_t info_length;
-        char *info;
+    // only gets number of devices for this platform
+    clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices));
 
-        clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length);
+    // checks
+    if (num_devices == 0) {
+      fprintf(stderr,"No OpenCL device of type %d!\n", (int) OCL_DEV_TYPE);
+      exit(1);
+    }
 
-        info = (char *) malloc(info_length * sizeof(char));
+    device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
 
-        clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL);
-        if (strcasestr(info, device_filter)) {
-          matching_device_ids[found] = device_ids[i];
-          found++;
-        }
+    matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
 
-        free(info);
+    // gets device infos
+    clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL));
+
+    // temporary array to store device infos
+    char *info_device_all[num_devices];
+    // initializes pointers
+    for (i = 0; i < num_devices; i++) {
+      info_device_all[i] = NULL;
+    }
+
+    // searches device matching GPU_DEVICE string
+    for (i = 0; i < num_devices; i++) {
+      size_t info_length;
+      char *info;
+
+      clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length));
+
+      info = (char *) malloc(info_length * sizeof(char));
+
+      clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL));
+
+      // stores info
+      info_device_all[i] = malloc( strlen(info) + 1);
+      strcpy(info_device_all[i],info);
+
+      // sets matching device id
+      if (strcasestr(info, device_filter)) {
+        matching_device_ids[found] = device_ids[i];
+        found++;
       }
 
-      if (!found) {
-        fprintf(stderr, "No matching OpenCL device available : %s!\n", device_filter);
-        exit(1);
+      free(info);
+    }
+
+    if (!found) {
+      // user output
+      if (myrank == 0) {
+        fprintf(stderr, "\nAvailable devices are:\n");
+        for (i = 0; i < num_devices; i++) {
+          if (info_device_all[i]) { fprintf(stderr, "  device %i: name = %s\n",i,info_device_all[i]);}
+        }
+        fprintf(stderr, "Please check your parameter GPU_DEVICE in Par_file\n\n");
+      }
+      // frees info array
+      for (i = 0; i < num_devices; i++) {
+        if (info_device_all[i]) { free(info_device_all[i]); }
       }
+      // exits
+      fprintf(stderr, "No matching OpenCL device available : %s\n", device_filter);
+      exit(1);
+    }
+
+    // creates an OpenCL context
+    mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
 
-      mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
-      free (matching_device_ids);
-      free (device_ids);
-    } else {
-      mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
+    // frees temporary arrays
+    free (matching_device_ids);
+    free (device_ids);
+    // frees info array
+    for (i = 0; i < num_devices; i++) {
+      if (info_device_all[i]) { free(info_device_all[i]); }
     }
 
-    //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
-    struct _opencl_version  platform_version;
-    get_platform_version((cl_platform_id) properties[1], &platform_version);
+  } else {
+    // wild-card GPU_DEVICE set to '*'
+    mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
+  }
+
+  //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
+  struct _opencl_version  platform_version;
+  get_platform_version((cl_platform_id) properties[1], &platform_version);
+
 #ifdef CL_VERSION_1_1
-   if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
-      clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
-   } else
+  if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
+    clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
+  } else
 #endif
-    {
-      size_t nContextDescriptorSize;
-      clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
-      *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
-    }
-   mocl.nb_devices = *nb_devices;
-   free(platform_ids);
+  {
+    size_t nContextDescriptorSize;
+    clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
+    *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
+  }
+
+  // stores info in mesh opencl structure
+  mocl.nb_devices = *nb_devices;
+  free(platform_ids);
 
-   size_t szParmDataBytes;
-   cl_device_id* cdDevices;
+  size_t szParmDataBytes;
+  cl_device_id* cdDevices;
 
-   // get the list of GPU devices associated with context
-   clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
-   cdDevices = (cl_device_id *) malloc(szParmDataBytes);
+  // get the list of GPU devices associated with this context
+  clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
+  cdDevices = (cl_device_id *) malloc(szParmDataBytes);
 
-   clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
+  clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
 
-   mocl.device = cdDevices[myrank % mocl.nb_devices];
-   free(cdDevices);
+  mocl.device = cdDevices[myrank % mocl.nb_devices];
+  free(cdDevices);
 
-   mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
-   mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+  // command kernel queues
+  mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+  if (GPU_ASYNC_COPY) {
+    mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+  }
 }
 #endif
 
+/* ----------------------------------------------------------------------------------------------- */
+// GPU initialization
+/* ----------------------------------------------------------------------------------------------- */
+
 #define isspace(c) ((c) == ' ')
 
 static char *trim_and_default(char *s)
 {
   // trim before
-  while (*s != '\0' && isspace(*s)) s++;
+  while (*s != '\0' && isspace(*s)) { s++; }
 
   if (*s == '\0') {
     return s;
   }
 
+  // note: the platform_filter argument acts weird on apple platforms, giving a string "NVIDIA   Geforce", instead of just "NVIDIA" and "Geforce"
+  //       here we assume that maximum length of GPU_PLATFORM is 11 characters
+  //       todo - find better way to avoid this?
+  // debug
+  //printf("string: %s has length %i \n",s,strlen(s));
+  int len = strlen(s);
+  if (len > 11 ) len = 11;
+
   // trim after
-  char *back = s + strlen(s);
+  char *back = s + len;
   while (isspace(*--back));
   *(back + 1) = '\0';
 
@@ -555,38 +727,57 @@ static char *trim_and_default(char *s)
   return s;
 }
 
+/* ----------------------------------------------------------------------------------------------- */
+
 enum gpu_runtime_e {COMPILE, CUDA, OPENCL};
+
 extern EXTERN_LANG
 void FC_FUNC_ (initialize_gpu_device,
                INITIALIZE_GPU_DEVICE) (int *runtime_f, char *platform_filter, char *device_filter, int *myrank_f, int *nb_devices) {
+
   TRACE ("initialize_device");
 
   enum gpu_runtime_e runtime_type = (enum gpu_runtime_e) *runtime_f;
 
+  // trims GPU_PLATFORM and GPU_DEVICE strings
   platform_filter = trim_and_default(platform_filter);
   device_filter = trim_and_default(device_filter);
 
+  // sets and checks gpu runtime flags
 #if defined(USE_OPENCL) && defined(USE_CUDA)
   run_cuda = runtime_type == CUDA;
   run_opencl = runtime_type == OPENCL;
   if (runtime_type == COMPILE) {
-    printf("ERROR: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled ...\n", COMPILE, OPENCL, CUDA);
+    if (*myrank_f == 0) {
+      printf("\
+Error: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled.\n\
+Please set Par_file accordingly...\n\n", COMPILE, OPENCL, CUDA);
+    }
     exit(1);
   }
 #elif defined(USE_OPENCL)
   run_opencl = 1;
   if (runtime_type != COMPILE && runtime_type != OPENCL) {
-    printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d). Defaulting to OpenCL.\n", runtime_type, OPENCL, COMPILE);
+    if (*myrank_f == 0) {
+      printf("\
+Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d).\n\
+This simulation will continue using the OpenCL runtime...\n\n", runtime_type, OPENCL, COMPILE);
+    }
   }
 #elif defined(USE_CUDA)
   run_cuda = 1;
   if (runtime_type != COMPILE && runtime_type != CUDA) {
-    printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d). Defaulting to Cuda.\n", runtime_type, CUDA, COMPILE);
+    if (*myrank_f == 0) {
+      printf("\
+Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d).\n\
+This simulation will continue using the Cuda runtime...\n", runtime_type, CUDA, COMPILE);
+    }
   }
 #else
-  #error "GPU code compiled but neither Cuda nor OpenCL are enabled"
+  #error "GPU code compiled but neither CUDA nor OpenCL are enabled"
 #endif
 
+  // initializes gpu cards
 #ifdef USE_OPENCL
   if (run_opencl) {
     initialize_ocl_device(platform_filter, device_filter, *myrank_f, nb_devices);



More information about the CIG-COMMITS mailing list