[cig-commits] [commit] devel, master: revert src/gpu/initialize_gpu.c to original state (c1bfb4d)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:33:00 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit c1bfb4d4a1cbf81ada884582f5888f9f9fffc1b2
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date: Thu Oct 23 16:12:56 2014 +0200
revert src/gpu/initialize_gpu.c to original state
>---------------------------------------------------------------
c1bfb4d4a1cbf81ada884582f5888f9f9fffc1b2
src/gpu/initialize_gpu.c | 551 +++++++++++++++++++++++++++++++----------------
1 file changed, 371 insertions(+), 180 deletions(-)
diff --git a/src/gpu/initialize_gpu.c b/src/gpu/initialize_gpu.c
index cc40d3e..95ee58a 100644
--- a/src/gpu/initialize_gpu.c
+++ b/src/gpu/initialize_gpu.c
@@ -32,8 +32,8 @@
// GPU initialization
-/* macro definitions used in GPU kernels */
#ifdef USE_OPENCL
+// macro definitions used in GPU kernels
#define STR(x) #x
#define PASS(x) {#x, STR(x)}
@@ -42,7 +42,7 @@ static struct {
const char *name;
const char *value;
} _macro_to_kernel[] = {
- /* macro values */
+ // macro values
PASS(NDIM),
PASS(NGLLX), PASS(NGLL2), PASS(NGLL3), PASS(NGLL3_PADDED),
PASS(N_SLS),
@@ -51,7 +51,7 @@ static struct {
PASS(COLORING_MIN_NSPEC_OUTER_CORE), PASS(COLORING_MIN_NSPEC_INNER_CORE),
PASS(R_EARTH_KM),
- /* macro functions: not working yet, spaces not allowed in OCL compiler*/
+ // macro functions: not working yet, spaces not allowed in OCL compiler
/* PASS(INDEX2(xsize, x, y)),
PASS(INDEX3(xsize, ysize, x, y, z)),
@@ -60,7 +60,7 @@ static struct {
PASS(INDEX5(xsize, ysize, zsize, isize, x, y, z, i, j)),
PASS(INDEX6(xsize, ysize, zsize, isize, jsize, x, y, z, i, j, k)), */
- /* macro flags, passed only ifdefed */
+ // macro flags, passed only ifdefed
PASS(MANUALLY_UNROLLED_LOOPS), PASS(USE_TEXTURES_CONSTANTS), PASS(USE_TEXTURES_FIELDS),
PASS(USE_LAUNCH_BOUNDS),
@@ -72,13 +72,18 @@ static struct {
/* ----------------------------------------------------------------------------------------------- */
+// gpu runtime flags
int run_cuda = 0;
int run_opencl = 0;
/* ----------------------------------------------------------------------------------------------- */
+// CUDA initialization
+/* ----------------------------------------------------------------------------------------------- */
#ifdef USE_CUDA
+// initializes CUDA devices
+
static void initialize_cuda_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
int device_count = 0;
@@ -91,12 +96,12 @@ static void initialize_cuda_device(const char *platform_filter, const char *devi
//
// being verbose and catches error from first call to CUDA runtime function, without synchronize call
cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess){
+ if (err != cudaSuccess) {
fprintf(stderr,"Error after cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
- exit_on_error("CUDA runtime error: cudaGetDeviceCount failed\n\n\
+ exit_on_error("\
+CUDA runtime error: cudaGetDeviceCount failed\n\n\
please check if driver and runtime libraries work together\n\
-or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n\
-exiting...\n");
+or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n");
}
// returns device count to fortran
@@ -128,7 +133,7 @@ exiting...\n");
}
if (nbMatchingDevices == 0) {
- printf("ERROR: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
+ printf("Error: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
exit(1);
}
@@ -139,7 +144,7 @@ exiting...\n");
cudaGetDeviceProperties(&deviceProp, myDevice);
// exit if the machine has no CUDA-enabled device
- if (deviceProp.major == 9999 && deviceProp.minor == 9999){
+ if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
}
@@ -155,15 +160,15 @@ exiting...\n");
sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
}
// debugging
- if (DEBUG){
+ if (DEBUG) {
do_output_info = 1;
sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
}
// output to file
- if( do_output_info ){
+ if (do_output_info) {
fp = fopen(filename,"w");
- if (fp != NULL){
+ if (fp != NULL) {
// display device properties
fprintf(fp,"Device Name = %s\n",deviceProp.name);
fprintf(fp,"memory:\n");
@@ -182,17 +187,17 @@ exiting...\n");
fprintf(fp,"features:\n");
fprintf(fp," Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
fprintf(fp," multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
- if(deviceProp.canMapHostMemory){
+ if (deviceProp.canMapHostMemory) {
fprintf(fp," canMapHostMemory: TRUE\n");
}else{
fprintf(fp," canMapHostMemory: FALSE\n");
}
- if(deviceProp.deviceOverlap){
+ if (deviceProp.deviceOverlap) {
fprintf(fp," deviceOverlap: TRUE\n");
}else{
fprintf(fp," deviceOverlap: FALSE\n");
}
- if(deviceProp.concurrentKernels){
+ if (deviceProp.concurrentKernels) {
fprintf(fp," concurrentKernels: TRUE\n");
}else{
fprintf(fp," concurrentKernels: FALSE\n");
@@ -210,17 +215,17 @@ exiting...\n");
}
// make sure that the device has compute capability >= 1.3
- if (deviceProp.major < 1){
+ if (deviceProp.major < 1) {
fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
exit_on_error("CUDA Compute capability major number should be at least 1\n");
}
- if (deviceProp.major == 1 && deviceProp.minor < 3){
+ if (deviceProp.major == 1 && deviceProp.minor < 3) {
fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
}
// we use pinned memory for asynchronous copy
if (GPU_ASYNC_COPY) {
- if (! deviceProp.canMapHostMemory){
+ if (! deviceProp.canMapHostMemory) {
fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
}
@@ -253,50 +258,91 @@ exiting...\n");
}
#endif
+/* ----------------------------------------------------------------------------------------------- */
+// OpenCL initialization
+/* ----------------------------------------------------------------------------------------------- */
+
#ifdef USE_OPENCL
+
+// OpenCL mesh
struct _mesh_opencl mocl;
+// function definitions
cl_device_id oclGetMyDevice(int rank);
void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices);
-
void build_kernels (void);
+// initializes OpenCL devices
+
static void initialize_ocl_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
+
+ // selects device
ocl_select_device(platform_filter, device_filter, myrank, nb_devices);
// outputs device info to file
char filename[BUFSIZ];
FILE *fp;
- sprintf (filename, "OUTPUT_FILES/gpu_device_info_proc_%06d.txt", myrank);
- fp = fopen (filename, "a+");
- if (fp) {
- cl_device_type device_type;
- size_t max_work_group_size;
- cl_ulong local_mem_size;
- cl_uint max_compute_units;
- char name[1024];
- size_t image2d_max_size[2];
- // display device properties
- clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem_size), &local_mem_size, NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
-
- fprintf (fp, "Device Name = %s\n", name);
- fprintf (fp, "Type: %d\n", (int) device_type);
- fprintf (fp, "local_mem_size: %zu\n", local_mem_size);
- fprintf (fp, "max_compute_units: %u\n", max_compute_units);
- fprintf (fp, "max_work_group_size: %lu\n", max_work_group_size);
- fprintf (fp, "image2d_max_size: %zux%zu\n", image2d_max_size[0], image2d_max_size[1]);
-
- fclose (fp);
+ int do_output_info = 0;
+
+ // by default, only master process outputs device info to avoid file cluttering
+ if (myrank == 0) {
+ do_output_info = 1;
+ sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
+ }
+ // debugging
+ if (DEBUG) {
+ do_output_info = 1;
+ sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
+ }
+
+ // output to file
+ if (do_output_info) {
+ fp = fopen(filename,"w");
+ if (fp != NULL) {
+ cl_device_type device_type;
+ size_t max_work_group_size;
+ cl_ulong mem_size;
+ cl_uint units;
+ char name[1024];
+ size_t image2d_max_size[2];
+
+ // display device properties
+ clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+ fprintf (fp, "Device Name = %s\n", name);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
+ fprintf (fp, "Device Vendor = %s\n", name);
+ fprintf (fp, "Memory:\n");
+ clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
+ fprintf (fp, " local_mem_size (in KB) : %f\n", mem_size / 1024.f);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
+ fprintf (fp, " global_mem_size (in MB): %f\n", mem_size / (1024.f * 1024.f));
+ clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
+ fprintf (fp, " image2d_max_size: %zu x %zu\n", image2d_max_size[0], image2d_max_size[1]);
+ fprintf(fp,"blocks:\n");
+ clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
+ fprintf (fp, " max_compute_units: %u\n", units);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+ fprintf (fp, " max_work_group_size: %lu\n", max_work_group_size);
+ fprintf(fp,"features:\n");
+ clGetDeviceInfo(mocl.device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
+ fprintf (fp, " device version : %s\n", name);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
+ fprintf (fp, " device type: %d\n", (int) device_type);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(units), &units, NULL);
+ fprintf (fp, " device max_clock_frequency: %u\n", units);
+ clGetDeviceInfo(mocl.device, CL_DRIVER_VERSION, sizeof(name), name, NULL);
+ fprintf (fp, " driver version : %s\n", name);
+
+ fclose (fp);
+ }
}
+ // builds OpenCL kernels
build_kernels();
+
}
+
#define xQUOTE(str) #str
#define QUOTE(str) xQUOTE(str)
@@ -306,54 +352,63 @@ static void initialize_ocl_device(const char *platform_filter, const char *devic
#define _OCL_GPU_CFLAGS ""
#endif
+/* ----------------------------------------------------------------------------------------------- */
+
#define PARAMETER_STR_SIZE 1024
+
void build_kernels (void) {
+
static char parameters[PARAMETER_STR_SIZE] = _OCL_GPU_CFLAGS " ";
cl_int errcode;
char *pos = parameters + strlen(_OCL_GPU_CFLAGS) + 1;
int len = PARAMETER_STR_SIZE;
int i;
+ // adds preprocessor definitions
+ // e.g. -DNDIM=3 -DNGLLX=5 ..
for(i = 0; _macro_to_kernel[i].name != NULL; i++) {
if (!strcmp(_macro_to_kernel[i].name, _macro_to_kernel[i].value)) {
continue;
}
if (!len) {
- printf("ERROR: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
+ printf("Error: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
+ exit(1);
}
int written = snprintf(pos, len, "-D%s=%s ", _macro_to_kernel[i].name, _macro_to_kernel[i].value);
pos += written;
len -= written;
}
+ // debug
+ //printf("building OpenCL kernels: parameters = %s \n",parameters);
+
+ // adds kernels as const char definitions
#include "kernel_inc_cl.c"
+ // defines OpenCL build program macro
#undef BOAST_KERNEL
#define BOAST_KERNEL(__kern_name__) \
- mocl.programs.__kern_name__##_program = clCreateProgramWithSource( \
- mocl.context, 1, \
- &__kern_name__##_program, NULL, clck_(&errcode));\
- mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, \
- 0, NULL, parameters, NULL, NULL); \
+ mocl.programs.__kern_name__##_program = clCreateProgramWithSource( mocl.context, 1, \
+ &__kern_name__##_program, NULL, clck_(&errcode));\
+ mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, 0, NULL, parameters, NULL, NULL);\
if (mocl_errcode != CL_SUCCESS) { \
- fprintf(stderr,"Error: Failed to build program "#__kern_name__": %s\n", \
- clewErrorString(mocl_errcode)); \
+ fprintf(stderr,"OpenCL Error: Failed to build program "#__kern_name__": %s\n", clewErrorString(mocl_errcode)); \
char cBuildLog[10240]; \
- clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, \
- mocl.device, \
- CL_PROGRAM_BUILD_LOG, \
- sizeof(cBuildLog), cBuildLog, NULL ); \
- fprintf(stderr,"%s\n",cBuildLog); \
+ clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, mocl.device, CL_PROGRAM_BUILD_LOG, \
+ sizeof(cBuildLog), cBuildLog, NULL ); \
+ fprintf(stderr,"OpenCL Log: %s\n",cBuildLog); \
exit(1); \
} \
- mocl.kernels.__kern_name__ = clCreateKernel ( \
- mocl.programs.__kern_name__ ## _program, \
- #__kern_name__ , clck_(&errcode));
+ mocl.kernels.__kern_name__ = clCreateKernel (mocl.programs.__kern_name__ ## _program, #__kern_name__ , clck_(&errcode));
+ // builds each OpenCL kernel
#include "kernel_list.h"
+
}
+/* ----------------------------------------------------------------------------------------------- */
+
struct _opencl_version {
cl_uint minor;
cl_uint major;
@@ -362,188 +417,305 @@ struct _opencl_version opencl_version_1_0 = {1,0};
struct _opencl_version opencl_version_1_1 = {1,1};
struct _opencl_version opencl_version_1_2 = {1,2};
+/* ----------------------------------------------------------------------------------------------- */
+
cl_int compare_opencl_version(struct _opencl_version v1, struct _opencl_version v2) {
- if(v1.major > v2.major)
+ if (v1.major > v2.major)
return 1;
- if(v1.major < v2.major)
+ if (v1.major < v2.major)
return -1;
- if(v1.minor > v2.minor)
+ if (v1.minor > v2.minor)
return 1;
- if(v1.minor < v2.minor)
+ if (v1.minor < v2.minor)
return -1;
return 0;
}
+/* ----------------------------------------------------------------------------------------------- */
+
static void get_platform_version(cl_platform_id platform_id, struct _opencl_version *version) {
- size_t cl_platform_version_size;
- clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
- char *cl_platform_version;
- cl_platform_version = (char *) malloc(cl_platform_version_size);
+ size_t cl_platform_version_size;
+ clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
- if (cl_platform_version == NULL) {
- fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
- exit(1);
- }
+ char *cl_platform_version;
+ cl_platform_version = (char *) malloc(cl_platform_version_size);
- clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
- //OpenCL<space><major_version.minor_version><space><platform-specific information>
- char minor[2], major[2];
- major[0] = cl_platform_version[7];
- major[1] = 0;
- minor[0] = cl_platform_version[9];
- minor[1] = 0;
- version->major = atoi(major);
- version->major = atoi(minor);
- free(cl_platform_version);
+ if (cl_platform_version == NULL) {
+ fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
+ exit(1);
+ }
+
+ clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
+
+ //OpenCL<space><major_version.minor_version><space><platform-specific information>
+ char minor[2], major[2];
+ major[0] = cl_platform_version[7];
+ major[1] = 0;
+ minor[0] = cl_platform_version[9];
+ minor[1] = 0;
+ version->major = atoi(major);
+ version->major = atoi(minor);
+ free(cl_platform_version);
}
+/* ----------------------------------------------------------------------------------------------- */
+
#define OCL_DEV_TYPE CL_DEVICE_TYPE_ALL
+
void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
- cl_int errcode = CL_SUCCESS;
- cl_platform_id *platform_ids;
- cl_uint num_platforms;
- clGetPlatformIDs(0, NULL, &num_platforms);
+ cl_int errcode = CL_SUCCESS;
+ cl_platform_id *platform_ids;
+ cl_uint num_platforms;
- if (num_platforms == 0) {
- fprintf(stderr,"No OpenCL platform available!\n");
- exit(1);
- }
+ // first OpenCL call
+ // only gets number of platforms
+ clCheck( clGetPlatformIDs(0, NULL, &num_platforms) );
+
+ // checks if OpenCL platforms available
+ if (num_platforms == 0) {
+ fprintf(stderr,"OpenCL error: No OpenCL platform available!\n");
+ exit(1);
+ }
- platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
+ platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
- clGetPlatformIDs(num_platforms, platform_ids, NULL);
+ // gets platform infos
+ clCheck( clGetPlatformIDs(num_platforms, platform_ids, NULL));
- cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
- if (strlen(platform_filter)) {
- cl_uint found = 0;
- cl_uint i;
+ cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
- for (i = 0; i < num_platforms && !found; i++) {
- size_t info_length;
- char *info;
+ // temporary array to store infos
+ int i,j;
+ char *info_all[num_platforms][2];
+ // initializes pointers
+ for (i = 0; i < num_platforms; i++) {
+ info_all[i][0] = NULL;
+ info_all[i][1] = NULL;
+ }
- int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
- int j;
+ // looks for platform matching GPU_PLATFORM string given in Par_file
+ if (strlen(platform_filter)) {
+ cl_uint found = 0;
- for (j = 0; j < 2 && !found; j++) {
- clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length);
+ for (i = 0; i < num_platforms && !found; i++) {
+ size_t info_length;
+ char *info;
- info = (char *) malloc(info_length * sizeof(char));
+ // checks vendor and platform names for matching with GPU_PLATFORM
+ int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
+ for (j = 0; j < 2 && !found; j++) {
+ // gets property info length
+ clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length));
- clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL);
+ // checks info
+ if (info_length == 0) {
+ fprintf(stderr,"OpenCL error: No OpenCL platform info available!\n");
+ exit(1);
+ }
- if (strcasestr(info, platform_filter)) {
- properties[1] = (cl_context_properties) platform_ids[i];
- found = 1;
- }
+ // allocates info buffer and gets info string
+ info = (char *) malloc(info_length * sizeof(char));
+ clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL));
- free(info);
+ // stores info
+ info_all[i][j] = malloc( strlen(info) + 1);
+ strcpy(info_all[i][j],info);
+
+ // sets matching platform id
+ if (strcasestr(info, platform_filter)) {
+ properties[1] = (cl_context_properties) platform_ids[i];
+ found = 1;
}
+ // frees temporary array
+ free(info);
}
+ }
- if (!found) {
- fprintf(stderr, "No matching OpenCL platform available : %s!\n", platform_filter);
- exit(1);
+ // checks if platform found
+ if (!found) {
+ if (myrank == 0) {
+ fprintf(stderr, "\nAvailable platforms are:\n");
+ for (i = 0; i < num_platforms; i++) {
+ if (info_all[i][0]) { fprintf(stderr, " platform %i: vendor = %s , name = %s\n",i,info_all[i][0],info_all[i][1]);}
+ }
+ fprintf(stderr, "Please check your parameter GPU_PLATFORM in Par_file\n\n");
+ }
+ // frees info array
+ for (i = 0; i < num_platforms; i++) {
+ if (info_all[i][0]) { free(info_all[i][0]); }
+ if (info_all[i][1]) { free(info_all[i][1]); }
}
- } else {
- properties[1] = (cl_context_properties) platform_ids[0];
+ // exits
+ fprintf(stderr, "No matching OpenCL platform available : %s\n", platform_filter);
+ exit(1);
}
- if (strlen(device_filter)) {
- cl_uint found = 0;
- cl_uint i;
- cl_uint num_devices;
- cl_device_id *device_ids;
- cl_device_id *matching_device_ids;
-
- clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices);
- if (num_devices == 0) {
- fprintf(stderr,"No device of type %d!\n", (int) OCL_DEV_TYPE);
- exit(1);
- }
+ // frees info array
+ for (i = 0; i < num_platforms; i++) {
+ if (info_all[i][0]) { free(info_all[i][0]); }
+ if (info_all[i][1]) { free(info_all[i][1]); }
+ }
- device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
+ } else {
+ // wild-card platform filter given (GPU_PLATFORM set to '*'), takes first platform
+ properties[1] = (cl_context_properties) platform_ids[0];
+ }
- matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
+ // searches for device
+ if (strlen(device_filter)) {
+ cl_uint found = 0;
+ cl_uint i;
+ cl_uint num_devices;
+ cl_device_id *device_ids;
+ cl_device_id *matching_device_ids;
- clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL);
- for (i = 0; i < num_devices; i++) {
- size_t info_length;
- char *info;
+ // only gets number of devices for this platform
+ clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices));
- clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length);
+ // checks
+ if (num_devices == 0) {
+ fprintf(stderr,"No OpenCL device of type %d!\n", (int) OCL_DEV_TYPE);
+ exit(1);
+ }
- info = (char *) malloc(info_length * sizeof(char));
+ device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
- clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL);
- if (strcasestr(info, device_filter)) {
- matching_device_ids[found] = device_ids[i];
- found++;
- }
+ matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
- free(info);
+ // gets device infos
+ clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL));
+
+ // temporary array to store device infos
+ char *info_device_all[num_devices];
+ // initializes pointers
+ for (i = 0; i < num_devices; i++) {
+ info_device_all[i] = NULL;
+ }
+
+ // searches device matching GPU_DEVICE string
+ for (i = 0; i < num_devices; i++) {
+ size_t info_length;
+ char *info;
+
+ clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length));
+
+ info = (char *) malloc(info_length * sizeof(char));
+
+ clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL));
+
+ // stores info
+ info_device_all[i] = malloc( strlen(info) + 1);
+ strcpy(info_device_all[i],info);
+
+ // sets matching device id
+ if (strcasestr(info, device_filter)) {
+ matching_device_ids[found] = device_ids[i];
+ found++;
}
- if (!found) {
- fprintf(stderr, "No matching OpenCL device available : %s!\n", device_filter);
- exit(1);
+ free(info);
+ }
+
+ if (!found) {
+ // user output
+ if (myrank == 0) {
+ fprintf(stderr, "\nAvailable devices are:\n");
+ for (i = 0; i < num_devices; i++) {
+ if (info_device_all[i]) { fprintf(stderr, " device %i: name = %s\n",i,info_device_all[i]);}
+ }
+ fprintf(stderr, "Please check your parameter GPU_DEVICE in Par_file\n\n");
+ }
+ // frees info array
+ for (i = 0; i < num_devices; i++) {
+ if (info_device_all[i]) { free(info_device_all[i]); }
}
+ // exits
+ fprintf(stderr, "No matching OpenCL device available : %s\n", device_filter);
+ exit(1);
+ }
+
+ // creates an OpenCL context
+ mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
- mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
- free (matching_device_ids);
- free (device_ids);
- } else {
- mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
+ // frees temporary arrays
+ free (matching_device_ids);
+ free (device_ids);
+ // frees info array
+ for (i = 0; i < num_devices; i++) {
+ if (info_device_all[i]) { free(info_device_all[i]); }
}
- //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
- struct _opencl_version platform_version;
- get_platform_version((cl_platform_id) properties[1], &platform_version);
+ } else {
+ // wild-card GPU_DEVICE set to '*'
+ mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
+ }
+
+ //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
+ struct _opencl_version platform_version;
+ get_platform_version((cl_platform_id) properties[1], &platform_version);
+
#ifdef CL_VERSION_1_1
- if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
- clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
- } else
+ if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
+ clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
+ } else
#endif
- {
- size_t nContextDescriptorSize;
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
- *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
- }
- mocl.nb_devices = *nb_devices;
- free(platform_ids);
+ {
+ size_t nContextDescriptorSize;
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
+ *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
+ }
+
+ // stores info in mesh opencl structure
+ mocl.nb_devices = *nb_devices;
+ free(platform_ids);
- size_t szParmDataBytes;
- cl_device_id* cdDevices;
+ size_t szParmDataBytes;
+ cl_device_id* cdDevices;
- // get the list of GPU devices associated with context
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
- cdDevices = (cl_device_id *) malloc(szParmDataBytes);
+ // get the list of GPU devices associated with this context
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
+ cdDevices = (cl_device_id *) malloc(szParmDataBytes);
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
- mocl.device = cdDevices[myrank % mocl.nb_devices];
- free(cdDevices);
+ mocl.device = cdDevices[myrank % mocl.nb_devices];
+ free(cdDevices);
- mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
- mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+ // command kernel queues
+ mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+ if (GPU_ASYNC_COPY) {
+ mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+ }
}
#endif
+/* ----------------------------------------------------------------------------------------------- */
+// GPU initialization
+/* ----------------------------------------------------------------------------------------------- */
+
#define isspace(c) ((c) == ' ')
static char *trim_and_default(char *s)
{
// trim before
- while (*s != '\0' && isspace(*s)) s++;
+ while (*s != '\0' && isspace(*s)) { s++; }
if (*s == '\0') {
return s;
}
+ // note: the platform_filter argument acts weird on apple platforms, giving a string "NVIDIA Geforce", instead of just "NVIDIA" and "Geforce"
+ // here we assume that maximum length of GPU_PLATFORM is 11 characters
+ // todo - find better way to avoid this?
+ // debug
+ //printf("string: %s has length %i \n",s,strlen(s));
+ int len = strlen(s);
+ if (len > 11 ) len = 11;
+
// trim after
- char *back = s + strlen(s);
+ char *back = s + len;
while (isspace(*--back));
*(back + 1) = '\0';
@@ -555,38 +727,57 @@ static char *trim_and_default(char *s)
return s;
}
+/* ----------------------------------------------------------------------------------------------- */
+
enum gpu_runtime_e {COMPILE, CUDA, OPENCL};
+
extern EXTERN_LANG
void FC_FUNC_ (initialize_gpu_device,
INITIALIZE_GPU_DEVICE) (int *runtime_f, char *platform_filter, char *device_filter, int *myrank_f, int *nb_devices) {
+
TRACE ("initialize_device");
enum gpu_runtime_e runtime_type = (enum gpu_runtime_e) *runtime_f;
+ // trims GPU_PLATFORM and GPU_DEVICE strings
platform_filter = trim_and_default(platform_filter);
device_filter = trim_and_default(device_filter);
+ // sets and checks gpu runtime flags
#if defined(USE_OPENCL) && defined(USE_CUDA)
run_cuda = runtime_type == CUDA;
run_opencl = runtime_type == OPENCL;
if (runtime_type == COMPILE) {
- printf("ERROR: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled ...\n", COMPILE, OPENCL, CUDA);
+ if (*myrank_f == 0) {
+ printf("\
+Error: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled.\n\
+Please set Par_file accordingly...\n\n", COMPILE, OPENCL, CUDA);
+ }
exit(1);
}
#elif defined(USE_OPENCL)
run_opencl = 1;
if (runtime_type != COMPILE && runtime_type != OPENCL) {
- printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d). Defaulting to OpenCL.\n", runtime_type, OPENCL, COMPILE);
+ if (*myrank_f == 0) {
+ printf("\
+Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d).\n\
+This simulation will continue using the OpenCL runtime...\n\n", runtime_type, OPENCL, COMPILE);
+ }
}
#elif defined(USE_CUDA)
run_cuda = 1;
if (runtime_type != COMPILE && runtime_type != CUDA) {
- printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d). Defaulting to Cuda.\n", runtime_type, CUDA, COMPILE);
+ if (*myrank_f == 0) {
+ printf("\
+Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d).\n\
+This simulation will continue using the Cuda runtime...\n", runtime_type, CUDA, COMPILE);
+ }
}
#else
- #error "GPU code compiled but neither Cuda nor OpenCL are enabled"
+ #error "GPU code compiled but neither CUDA nor OpenCL are enabled"
#endif
+ // initializes gpu cards
#ifdef USE_OPENCL
if (run_opencl) {
initialize_ocl_device(platform_filter, device_filter, *myrank_f, nb_devices);
More information about the CIG-COMMITS
mailing list