[cig-commits] [commit] devel, master: remove texture definition when not used (92e5c5e)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:32:51 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit 92e5c5ee0cf0fba6b17a75c2fab5ec023804ba7a
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date: Thu Oct 23 14:22:30 2014 +0200
remove texture definition when not used
>---------------------------------------------------------------
92e5c5ee0cf0fba6b17a75c2fab5ec023804ba7a
src/gpu/compute_forces_crust_mantle_gpu.c | 6 +-
src/gpu/compute_forces_inner_core_gpu.c | 4 +-
src/gpu/compute_forces_outer_core_gpu.c | 3 +-
src/gpu/initialize_gpu.c | 558 ++++++++++--------------------
src/gpu/mesh_constants_gpu.h | 6 +-
src/gpu/prepare_mesh_constants_gpu.c | 20 +-
6 files changed, 204 insertions(+), 393 deletions(-)
diff --git a/src/gpu/compute_forces_crust_mantle_gpu.c b/src/gpu/compute_forces_crust_mantle_gpu.c
index b265b7a..ad29707 100644
--- a/src/gpu/compute_forces_crust_mantle_gpu.c
+++ b/src/gpu/compute_forces_crust_mantle_gpu.c
@@ -262,7 +262,7 @@ void crust_mantle (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_density_table.ocl));
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_wgll_cube.ocl));
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_CRUST_MANTLE_STRAIN_ONLY));
-
+#ifdef USE_TEXTURES_FIELDS
if (FORWARD_OR_ADJOINT == 1) {
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_cm_tex));
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_cm_tex));
@@ -270,8 +270,10 @@ void crust_mantle (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_cm_tex));
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_cm_tex));
}
+#endif
+#ifdef USE_TEXTURES_CONSTANTS
clCheck (clSetKernelArg (*crust_mantle_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_hprime_xx_cm_tex));
-
+#endif
local_work_size[0] = blocksize;
local_work_size[1] = 1;
global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/compute_forces_inner_core_gpu.c b/src/gpu/compute_forces_inner_core_gpu.c
index 6df30ba..448159c 100644
--- a/src/gpu/compute_forces_inner_core_gpu.c
+++ b/src/gpu/compute_forces_inner_core_gpu.c
@@ -221,7 +221,7 @@ void inner_core (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_wgll_cube.ocl));
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_INNER_CORE_STRAIN_ONLY));
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_INNER_CORE));
-
+#ifdef USE_TEXTURES_FIELDS
if (FORWARD_OR_ADJOINT == 1) {
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_ic_tex));
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_ic_tex));
@@ -229,7 +229,7 @@ void inner_core (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_ic_tex));
clCheck (clSetKernelArg (*inner_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_ic_tex));
}
-
+#endif
local_work_size[0] = blocksize;
local_work_size[1] = 1;
global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/compute_forces_outer_core_gpu.c b/src/gpu/compute_forces_outer_core_gpu.c
index 14acdca..06d1985 100644
--- a/src/gpu/compute_forces_outer_core_gpu.c
+++ b/src/gpu/compute_forces_outer_core_gpu.c
@@ -142,6 +142,7 @@ void outer_core (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &d_b_B_array_rotation.ocl));
}
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (int), (void *) &mp->NSPEC_OUTER_CORE));
+#ifdef USE_TEXTURES_FIELDS
if (FORWARD_OR_ADJOINT == 1) {
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_displ_oc_tex));
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_accel_oc_tex));
@@ -149,7 +150,7 @@ void outer_core (int nb_blocks_to_compute, Mesh *mp,
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_displ_oc_tex));
clCheck (clSetKernelArg (*outer_core_kernel_p, idx++, sizeof (cl_mem), (void *) &mp->d_b_accel_oc_tex));
}
-
+#endif
local_work_size[0] = blocksize;
local_work_size[1] = 1;
global_work_size[0] = num_blocks_x * blocksize;
diff --git a/src/gpu/initialize_gpu.c b/src/gpu/initialize_gpu.c
index 95ee58a..c6ffe8c 100644
--- a/src/gpu/initialize_gpu.c
+++ b/src/gpu/initialize_gpu.c
@@ -32,8 +32,8 @@
// GPU initialization
+/* macro definitions used in GPU kernels */
#ifdef USE_OPENCL
-// macro definitions used in GPU kernels
#define STR(x) #x
#define PASS(x) {#x, STR(x)}
@@ -42,7 +42,7 @@ static struct {
const char *name;
const char *value;
} _macro_to_kernel[] = {
- // macro values
+ /* macro values */
PASS(NDIM),
PASS(NGLLX), PASS(NGLL2), PASS(NGLL3), PASS(NGLL3_PADDED),
PASS(N_SLS),
@@ -51,7 +51,7 @@ static struct {
PASS(COLORING_MIN_NSPEC_OUTER_CORE), PASS(COLORING_MIN_NSPEC_INNER_CORE),
PASS(R_EARTH_KM),
- // macro functions: not working yet, spaces not allowed in OCL compiler
+ /* macro functions: not working yet, spaces not allowed in OCL compiler*/
/* PASS(INDEX2(xsize, x, y)),
PASS(INDEX3(xsize, ysize, x, y, z)),
@@ -60,7 +60,7 @@ static struct {
PASS(INDEX5(xsize, ysize, zsize, isize, x, y, z, i, j)),
PASS(INDEX6(xsize, ysize, zsize, isize, jsize, x, y, z, i, j, k)), */
- // macro flags, passed only ifdefed
+ /* macro flags, passed only ifdefed */
PASS(MANUALLY_UNROLLED_LOOPS), PASS(USE_TEXTURES_CONSTANTS), PASS(USE_TEXTURES_FIELDS),
PASS(USE_LAUNCH_BOUNDS),
@@ -72,18 +72,13 @@ static struct {
/* ----------------------------------------------------------------------------------------------- */
-// gpu runtime flags
int run_cuda = 0;
int run_opencl = 0;
/* ----------------------------------------------------------------------------------------------- */
-// CUDA initialization
-/* ----------------------------------------------------------------------------------------------- */
#ifdef USE_CUDA
-// initializes CUDA devices
-
static void initialize_cuda_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
int device_count = 0;
@@ -96,12 +91,12 @@ static void initialize_cuda_device(const char *platform_filter, const char *devi
//
// being verbose and catches error from first call to CUDA runtime function, without synchronize call
cudaError_t err = cudaGetLastError();
- if (err != cudaSuccess) {
+ if (err != cudaSuccess){
fprintf(stderr,"Error after cudaGetDeviceCount: %s\n", cudaGetErrorString(err));
- exit_on_error("\
-CUDA runtime error: cudaGetDeviceCount failed\n\n\
+ exit_on_error("CUDA runtime error: cudaGetDeviceCount failed\n\n\
please check if driver and runtime libraries work together\n\
-or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n");
+or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multiple MPI processes\n\n\
+exiting...\n");
}
// returns device count to fortran
@@ -133,7 +128,7 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
}
if (nbMatchingDevices == 0) {
- printf("Error: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
+ printf("ERROR: no matching devices for criteria %s/%s\n", platform_filter, device_filter);
exit(1);
}
@@ -144,7 +139,7 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
cudaGetDeviceProperties(&deviceProp, myDevice);
// exit if the machine has no CUDA-enabled device
- if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
+ if (deviceProp.major == 9999 && deviceProp.minor == 9999){
fprintf(stderr,"No CUDA-enabled device found, exiting...\n\n");
exit_on_error("CUDA runtime error: there is no CUDA-enabled device found\n");
}
@@ -160,15 +155,15 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
}
// debugging
- if (DEBUG) {
+ if (DEBUG){
do_output_info = 1;
sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
}
// output to file
- if (do_output_info) {
+ if( do_output_info ){
fp = fopen(filename,"w");
- if (fp != NULL) {
+ if (fp != NULL){
// display device properties
fprintf(fp,"Device Name = %s\n",deviceProp.name);
fprintf(fp,"memory:\n");
@@ -187,17 +182,17 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
fprintf(fp,"features:\n");
fprintf(fp," Compute capability of the device = %d.%d\n", deviceProp.major, deviceProp.minor);
fprintf(fp," multiProcessorCount: %d\n",deviceProp.multiProcessorCount);
- if (deviceProp.canMapHostMemory) {
+ if(deviceProp.canMapHostMemory){
fprintf(fp," canMapHostMemory: TRUE\n");
}else{
fprintf(fp," canMapHostMemory: FALSE\n");
}
- if (deviceProp.deviceOverlap) {
+ if(deviceProp.deviceOverlap){
fprintf(fp," deviceOverlap: TRUE\n");
}else{
fprintf(fp," deviceOverlap: FALSE\n");
}
- if (deviceProp.concurrentKernels) {
+ if(deviceProp.concurrentKernels){
fprintf(fp," concurrentKernels: TRUE\n");
}else{
fprintf(fp," concurrentKernels: FALSE\n");
@@ -215,17 +210,17 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
}
// make sure that the device has compute capability >= 1.3
- if (deviceProp.major < 1) {
+ if (deviceProp.major < 1){
fprintf(stderr,"Compute capability major number should be at least 1, exiting...\n\n");
exit_on_error("CUDA Compute capability major number should be at least 1\n");
}
- if (deviceProp.major == 1 && deviceProp.minor < 3) {
+ if (deviceProp.major == 1 && deviceProp.minor < 3){
fprintf(stderr,"Compute capability should be at least 1.3, exiting...\n");
exit_on_error("CUDA Compute capability major number should be at least 1.3\n");
}
// we use pinned memory for asynchronous copy
if (GPU_ASYNC_COPY) {
- if (! deviceProp.canMapHostMemory) {
+ if (! deviceProp.canMapHostMemory){
fprintf(stderr,"Device capability should allow to map host memory, exiting...\n");
exit_on_error("CUDA Device capability canMapHostMemory should be TRUE\n");
}
@@ -258,91 +253,50 @@ or on titan enable environment: CRAY_CUDA_PROXY=1 to use single GPU with multipl
}
#endif
-/* ----------------------------------------------------------------------------------------------- */
-// OpenCL initialization
-/* ----------------------------------------------------------------------------------------------- */
-
#ifdef USE_OPENCL
-
-// OpenCL mesh
struct _mesh_opencl mocl;
-// function definitions
cl_device_id oclGetMyDevice(int rank);
void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices);
-void build_kernels (void);
-// initializes OpenCL devices
+void build_kernels (void);
static void initialize_ocl_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
-
- // selects device
ocl_select_device(platform_filter, device_filter, myrank, nb_devices);
// outputs device info to file
char filename[BUFSIZ];
FILE *fp;
- int do_output_info = 0;
-
- // by default, only master process outputs device info to avoid file cluttering
- if (myrank == 0) {
- do_output_info = 1;
- sprintf(filename, "OUTPUT_FILES/gpu_device_info.txt");
+ sprintf (filename, "OUTPUT_FILES/gpu_device_info_proc_%06d.txt", myrank);
+ fp = fopen (filename, "a+");
+ if (fp) {
+ cl_device_type device_type;
+ size_t max_work_group_size;
+ cl_ulong local_mem_size;
+ cl_uint max_compute_units;
+ char name[1024];
+ size_t image2d_max_size[2];
+ // display device properties
+ clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(local_mem_size), &local_mem_size, NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
+ clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
+
+ fprintf (fp, "Device Name = %s\n", name);
+ fprintf (fp, "Type: %d\n", (int) device_type);
+ fprintf (fp, "local_mem_size: %zu\n", local_mem_size);
+ fprintf (fp, "max_compute_units: %u\n", max_compute_units);
+ fprintf (fp, "max_work_group_size: %lu\n", max_work_group_size);
+ fprintf (fp, "image2d_max_size: %zux%zu\n", image2d_max_size[0], image2d_max_size[1]);
+
+ fclose (fp);
}
- // debugging
- if (DEBUG) {
- do_output_info = 1;
- sprintf(filename,"OUTPUT_FILES/gpu_device_info_proc_%06d.txt",myrank);
- }
-
- // output to file
- if (do_output_info) {
- fp = fopen(filename,"w");
- if (fp != NULL) {
- cl_device_type device_type;
- size_t max_work_group_size;
- cl_ulong mem_size;
- cl_uint units;
- char name[1024];
- size_t image2d_max_size[2];
- // display device properties
- clGetDeviceInfo(mocl.device, CL_DEVICE_NAME, sizeof(name), name, NULL);
- fprintf (fp, "Device Name = %s\n", name);
- clGetDeviceInfo(mocl.device, CL_DEVICE_VENDOR, sizeof(name), name, NULL);
- fprintf (fp, "Device Vendor = %s\n", name);
- fprintf (fp, "Memory:\n");
- clGetDeviceInfo(mocl.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
- fprintf (fp, " local_mem_size (in KB) : %f\n", mem_size / 1024.f);
- clGetDeviceInfo(mocl.device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(mem_size), &mem_size, NULL);
- fprintf (fp, " global_mem_size (in MB): %f\n", mem_size / (1024.f * 1024.f));
- clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_size[0], NULL);
- clGetDeviceInfo(mocl.device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_size[1], NULL);
- fprintf (fp, " image2d_max_size: %zu x %zu\n", image2d_max_size[0], image2d_max_size[1]);
- fprintf(fp,"blocks:\n");
- clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(units), &units, NULL);
- fprintf (fp, " max_compute_units: %u\n", units);
- clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL);
- fprintf (fp, " max_work_group_size: %lu\n", max_work_group_size);
- fprintf(fp,"features:\n");
- clGetDeviceInfo(mocl.device, CL_DEVICE_VERSION, sizeof(name), name, NULL);
- fprintf (fp, " device version : %s\n", name);
- clGetDeviceInfo(mocl.device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
- fprintf (fp, " device type: %d\n", (int) device_type);
- clGetDeviceInfo(mocl.device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(units), &units, NULL);
- fprintf (fp, " device max_clock_frequency: %u\n", units);
- clGetDeviceInfo(mocl.device, CL_DRIVER_VERSION, sizeof(name), name, NULL);
- fprintf (fp, " driver version : %s\n", name);
-
- fclose (fp);
- }
- }
-
- // builds OpenCL kernels
build_kernels();
-
}
-
#define xQUOTE(str) #str
#define QUOTE(str) xQUOTE(str)
@@ -352,62 +306,62 @@ static void initialize_ocl_device(const char *platform_filter, const char *devic
#define _OCL_GPU_CFLAGS ""
#endif
-/* ----------------------------------------------------------------------------------------------- */
-
#define PARAMETER_STR_SIZE 1024
-
void build_kernels (void) {
-
static char parameters[PARAMETER_STR_SIZE] = _OCL_GPU_CFLAGS " ";
cl_int errcode;
char *pos = parameters + strlen(_OCL_GPU_CFLAGS) + 1;
int len = PARAMETER_STR_SIZE;
int i;
- // adds preprocessor definitions
- // e.g. -DNDIM=3 -DNGLLX=5 ..
for(i = 0; _macro_to_kernel[i].name != NULL; i++) {
if (!strcmp(_macro_to_kernel[i].name, _macro_to_kernel[i].value)) {
continue;
}
if (!len) {
- printf("Error: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
- exit(1);
+ printf("ERROR: OpenCL buffer for macro parameters is not large enough, please review its size (%s:%d)\n", __FILE__, __LINE__);
}
int written = snprintf(pos, len, "-D%s=%s ", _macro_to_kernel[i].name, _macro_to_kernel[i].value);
pos += written;
len -= written;
}
- // debug
- //printf("building OpenCL kernels: parameters = %s \n",parameters);
-
- // adds kernels as const char definitions
#include "kernel_inc_cl.c"
- // defines OpenCL build program macro
#undef BOAST_KERNEL
#define BOAST_KERNEL(__kern_name__) \
- mocl.programs.__kern_name__##_program = clCreateProgramWithSource( mocl.context, 1, \
- &__kern_name__##_program, NULL, clck_(&errcode));\
- mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, 0, NULL, parameters, NULL, NULL);\
+ mocl.programs.__kern_name__##_program = clCreateProgramWithSource( \
+ mocl.context, 1, \
+ &__kern_name__##_program, NULL, clck_(&errcode));\
+ mocl_errcode = clBuildProgram(mocl.programs.__kern_name__##_program, \
+ 0, NULL, parameters, NULL, NULL); \
if (mocl_errcode != CL_SUCCESS) { \
- fprintf(stderr,"OpenCL Error: Failed to build program "#__kern_name__": %s\n", clewErrorString(mocl_errcode)); \
+ fprintf(stderr,"Error: Failed to build program "#__kern_name__": %s\n", \
+ clewErrorString(mocl_errcode)); \
char cBuildLog[10240]; \
- clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, mocl.device, CL_PROGRAM_BUILD_LOG, \
- sizeof(cBuildLog), cBuildLog, NULL ); \
- fprintf(stderr,"OpenCL Log: %s\n",cBuildLog); \
+ clGetProgramBuildInfo(mocl.programs.__kern_name__##_program, \
+ mocl.device, \
+ CL_PROGRAM_BUILD_LOG, \
+ sizeof(cBuildLog), cBuildLog, NULL ); \
+ fprintf(stderr,"%s\n",cBuildLog); \
exit(1); \
} \
- mocl.kernels.__kern_name__ = clCreateKernel (mocl.programs.__kern_name__ ## _program, #__kern_name__ , clck_(&errcode));
+ mocl.kernels.__kern_name__ = clCreateKernel ( \
+ mocl.programs.__kern_name__ ## _program, \
+ #__kern_name__ , clck_(&errcode));
- // builds each OpenCL kernel
#include "kernel_list.h"
-
}
+void release_kernels (void) {
+#undef BOAST_KERNEL
+#define BOAST_KERNEL(__kern_name__) \
+ clCheck (clReleaseKernel (mocl.kernels.__kern_name__)); \
+ clCheck (clReleaseProgram (mocl.programs.__kern_name__ ## _program));
+
+ #include "kernel_list.h"
+}
-/* ----------------------------------------------------------------------------------------------- */
struct _opencl_version {
cl_uint minor;
@@ -417,305 +371,188 @@ struct _opencl_version opencl_version_1_0 = {1,0};
struct _opencl_version opencl_version_1_1 = {1,1};
struct _opencl_version opencl_version_1_2 = {1,2};
-/* ----------------------------------------------------------------------------------------------- */
-
cl_int compare_opencl_version(struct _opencl_version v1, struct _opencl_version v2) {
- if (v1.major > v2.major)
+ if(v1.major > v2.major)
return 1;
- if (v1.major < v2.major)
+ if(v1.major < v2.major)
return -1;
- if (v1.minor > v2.minor)
+ if(v1.minor > v2.minor)
return 1;
- if (v1.minor < v2.minor)
+ if(v1.minor < v2.minor)
return -1;
return 0;
}
-/* ----------------------------------------------------------------------------------------------- */
-
static void get_platform_version(cl_platform_id platform_id, struct _opencl_version *version) {
+ size_t cl_platform_version_size;
+ clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
- size_t cl_platform_version_size;
- clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, 0, NULL, &cl_platform_version_size));
+ char *cl_platform_version;
+ cl_platform_version = (char *) malloc(cl_platform_version_size);
- char *cl_platform_version;
- cl_platform_version = (char *) malloc(cl_platform_version_size);
-
- if (cl_platform_version == NULL) {
- fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
- exit(1);
- }
+ if (cl_platform_version == NULL) {
+ fprintf(stderr,"Error: Failed to create string (out of memory)!\n");
+ exit(1);
+ }
- clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
-
- //OpenCL<space><major_version.minor_version><space><platform-specific information>
- char minor[2], major[2];
- major[0] = cl_platform_version[7];
- major[1] = 0;
- minor[0] = cl_platform_version[9];
- minor[1] = 0;
- version->major = atoi(major);
- version->major = atoi(minor);
- free(cl_platform_version);
+ clCheck(clGetPlatformInfo(platform_id, CL_PLATFORM_VERSION, cl_platform_version_size, cl_platform_version, NULL));
+ //OpenCL<space><major_version.minor_version><space><platform-specific information>
+ char minor[2], major[2];
+ major[0] = cl_platform_version[7];
+ major[1] = 0;
+ minor[0] = cl_platform_version[9];
+ minor[1] = 0;
+ version->major = atoi(major);
+ version->major = atoi(minor);
+ free(cl_platform_version);
}
-/* ----------------------------------------------------------------------------------------------- */
-
#define OCL_DEV_TYPE CL_DEVICE_TYPE_ALL
-
void ocl_select_device(const char *platform_filter, const char *device_filter, int myrank, int *nb_devices) {
+ cl_int errcode = CL_SUCCESS;
+ cl_platform_id *platform_ids;
+ cl_uint num_platforms;
- cl_int errcode = CL_SUCCESS;
- cl_platform_id *platform_ids;
- cl_uint num_platforms;
-
- // first OpenCL call
- // only gets number of platforms
- clCheck( clGetPlatformIDs(0, NULL, &num_platforms) );
-
- // checks if OpenCL platforms available
- if (num_platforms == 0) {
- fprintf(stderr,"OpenCL error: No OpenCL platform available!\n");
- exit(1);
- }
+ clGetPlatformIDs(0, NULL, &num_platforms);
- platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
+ if (num_platforms == 0) {
+ fprintf(stderr,"No OpenCL platform available!\n");
+ exit(1);
+ }
- // gets platform infos
- clCheck( clGetPlatformIDs(num_platforms, platform_ids, NULL));
+ platform_ids = (cl_platform_id *) malloc(num_platforms * sizeof(cl_platform_id));
- cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
+ clGetPlatformIDs(num_platforms, platform_ids, NULL);
- // temporary array to store infos
- int i,j;
- char *info_all[num_platforms][2];
- // initializes pointers
- for (i = 0; i < num_platforms; i++) {
- info_all[i][0] = NULL;
- info_all[i][1] = NULL;
- }
+ cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, 0, 0 };
+ if (strlen(platform_filter)) {
+ cl_uint found = 0;
+ cl_uint i;
- // looks for platform matching GPU_PLATFORM string given in Par_file
- if (strlen(platform_filter)) {
- cl_uint found = 0;
+ for (i = 0; i < num_platforms && !found; i++) {
+ size_t info_length;
+ char *info;
- for (i = 0; i < num_platforms && !found; i++) {
- size_t info_length;
- char *info;
+ int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
+ int j;
- // checks vendor and platform names for matching with GPU_PLATFORM
- int props_to_check[] = {CL_PLATFORM_VENDOR, CL_PLATFORM_NAME};
- for (j = 0; j < 2 && !found; j++) {
- // gets property info length
- clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length));
+ for (j = 0; j < 2 && !found; j++) {
+ clGetPlatformInfo(platform_ids[i], props_to_check[j], 0, NULL, &info_length);
- // checks info
- if (info_length == 0) {
- fprintf(stderr,"OpenCL error: No OpenCL platform info available!\n");
- exit(1);
- }
+ info = (char *) malloc(info_length * sizeof(char));
- // allocates info buffer and gets info string
- info = (char *) malloc(info_length * sizeof(char));
- clCheck( clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL));
+ clGetPlatformInfo(platform_ids[i], props_to_check[j], info_length, info, NULL);
- // stores info
- info_all[i][j] = malloc( strlen(info) + 1);
- strcpy(info_all[i][j],info);
+ if (strcasestr(info, platform_filter)) {
+ properties[1] = (cl_context_properties) platform_ids[i];
+ found = 1;
+ }
- // sets matching platform id
- if (strcasestr(info, platform_filter)) {
- properties[1] = (cl_context_properties) platform_ids[i];
- found = 1;
+ free(info);
}
- // frees temporary array
- free(info);
}
- }
- // checks if platform found
- if (!found) {
- if (myrank == 0) {
- fprintf(stderr, "\nAvailable platforms are:\n");
- for (i = 0; i < num_platforms; i++) {
- if (info_all[i][0]) { fprintf(stderr, " platform %i: vendor = %s , name = %s\n",i,info_all[i][0],info_all[i][1]);}
- }
- fprintf(stderr, "Please check your parameter GPU_PLATFORM in Par_file\n\n");
+ if (!found) {
+ fprintf(stderr, "No matching OpenCL platform available : %s!\n", platform_filter);
+ exit(1);
}
- // frees info array
- for (i = 0; i < num_platforms; i++) {
- if (info_all[i][0]) { free(info_all[i][0]); }
- if (info_all[i][1]) { free(info_all[i][1]); }
- }
- // exits
- fprintf(stderr, "No matching OpenCL platform available : %s\n", platform_filter);
- exit(1);
+ } else {
+ properties[1] = (cl_context_properties) platform_ids[0];
}
- // frees info array
- for (i = 0; i < num_platforms; i++) {
- if (info_all[i][0]) { free(info_all[i][0]); }
- if (info_all[i][1]) { free(info_all[i][1]); }
- }
-
- } else {
- // wild-card platform filter given (GPU_PLATFORM set to '*'), takes first platform
- properties[1] = (cl_context_properties) platform_ids[0];
- }
-
- // searches for device
- if (strlen(device_filter)) {
- cl_uint found = 0;
- cl_uint i;
- cl_uint num_devices;
- cl_device_id *device_ids;
- cl_device_id *matching_device_ids;
-
- // only gets number of devices for this platform
- clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices));
-
- // checks
- if (num_devices == 0) {
- fprintf(stderr,"No OpenCL device of type %d!\n", (int) OCL_DEV_TYPE);
- exit(1);
- }
-
- device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
-
- matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
-
- // gets device infos
- clCheck( clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL));
+ if (strlen(device_filter)) {
+ cl_uint found = 0;
+ cl_uint i;
+ cl_uint num_devices;
+ cl_device_id *device_ids;
+ cl_device_id *matching_device_ids;
+
+ clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, 0, NULL, &num_devices);
+ if (num_devices == 0) {
+ fprintf(stderr,"No device of type %d!\n", (int) OCL_DEV_TYPE);
+ exit(1);
+ }
- // temporary array to store device infos
- char *info_device_all[num_devices];
- // initializes pointers
- for (i = 0; i < num_devices; i++) {
- info_device_all[i] = NULL;
- }
+ device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
- // searches device matching GPU_DEVICE string
- for (i = 0; i < num_devices; i++) {
- size_t info_length;
- char *info;
+ matching_device_ids = (cl_device_id *) malloc(num_devices * sizeof(cl_device_id));
- clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length));
+ clGetDeviceIDs((cl_platform_id) properties[1], OCL_DEV_TYPE, num_devices, device_ids, NULL);
+ for (i = 0; i < num_devices; i++) {
+ size_t info_length;
+ char *info;
- info = (char *) malloc(info_length * sizeof(char));
+ clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 0, NULL, &info_length);
- clCheck( clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL));
+ info = (char *) malloc(info_length * sizeof(char));
- // stores info
- info_device_all[i] = malloc( strlen(info) + 1);
- strcpy(info_device_all[i],info);
+ clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, info_length, info, NULL);
+ if (strcasestr(info, device_filter)) {
+ matching_device_ids[found] = device_ids[i];
+ found++;
+ }
- // sets matching device id
- if (strcasestr(info, device_filter)) {
- matching_device_ids[found] = device_ids[i];
- found++;
+ free(info);
}
- free(info);
- }
-
- if (!found) {
- // user output
- if (myrank == 0) {
- fprintf(stderr, "\nAvailable devices are:\n");
- for (i = 0; i < num_devices; i++) {
- if (info_device_all[i]) { fprintf(stderr, " device %i: name = %s\n",i,info_device_all[i]);}
- }
- fprintf(stderr, "Please check your parameter GPU_DEVICE in Par_file\n\n");
+ if (!found) {
+ fprintf(stderr, "No matching OpenCL device available : %s!\n", device_filter);
+ exit(1);
}
- // frees info array
- for (i = 0; i < num_devices; i++) {
- if (info_device_all[i]) { free(info_device_all[i]); }
- }
- // exits
- fprintf(stderr, "No matching OpenCL device available : %s\n", device_filter);
- exit(1);
- }
- // creates an OpenCL context
- mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
-
- // frees temporary arrays
- free (matching_device_ids);
- free (device_ids);
- // frees info array
- for (i = 0; i < num_devices; i++) {
- if (info_device_all[i]) { free(info_device_all[i]); }
+ mocl.context = clCreateContext(properties, found, matching_device_ids, NULL, NULL, clck_(&errcode));
+ free (matching_device_ids);
+ free (device_ids);
+ } else {
+ mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
}
- } else {
- // wild-card GPU_DEVICE set to '*'
- mocl.context = clCreateContextFromType(properties, OCL_DEV_TYPE, NULL, NULL, clck_(&errcode));
- }
-
- //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
- struct _opencl_version platform_version;
- get_platform_version((cl_platform_id) properties[1], &platform_version);
-
+ //get the number of devices available in the context (devices which are of DEVICE_TYPE_GPU of platform platform_ids[0])
+ struct _opencl_version platform_version;
+ get_platform_version((cl_platform_id) properties[1], &platform_version);
#ifdef CL_VERSION_1_1
- if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
- clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
- } else
+ if (compare_opencl_version(platform_version, opencl_version_1_1) >= 0 ) {
+ clGetContextInfo(mocl.context, CL_CONTEXT_NUM_DEVICES, sizeof(*nb_devices), nb_devices, NULL);
+ } else
#endif
- {
- size_t nContextDescriptorSize;
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
- *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
- }
-
- // stores info in mesh opencl structure
- mocl.nb_devices = *nb_devices;
- free(platform_ids);
+ {
+ size_t nContextDescriptorSize;
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, 0, &nContextDescriptorSize);
+ *nb_devices = nContextDescriptorSize / sizeof(cl_device_id);
+ }
+ mocl.nb_devices = *nb_devices;
+ free(platform_ids);
- size_t szParmDataBytes;
- cl_device_id* cdDevices;
+ size_t szParmDataBytes;
+ cl_device_id* cdDevices;
- // get the list of GPU devices associated with this context
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
- cdDevices = (cl_device_id *) malloc(szParmDataBytes);
+ // get the list of GPU devices associated with context
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
+ cdDevices = (cl_device_id *) malloc(szParmDataBytes);
- clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
+ clGetContextInfo(mocl.context, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
- mocl.device = cdDevices[myrank % mocl.nb_devices];
- free(cdDevices);
+ mocl.device = cdDevices[myrank % mocl.nb_devices];
+ free(cdDevices);
- // command kernel queues
- mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
- if (GPU_ASYNC_COPY) {
- mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
- }
+ mocl.command_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
+ mocl.copy_queue = clCreateCommandQueue(mocl.context, mocl.device, 0, clck_(&errcode));
}
#endif
-/* ----------------------------------------------------------------------------------------------- */
-// GPU initialization
-/* ----------------------------------------------------------------------------------------------- */
-
#define isspace(c) ((c) == ' ')
static char *trim_and_default(char *s)
{
// trim before
- while (*s != '\0' && isspace(*s)) { s++; }
+ while (*s != '\0' && isspace(*s)) s++;
if (*s == '\0') {
return s;
}
- // note: the platform_filter argument acts weird on apple platforms, giving a string "NVIDIA Geforce", instead of just "NVIDIA" and "Geforce"
- // here we assume that maximum length of GPU_PLATFORM is 11 characters
- // todo - find better way to avoid this?
- // debug
- //printf("string: %s has length %i \n",s,strlen(s));
- int len = strlen(s);
- if (len > 11 ) len = 11;
-
// trim after
- char *back = s + len;
+ char *back = s + strlen(s);
while (isspace(*--back));
*(back + 1) = '\0';
@@ -727,57 +564,38 @@ static char *trim_and_default(char *s)
return s;
}
-/* ----------------------------------------------------------------------------------------------- */
-
enum gpu_runtime_e {COMPILE, CUDA, OPENCL};
-
extern EXTERN_LANG
void FC_FUNC_ (initialize_gpu_device,
INITIALIZE_GPU_DEVICE) (int *runtime_f, char *platform_filter, char *device_filter, int *myrank_f, int *nb_devices) {
-
TRACE ("initialize_device");
enum gpu_runtime_e runtime_type = (enum gpu_runtime_e) *runtime_f;
- // trims GPU_PLATFORM and GPU_DEVICE strings
platform_filter = trim_and_default(platform_filter);
device_filter = trim_and_default(device_filter);
- // sets and checks gpu runtime flags
#if defined(USE_OPENCL) && defined(USE_CUDA)
run_cuda = runtime_type == CUDA;
run_opencl = runtime_type == OPENCL;
if (runtime_type == COMPILE) {
- if (*myrank_f == 0) {
- printf("\
-Error: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled.\n\
-Please set Par_file accordingly...\n\n", COMPILE, OPENCL, CUDA);
- }
+ printf("ERROR: GPU_RUNTIME set to compile time decision (%d), but both OpenCL (%d) and CUDA (%d) are compiled ...\n", COMPILE, OPENCL, CUDA);
exit(1);
}
#elif defined(USE_OPENCL)
run_opencl = 1;
if (runtime_type != COMPILE && runtime_type != OPENCL) {
- if (*myrank_f == 0) {
- printf("\
-Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d).\n\
-This simulation will continue using the OpenCL runtime...\n\n", runtime_type, OPENCL, COMPILE);
- }
+ printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with OpenCL-only compilation (OPENCL=%d, COMPILE=%d). Defaulting to OpenCL.\n", runtime_type, OPENCL, COMPILE);
}
#elif defined(USE_CUDA)
run_cuda = 1;
if (runtime_type != COMPILE && runtime_type != CUDA) {
- if (*myrank_f == 0) {
- printf("\
-Warning: GPU_RUNTIME parameter in Par_file set to (%d) is incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d).\n\
-This simulation will continue using the Cuda runtime...\n", runtime_type, CUDA, COMPILE);
- }
+ printf("WARNING: GPU_RUNTIME parameter (=%d) incompatible with Cuda-only compilation (CUDA=%d, COMPILE=%d). Defaulting to Cuda.\n", runtime_type, CUDA, COMPILE);
}
#else
- #error "GPU code compiled but neither CUDA nor OpenCL are enabled"
+ #error "GPU code compiled but neither Cuda nor OpenCL are enabled"
#endif
- // initializes gpu cards
#ifdef USE_OPENCL
if (run_opencl) {
initialize_ocl_device(platform_filter, device_filter, *myrank_f, nb_devices);
diff --git a/src/gpu/mesh_constants_gpu.h b/src/gpu/mesh_constants_gpu.h
index fa93199..6e51c5d 100644
--- a/src/gpu/mesh_constants_gpu.h
+++ b/src/gpu/mesh_constants_gpu.h
@@ -928,7 +928,7 @@ typedef struct mesh_ {
// specific OpenCL texture arrays
#ifdef USE_OPENCL
// note: need to be defined as they are passed as function arguments
- // USE_TEXTURES_FIELDS
+#ifdef USE_TEXTURES_FIELDS
// forward
cl_mem d_displ_cm_tex;
cl_mem d_accel_cm_tex;
@@ -948,12 +948,14 @@ typedef struct mesh_ {
cl_mem d_b_displ_ic_tex;
cl_mem d_b_accel_ic_tex;
- // USE_TEXTURES_CONSTANTS
+#endif
+#ifdef USE_TEXTURES_CONSTANTS
// hprime
cl_mem d_hprime_xx_cm_tex;
// weighted hprime
cl_mem d_hprimewgll_xx_cm_tex;
#endif
+#endif
// ------------------------------------------------------------------ //
// LDDRK
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index abde0fc..2dd64a7 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -159,9 +159,6 @@ void FC_FUNC_ (prepare_constants_device,
mp->d_hprime_xx_cm_tex = clCreateImage2D (mocl.context, CL_MEM_READ_ONLY, &format, NGLL2, 1, 0, mp->d_hprime_xx.ocl, clck_(&errcode));
mp->d_hprimewgll_xx_cm_tex = clCreateImage2D (mocl.context, CL_MEM_READ_ONLY, &format, NGLL2, 1, 0, mp->d_hprimewgll_xx.ocl, clck_(&errcode));
-#else //USE_TEXTURES_CONSTANTS
- mp->d_hprime_xx_cm_tex = moclGetDummyImage2D(mp);
- mp->d_hprimewgll_xx_cm_tex = moclGetDummyImage2D(mp);
#endif //USE_TEXTURES_CONSTANTS
}
#endif
@@ -1622,12 +1619,6 @@ void FC_FUNC_ (prepare_crust_mantle_device,
mp->d_b_displ_cm_tex = moclGetDummyImage2D(mp);
mp->d_b_accel_cm_tex = moclGetDummyImage2D(mp);
}
-#else
- mp->d_displ_cm_tex = moclGetDummyImage2D(mp);
- mp->d_accel_cm_tex = moclGetDummyImage2D(mp);
- // backward/reconstructed fields
- mp->d_b_displ_cm_tex = moclGetDummyImage2D(mp);
- mp->d_b_accel_cm_tex = moclGetDummyImage2D(mp);
#endif
}
#endif
@@ -2307,12 +2298,6 @@ void FC_FUNC_ (prepare_inner_core_device,
mp->d_b_displ_ic_tex = moclGetDummyImage2D(mp);
mp->d_b_accel_ic_tex = moclGetDummyImage2D(mp);
}
-#else
- mp->d_displ_ic_tex = moclGetDummyImage2D(mp);
- mp->d_accel_ic_tex = moclGetDummyImage2D(mp);
- // backward/reconstructed fields
- mp->d_b_displ_ic_tex = moclGetDummyImage2D(mp);
- mp->d_b_accel_ic_tex = moclGetDummyImage2D(mp);
#endif
}
#endif
@@ -2570,8 +2555,10 @@ void FC_FUNC_ (prepare_cleanup_device,
//------------------------------------------
#ifdef USE_OPENCL
if (run_opencl) {
+#ifdef USE_TEXTURES_CONSTANTS
clReleaseMemObject (mp->d_hprime_xx.ocl);
clReleaseMemObject (mp->d_hprimewgll_xx.ocl);
+#endif
clReleaseMemObject (mp->d_wgllwgll_xy.ocl);
clReleaseMemObject (mp->d_wgllwgll_xz.ocl);
@@ -3046,9 +3033,9 @@ void FC_FUNC_ (prepare_cleanup_device,
gpuFree (&mp->d_normal_ocean_load);
}
+#ifdef USE_TEXTURES_FIELDS
#ifdef USE_OPENCL
if (run_opencl) {
- // note: texture arrays in OpenCL are always allocated (either dummy or valid ones)
clReleaseMemObject (mp->d_displ_cm_tex);
clReleaseMemObject (mp->d_accel_cm_tex);
clReleaseMemObject (mp->d_b_displ_cm_tex);
@@ -3068,6 +3055,7 @@ void FC_FUNC_ (prepare_cleanup_device,
clReleaseMemObject (mp->d_hprimewgll_xx_cm_tex);
}
#endif
+#endif
// synchronizes device
gpuSynchronize();
More information about the CIG-COMMITS mailing list