[cig-commits] [commit] devel, master: remove reqd_work_group_size from OpenCL kernels (inefficient) (e45b2ec)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Thu Nov 6 08:15:58 PST 2014


Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit e45b2ec8ca697bcad7f9a3f4e5cec8ee84de61b2
Author: Kevin Pouget <kevin.pouget at imag.fr>
Date:   Thu May 15 10:07:53 2014 +0200

    remove reqd_work_group_size from OpenCL kernels (inefficient)


>---------------------------------------------------------------

e45b2ec8ca697bcad7f9a3f4e5cec8ee84de61b2
 src/gpu/boast/inner_core_impl_kernel_forward.rb           | 2 +-
 src/gpu/kernels.gen/crust_mantle_impl_kernel_adjoint_cl.c | 2 +-
 src/gpu/kernels.gen/crust_mantle_impl_kernel_forward_cl.c | 2 +-
 src/gpu/kernels.gen/inner_core_impl_kernel_adjoint_cl.c   | 2 +-
 src/gpu/kernels.gen/inner_core_impl_kernel_forward_cl.c   | 2 +-
 src/gpu/rules.mk                                          | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gpu/boast/inner_core_impl_kernel_forward.rb b/src/gpu/boast/inner_core_impl_kernel_forward.rb
index 881af18..da09cd8 100644
--- a/src/gpu/boast/inner_core_impl_kernel_forward.rb
+++ b/src/gpu/boast/inner_core_impl_kernel_forward.rb
@@ -553,7 +553,7 @@ module BOAST
     if(get_lang == CUDA ) then
       qualifiers = "\n#ifdef #{use_launch_bounds}\n__launch_bounds__(#{ngll3_padded}, #{launch_min_blocks})\n#endif\n"
     elsif(get_lang == CL ) then
-      qualifiers = "__attribute__((reqd_work_group_size(#{ngll3_padded},1,1))) "
+      qualifiers = "" # "__attribute__((reqd_work_group_size(#{ngll3_padded},1,1))) " (inefficient)
     end
 
     p = Procedure(function_name, v, constants, :qualifiers => qualifiers)
diff --git a/src/gpu/kernels.gen/crust_mantle_impl_kernel_adjoint_cl.c b/src/gpu/kernels.gen/crust_mantle_impl_kernel_adjoint_cl.c
index 192c6dd..a1af17b 100644
--- a/src/gpu/kernels.gen/crust_mantle_impl_kernel_adjoint_cl.c
+++ b/src/gpu/kernels.gen/crust_mantle_impl_kernel_adjoint_cl.c
@@ -413,7 +413,7 @@ void compute_element_cm_tiso(const int offset, const __global float * d_kappavst
   *(sigma_xz) = (c15) * (duxdxl) + (c56) * (duxdyl_plus_duydxl) + (c25) * (duydyl) + (c55) * (duzdxl_plus_duxdzl) + (c45) * (duzdyl_plus_duydzl) + (c35) * (duzdzl);\n\
   *(sigma_yz) = (c14) * (duxdxl) + (c46) * (duxdyl_plus_duydxl) + (c24) * (duydyl) + (c45) * (duzdxl_plus_duxdzl) + (c44) * (duzdyl_plus_duydzl) + (c34) * (duzdzl);\n\
 }\n\
-__kernel __attribute__((reqd_work_group_size(NGLL3_PADDED,1,1)))  void crust_mantle_impl_kernel_adjoint(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_ispec_is_tiso, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d
 _wgllwgll_yz, const __global float * restrict d_kappavstore, const __global float * restrict d_muvstore, const __global float * restrict d_kappahstore, const __global float * restrict d_muhstore, const __global float * restrict d_eta_anisostore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __glo
 bal float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c14store, const __global float * restrict d_c15store, const __global float * restrict d_c16store, const __global float * restrict d_c22store, const __global float * restrict d_c23store, const __global float * restrict d_c24store, const __global float * restrict d_c25store, const __global float * restrict d_c26store, const __global float * restrict d_c33store, const __global float * restrict d_c34store, const __global float * restrict d_c35store, const __global float * restrict d_c36store, const __global float * restrict d_c44store, const __global float * restrict d_c45store, const __global float * restrict d_c46store, const __global float * restrict d_c55store, const __global float * restrict d_c56store, const __global float * restrict d_c66store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * re
 strict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_CRUST_MANTLE_STRAIN_ONLY, __read_only image2d_t d_b_displ_cm_tex, __read_only image2d_t d_b_accel_cm_tex, __read_only image2d_t d_hprime_xx_tex){\n\
+__kernel  void crust_mantle_impl_kernel_adjoint(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_ispec_is_tiso, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllwgll_yz, const __global float * restrict d_kappavst
 ore, const __global float * restrict d_muvstore, const __global float * restrict d_kappahstore, const __global float * restrict d_muhstore, const __global float * restrict d_eta_anisostore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * 
 restrict d_c13store, const __global float * restrict d_c14store, const __global float * restrict d_c15store, const __global float * restrict d_c16store, const __global float * restrict d_c22store, const __global float * restrict d_c23store, const __global float * restrict d_c24store, const __global float * restrict d_c25store, const __global float * restrict d_c26store, const __global float * restrict d_c33store, const __global float * restrict d_c34store, const __global float * restrict d_c35store, const __global float * restrict d_c36store, const __global float * restrict d_c44store, const __global float * restrict d_c45store, const __global float * restrict d_c46store, const __global float * restrict d_c55store, const __global float * restrict d_c56store, const __global float * restrict d_c66store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus
 _gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_CRUST_MANTLE_STRAIN_ONLY, __read_only image2d_t d_b_displ_cm_tex, __read_only image2d_t d_b_accel_cm_tex, __read_only image2d_t d_hprime_xx_tex){\n\
 #ifdef USE_TEXTURES_FIELDS\n\
   const sampler_t sampler_d_b_displ_cm_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
   const sampler_t sampler_d_b_accel_cm_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
diff --git a/src/gpu/kernels.gen/crust_mantle_impl_kernel_forward_cl.c b/src/gpu/kernels.gen/crust_mantle_impl_kernel_forward_cl.c
index 003a43f..104bff6 100644
--- a/src/gpu/kernels.gen/crust_mantle_impl_kernel_forward_cl.c
+++ b/src/gpu/kernels.gen/crust_mantle_impl_kernel_forward_cl.c
@@ -413,7 +413,7 @@ void compute_element_cm_tiso(const int offset, const __global float * d_kappavst
   *(sigma_xz) = (c15) * (duxdxl) + (c56) * (duxdyl_plus_duydxl) + (c25) * (duydyl) + (c55) * (duzdxl_plus_duxdzl) + (c45) * (duzdyl_plus_duydzl) + (c35) * (duzdzl);\n\
   *(sigma_yz) = (c14) * (duxdxl) + (c46) * (duxdyl_plus_duydxl) + (c24) * (duydyl) + (c45) * (duzdxl_plus_duxdzl) + (c44) * (duzdyl_plus_duydzl) + (c34) * (duzdzl);\n\
 }\n\
-__kernel __attribute__((reqd_work_group_size(NGLL3_PADDED,1,1)))  void crust_mantle_impl_kernel_forward(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_ispec_is_tiso, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d
 _wgllwgll_yz, const __global float * restrict d_kappavstore, const __global float * restrict d_muvstore, const __global float * restrict d_kappahstore, const __global float * restrict d_muhstore, const __global float * restrict d_eta_anisostore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __glo
 bal float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c14store, const __global float * restrict d_c15store, const __global float * restrict d_c16store, const __global float * restrict d_c22store, const __global float * restrict d_c23store, const __global float * restrict d_c24store, const __global float * restrict d_c25store, const __global float * restrict d_c26store, const __global float * restrict d_c33store, const __global float * restrict d_c34store, const __global float * restrict d_c35store, const __global float * restrict d_c36store, const __global float * restrict d_c44store, const __global float * restrict d_c45store, const __global float * restrict d_c46store, const __global float * restrict d_c55store, const __global float * restrict d_c56store, const __global float * restrict d_c66store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * re
 strict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_CRUST_MANTLE_STRAIN_ONLY, __read_only image2d_t d_displ_cm_tex, __read_only image2d_t d_accel_cm_tex, __read_only image2d_t d_hprime_xx_tex){\n\
+__kernel  void crust_mantle_impl_kernel_forward(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_ispec_is_tiso, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllwgll_yz, const __global float * restrict d_kappavst
 ore, const __global float * restrict d_muvstore, const __global float * restrict d_kappahstore, const __global float * restrict d_muhstore, const __global float * restrict d_eta_anisostore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * 
 restrict d_c13store, const __global float * restrict d_c14store, const __global float * restrict d_c15store, const __global float * restrict d_c16store, const __global float * restrict d_c22store, const __global float * restrict d_c23store, const __global float * restrict d_c24store, const __global float * restrict d_c25store, const __global float * restrict d_c26store, const __global float * restrict d_c33store, const __global float * restrict d_c34store, const __global float * restrict d_c35store, const __global float * restrict d_c36store, const __global float * restrict d_c44store, const __global float * restrict d_c45store, const __global float * restrict d_c46store, const __global float * restrict d_c55store, const __global float * restrict d_c56store, const __global float * restrict d_c66store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus
 _gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_CRUST_MANTLE_STRAIN_ONLY, __read_only image2d_t d_displ_cm_tex, __read_only image2d_t d_accel_cm_tex, __read_only image2d_t d_hprime_xx_tex){\n\
 #ifdef USE_TEXTURES_FIELDS\n\
   const sampler_t sampler_d_displ_cm_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
   const sampler_t sampler_d_accel_cm_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
diff --git a/src/gpu/kernels.gen/inner_core_impl_kernel_adjoint_cl.c b/src/gpu/kernels.gen/inner_core_impl_kernel_adjoint_cl.c
index 13f2aa4..c0788ff 100644
--- a/src/gpu/kernels.gen/inner_core_impl_kernel_adjoint_cl.c
+++ b/src/gpu/kernels.gen/inner_core_impl_kernel_adjoint_cl.c
@@ -196,7 +196,7 @@ void compute_element_ic_att_memory(const int tx, const int working_element, cons
     R_yz[offset - 0] = (alphaval_loc) * (R_yz[offset - 0]) + (betaval_loc) * (sn) + (gammaval_loc) * (snp1);\n\
   }\n\
 }\n\
-__kernel __attribute__((reqd_work_group_size(NGLL3_PADDED,1,1)))  void inner_core_impl_kernel_adjoint(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_idoubling, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllw
 gll_yz, const __global float * restrict d_kappavstore, const __global float * restrict d_muvstore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c33store, const __global float * res
 trict d_c44store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_INNER_CORE_STRAIN_ONLY, const int NSPEC_INNER_CORE, __read_only image2d_t d_b_displ_ic_tex, __read_only image2d_t d_b_accel_ic_tex){\n\
+__kernel  void inner_core_impl_kernel_adjoint(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_idoubling, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllwgll_yz, const __global float * restrict d_kappavstore, c
 onst __global float * restrict d_muvstore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c33store, const __global float * restrict d_c44store, const int GRAVITY, const __global floa
 t * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_INNER_CORE_STRAIN_ONLY, const int NSPEC_INNER_CORE, __read_only image2d_t d_b_displ_ic_tex, __read_only image2d_t d_b_accel_ic_tex){\n\
 #ifdef USE_TEXTURES_FIELDS\n\
   const sampler_t sampler_d_b_displ_ic_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
   const sampler_t sampler_d_b_accel_ic_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
diff --git a/src/gpu/kernels.gen/inner_core_impl_kernel_forward_cl.c b/src/gpu/kernels.gen/inner_core_impl_kernel_forward_cl.c
index 76b7e43..80df582 100644
--- a/src/gpu/kernels.gen/inner_core_impl_kernel_forward_cl.c
+++ b/src/gpu/kernels.gen/inner_core_impl_kernel_forward_cl.c
@@ -196,7 +196,7 @@ void compute_element_ic_att_memory(const int tx, const int working_element, cons
     R_yz[offset - 0] = (alphaval_loc) * (R_yz[offset - 0]) + (betaval_loc) * (sn) + (gammaval_loc) * (snp1);\n\
   }\n\
 }\n\
-__kernel __attribute__((reqd_work_group_size(NGLL3_PADDED,1,1)))  void inner_core_impl_kernel_forward(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_idoubling, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllw
 gll_yz, const __global float * restrict d_kappavstore, const __global float * restrict d_muvstore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c33store, const __global float * res
 trict d_c44store, const int GRAVITY, const __global float * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_INNER_CORE_STRAIN_ONLY, const int NSPEC_INNER_CORE, __read_only image2d_t d_displ_ic_tex, __read_only image2d_t d_accel_ic_tex){\n\
+__kernel  void inner_core_impl_kernel_forward(const int nb_blocks_to_compute, const __global int * d_ibool, const __global int * d_idoubling, const __global int * d_phase_ispec_inner, const int num_phase_ispec, const int d_iphase, const float deltat, const int use_mesh_coloring_gpu, const __global float * restrict d_displ, __global float * d_accel, const __global float * restrict d_xix, const __global float * restrict d_xiy, const __global float * restrict d_xiz, const __global float * restrict d_etax, const __global float * restrict d_etay, const __global float * restrict d_etaz, const __global float * restrict d_gammax, const __global float * restrict d_gammay, const __global float * restrict d_gammaz, const __global float * restrict d_hprime_xx, const __global float * restrict d_hprimewgll_xx, const __global float * restrict d_wgllwgll_xy, const __global float * restrict d_wgllwgll_xz, const __global float * restrict d_wgllwgll_yz, const __global float * restrict d_kappavstore, c
 onst __global float * restrict d_muvstore, const int COMPUTE_AND_STORE_STRAIN, __global float * epsilondev_xx, __global float * epsilondev_yy, __global float * epsilondev_xy, __global float * epsilondev_xz, __global float * epsilondev_yz, __global float * epsilon_trace_over_3, const int ATTENUATION, const int PARTIAL_PHYS_DISPERSION_ONLY, const int USE_3D_ATTENUATION_ARRAYS, const __global float * restrict one_minus_sum_beta, const __global float * restrict factor_common, __global float * R_xx, __global float * R_yy, __global float * R_xy, __global float * R_xz, __global float * R_yz, const __global float * restrict alphaval, const __global float * restrict betaval, const __global float * restrict gammaval, const int ANISOTROPY, const __global float * restrict d_c11store, const __global float * restrict d_c12store, const __global float * restrict d_c13store, const __global float * restrict d_c33store, const __global float * restrict d_c44store, const int GRAVITY, const __global floa
 t * restrict d_xstore, const __global float * restrict d_ystore, const __global float * restrict d_zstore, const __global float * restrict d_minus_gravity_table, const __global float * restrict d_minus_deriv_gravity_table, const __global float * restrict d_density_table, const __global float * restrict wgll_cube, const int NSPEC_INNER_CORE_STRAIN_ONLY, const int NSPEC_INNER_CORE, __read_only image2d_t d_displ_ic_tex, __read_only image2d_t d_accel_ic_tex){\n\
 #ifdef USE_TEXTURES_FIELDS\n\
   const sampler_t sampler_d_displ_ic_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
   const sampler_t sampler_d_accel_ic_tex = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n\
diff --git a/src/gpu/rules.mk b/src/gpu/rules.mk
index 7e92a88..034336e 100644
--- a/src/gpu/rules.mk
+++ b/src/gpu/rules.mk
@@ -122,7 +122,7 @@ BUILD_VERSION_TXT += support
 CUDA_DEBUG := --cudart=shared
 
 boast_kernels :
-	cd $S/boast ;\
+	cd boast ;\
 	mkdir ../$(BOAST_DIR_NAME) -p ;\
 	ruby kernels.rb --output-dir ../$(BOAST_DIR_NAME)
 



More information about the CIG-COMMITS mailing list