[cig-commits] r19664 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: UTILS src/cuda src/specfem3D
danielpeter at geodynamics.org
danielpeter at geodynamics.org
Wed Feb 22 22:14:46 PST 2012
Author: danielpeter
Date: 2012-02-22 22:14:45 -0800 (Wed, 22 Feb 2012)
New Revision: 19664
Added:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/UTILS/create_specfem3D_gpu_cuda_method_stubs.pl
Modified:
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_kernels.f90
Log:
updates kernel routines
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/UTILS/create_specfem3D_gpu_cuda_method_stubs.pl
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/UTILS/create_specfem3D_gpu_cuda_method_stubs.pl (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/UTILS/create_specfem3D_gpu_cuda_method_stubs.pl 2012-02-23 06:14:45 UTC (rev 19664)
@@ -0,0 +1,141 @@
+#!/usr/bin/perl
+
+#
+# Script to extract the function declarations in cuda files
+#
+#
+# usage: ./create_specfem3D_gpu_cuda_method_stubs.pl
+# run in directory root SPECFEM3D/
+#
+
+$outfile = "src/cuda/specfem3D_gpu_cuda_method_stubs.c";
+
+
+open(IOUT,"> _____temp_tutu_____");
+
+$header = <<END;
+/*
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 0
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and University of Pau / CNRS / INRIA
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+! April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "config.h"
+
+typedef float realw;
+
+END
+
+
+$warning = <<END;
+ fprintf(stderr,"ERROR: GPU_MODE enabled without GPU/CUDA Support. To enable GPU support, reconfigure with --with-cuda flag.\\n");
+ exit(1);
+END
+
+print IOUT "$header \n";
+
+$success = 0;
+
+@objects = `ls src/cuda/*.cu`;
+
+foreach $name (@objects) {
+ chop $name;
+ print "extracting word in file $name ...\n";
+
+ print IOUT "\n//\n// $name\n//\n\n";
+
+ # change tabs to white spaces
+ system("expand -2 < $name > _____temp_tutu01_____");
+ open(IIN,"<_____temp_tutu01_____");
+
+
+ # open the source file
+ $success = 1;
+ $do_extract = 0;
+ while($line = <IIN>) {
+ chop $line;
+
+ # suppress trailing white spaces and carriage return
+ $line =~ s/\s*$//;
+
+ # change the version number and copyright information
+ # $line =~ s#\(c\) California Institute of Technology and University of Pau, October 2007#\(c\) California Institute of Technology and University of Pau, November 2007#og;
+ # $line =~ s#rmass_sigma#rmass_time_integral_of_sigma#og;
+
+ if($line =~ /extern "C"/){
+ # new function declaration starts
+ #print "$line\n";
+ if( $line =~/FC_FUNC/ ){
+ # function declaration on same line as extern, ask for line skip
+ print "problem: please add a line break after extern 'C' here:";
+ print "$line\n";
+ $success = 0;
+ close(IIN);
+ exit;
+ }
+ $do_extract = 1;
+ next;
+ }
+
+ # extract section
+ if($do_extract == 1 ){
+ # function declaration
+ if($line =~ /{/){
+ # function declaration ends
+ if( $line =~ /PREPARE_CUDA_DEVICE/ ){
+ # adds warning
+ print IOUT "$line \n$warning\} \n\n";
+ }else{
+ print IOUT "$line\} \n\n";
+ }
+ $do_extract = 0;
+ }else{
+ # write line to the output file
+ print IOUT "$line\n";
+ }
+ next;
+ }
+ }
+ close(IIN);
+
+ if( $success == 0 ){ exit; }
+}
+
+close(IOUT);
+system("rm -f _____temp_tutu01_____");
+
+# creates new stubs file if successful
+if( $success == 1 ){
+ print "\n\nsuccessfully extracted declarations \n\n";
+ system("cp -p $outfile $outfile.bak");
+ system("cp -p _____temp_tutu_____ $outfile");
+ print "created new: $outfile \n";
+}
+system("rm -f _____temp_tutu_____");
+
+
Property changes on: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/UTILS/create_specfem3D_gpu_cuda_method_stubs.pl
___________________________________________________________________
Name: svn:executable
+ *
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_kernels_cuda.cu 2012-02-23 06:14:45 UTC (rev 19664)
@@ -45,8 +45,7 @@
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_cudakernel(int* ispec_is_elastic,
- int* ibool,
+__global__ void compute_kernels_cudakernel(int* ibool,
realw* accel,
realw* b_displ,
realw* epsilondev_xx,
@@ -54,98 +53,224 @@
realw* epsilondev_xy,
realw* epsilondev_xz,
realw* epsilondev_yz,
+ realw* epsilon_trace_over_3,
realw* b_epsilondev_xx,
realw* b_epsilondev_yy,
realw* b_epsilondev_xy,
realw* b_epsilondev_xz,
realw* b_epsilondev_yz,
+ realw* b_epsilon_trace_over_3,
realw* rho_kl,
- realw deltat,
realw* mu_kl,
realw* kappa_kl,
- realw* epsilon_trace_over_3,
- realw* b_epsilon_trace_over_3,
- int NSPEC_AB) {
+ int NSPEC,
+ realw deltat,
+ int ANISOTROPIC_KL) {
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
// handles case when there is 1 extra block (due to rectangular grid)
- if(ispec < NSPEC_AB) {
+ if(ispec < NSPEC) {
- // elastic elements only
- if( ispec_is_elastic[ispec] ) {
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + NGLL3*ispec;
+ int iglob = ibool[ijk_ispec] - 1 ;
- int ijk = threadIdx.x;
- int ijk_ispec = ijk + NGLL3*ispec;
- int iglob = ibool[ijk_ispec] - 1 ;
+ // isotropic kernels:
+ // density kernel
+ rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
+ accel[3*iglob+1]*b_displ[3*iglob+1]+
+ accel[3*iglob+2]*b_displ[3*iglob+2]);
- // isotropic kernels:
- // density kernel
- rho_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_displ[3*iglob]+
- accel[3*iglob+1]*b_displ[3*iglob+1]+
- accel[3*iglob+2]*b_displ[3*iglob+2]);
-
-
+ // isotropic kernel contributions
+ if( ! ANISOTROPIC_KL ){
// shear modulus kernel
mu_kl[ijk_ispec] += deltat * (epsilondev_xx[ijk_ispec]*b_epsilondev_xx[ijk_ispec]+
epsilondev_yy[ijk_ispec]*b_epsilondev_yy[ijk_ispec]+
(epsilondev_xx[ijk_ispec]+epsilondev_yy[ijk_ispec])*
- (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
- 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
- epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
- epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
+ (b_epsilondev_xx[ijk_ispec]+b_epsilondev_yy[ijk_ispec])+
+ 2*(epsilondev_xy[ijk_ispec]*b_epsilondev_xy[ijk_ispec]+
+ epsilondev_xz[ijk_ispec]*b_epsilondev_xz[ijk_ispec]+
+ epsilondev_yz[ijk_ispec]*b_epsilondev_yz[ijk_ispec]));
// bulk modulus kernel
kappa_kl[ijk_ispec] += deltat*(9*epsilon_trace_over_3[ijk_ispec]*
- b_epsilon_trace_over_3[ijk_ispec]);
+ b_epsilon_trace_over_3[ijk_ispec]);
+ }
+ }
+}
+__device__ void compute_strain_product(realw* prod,
+ realw eps_trace_over_3,
+ realw* epsdev,
+ realw b_eps_trace_over_3,
+ realw* b_epsdev){
+
+ realw eps[6],b_eps[6];
+
+ // Building of the local matrix of the strain tensor
+ // for the adjoint field and the regular backward field
+
+ // note: indices are -1 compared to fortran routine because of fortran -> C array indexing
+
+ // eps11 et eps22
+ eps[0] = epsdev[0] + eps_trace_over_3;
+ eps[1] = epsdev[1] + eps_trace_over_3;
+ //eps33
+ eps[2] = - (eps[0] + eps[1]) + 3.0f*eps_trace_over_3;
+ //eps23
+ eps[3] = epsdev[4];
+ //eps13
+ eps[4] = epsdev[3];
+ //eps12
+ eps[5] = epsdev[2];
+
+ b_eps[0] = b_epsdev[0] + b_eps_trace_over_3;
+ b_eps[1] = b_epsdev[1] + b_eps_trace_over_3;
+ b_eps[2] = - (b_eps[0] + b_eps[1]) + 3.0f*b_eps_trace_over_3;
+ b_eps[3] = b_epsdev[4];
+ b_eps[4] = b_epsdev[3];
+ b_eps[5] = b_epsdev[2];
+
+ // Computing the 21 strain products without assuming eps(i)*b_eps(j) = eps(j)*b_eps(i)
+ int p = 0;
+ for(int i=0; i<6; i++){
+ for(int j=i; j<6; j++){
+ prod[p]=eps[i]*b_eps[j];
+
+ if(j>i){
+ prod[p]=prod[p]+eps[j]*b_eps[i];
+ if(j>2 && i<3){ prod[p] = prod[p]*2.0f;}
+ }
+
+ if(i>2){ prod[p]=prod[p]*4.0f;}
+
+ p=p+1;
}
}
}
+__global__ void compute_kernels_ani_cudakernel(int* ibool,
+ realw* epsilondev_xx,
+ realw* epsilondev_yy,
+ realw* epsilondev_xy,
+ realw* epsilondev_xz,
+ realw* epsilondev_yz,
+ realw* epsilon_trace_over_3,
+ realw* b_epsilondev_xx,
+ realw* b_epsilondev_yy,
+ realw* b_epsilondev_xy,
+ realw* b_epsilondev_xz,
+ realw* b_epsilondev_yz,
+ realw* b_epsilon_trace_over_3,
+ realw* cijkl_kl,
+ int NSPEC,
+ realw deltat) {
+
+ int ispec = blockIdx.x + blockIdx.y*gridDim.x;
+
+ // handles case when there is 1 extra block (due to rectangular grid)
+ if(ispec < NSPEC) {
+
+ int ijk = threadIdx.x;
+ int ijk_ispec = ijk + NGLL3*ispec;
+
+ // fully anisotropic kernel contributions
+
+ realw prod[21];
+ realw epsdev[5];
+ realw b_epsdev[5];
+
+ epsdev[0] = epsilondev_xx[ijk_ispec];
+ epsdev[1] = epsilondev_yy[ijk_ispec];
+ epsdev[2] = epsilondev_xy[ijk_ispec];
+ epsdev[3] = epsilondev_xz[ijk_ispec];
+ epsdev[4] = epsilondev_yz[ijk_ispec];
+
+ b_epsdev[0] = b_epsilondev_xx[ijk_ispec];
+ b_epsdev[1] = b_epsilondev_yy[ijk_ispec];
+ b_epsdev[2] = b_epsilondev_xy[ijk_ispec];
+ b_epsdev[3] = b_epsilondev_xz[ijk_ispec];
+ b_epsdev[4] = b_epsilondev_yz[ijk_ispec];
+
+ // fully anisotropic kernel contributions
+ compute_strain_product(prod,epsilon_trace_over_3[ijk_ispec],epsdev,
+ b_epsilon_trace_over_3[ijk_ispec],b_epsdev);
+
+ for(int i=0;i<21;i++){
+ cijkl_kl[i + 21*ijk_ispec] += deltat * prod[i];
+ }
+ }
+}
+
+
/* ----------------------------------------------------------------------------------------------- */
+
+// crust_mantle
+
extern "C"
-void FC_FUNC_(compute_kernels_elastic_cuda,
- COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
- realw* deltat_f) {
-TRACE("compute_kernels_elastic_cuda");
+void FC_FUNC_(compute_kernels_cm_cuda,
+ COMPUTE_KERNELS_CM_CUDA)(long* Mesh_pointer,realw* deltat_f) {
+TRACE("compute_kernels_cm_cuda");
+
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
+ int blocksize = NGLL3;
realw deltat = *deltat_f;
- int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_x = mp->NSPEC_CRUST_MANTLE;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ispec_is_elastic,mp->d_ibool,
- mp->d_accel, mp->d_b_displ,
- mp->d_epsilondev_xx,
- mp->d_epsilondev_yy,
- mp->d_epsilondev_xy,
- mp->d_epsilondev_xz,
- mp->d_epsilondev_yz,
- mp->d_b_epsilondev_xx,
- mp->d_b_epsilondev_yy,
- mp->d_b_epsilondev_xy,
- mp->d_b_epsilondev_xz,
- mp->d_b_epsilondev_yz,
- mp->d_rho_kl,
+ compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
+ mp->d_accel_crust_mantle,
+ mp->d_b_displ_crust_mantle,
+ mp->d_epsilondev_xx_crust_mantle,
+ mp->d_epsilondev_yy_crust_mantle,
+ mp->d_epsilondev_xy_crust_mantle,
+ mp->d_epsilondev_xz_crust_mantle,
+ mp->d_epsilondev_yz_crust_mantle,
+ mp->d_eps_trace_over_3_crust_mantle,
+ mp->d_b_epsilondev_xx_crust_mantle,
+ mp->d_b_epsilondev_yy_crust_mantle,
+ mp->d_b_epsilondev_xy_crust_mantle,
+ mp->d_b_epsilondev_xz_crust_mantle,
+ mp->d_b_epsilondev_yz_crust_mantle,
+ mp->d_b_eps_trace_over_3_crust_mantle,
+ mp->d_rho_kl_crust_mantle,
+ mp->d_beta_kl_crust_mantle,
+ mp->d_alpha_kl_crust_mantle,
+ mp->NSPEC_CRUST_MANTLE,
deltat,
- mp->d_mu_kl,
- mp->d_kappa_kl,
- mp->d_epsilon_trace_over_3,
- mp->d_b_epsilon_trace_over_3,
- mp->NSPEC_AB);
+ mp->anisotropic_kl);
+ if(mp->anisotropic_kl){
+ compute_kernels_ani_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
+ mp->d_epsilondev_xx_crust_mantle,
+ mp->d_epsilondev_yy_crust_mantle,
+ mp->d_epsilondev_xy_crust_mantle,
+ mp->d_epsilondev_xz_crust_mantle,
+ mp->d_epsilondev_yz_crust_mantle,
+ mp->d_eps_trace_over_3_crust_mantle,
+ mp->d_b_epsilondev_xx_crust_mantle,
+ mp->d_b_epsilondev_yy_crust_mantle,
+ mp->d_b_epsilondev_xy_crust_mantle,
+ mp->d_b_epsilondev_xz_crust_mantle,
+ mp->d_b_epsilondev_yz_crust_mantle,
+ mp->d_b_eps_trace_over_3_crust_mantle,
+ mp->d_cijkl_kl_crust_mantle,
+ mp->NSPEC_CRUST_MANTLE,
+ deltat);
+
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_elastic_cuda");
#endif
@@ -154,94 +279,67 @@
/* ----------------------------------------------------------------------------------------------- */
-// NOISE SIMULATIONS
-/* ----------------------------------------------------------------------------------------------- */
+// inner_core
-
-__global__ void compute_kernels_strength_noise_cuda_kernel(realw* displ,
- int* free_surface_ispec,
- int* free_surface_ijk,
- int* ibool,
- realw* noise_surface_movie,
- realw* normal_x_noise,
- realw* normal_y_noise,
- realw* normal_z_noise,
- realw* Sigma_kl,
- realw deltat,
- int num_free_surface_faces) {
- int iface = blockIdx.x + blockIdx.y*gridDim.x;
-
- if(iface < num_free_surface_faces) {
-
- int ispec = free_surface_ispec[iface]-1;
- int igll = threadIdx.x;
- int ipoin = igll + NGLL2*iface;
- int i = free_surface_ijk[INDEX3(NDIM,NGLL2,0,igll,iface)] - 1 ;
- int j = free_surface_ijk[INDEX3(NDIM,NGLL2,1,igll,iface)] - 1;
- int k = free_surface_ijk[INDEX3(NDIM,NGLL2,2,igll,iface)] - 1;
-
- int iglob = ibool[INDEX4(5,5,5,i,j,k,ispec)] - 1 ;
-
- realw eta = ( noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x_noise[ipoin]+
- noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
- noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z_noise[ipoin]);
-
- Sigma_kl[INDEX4(5,5,5,i,j,k,ispec)] += deltat*eta*(normal_x_noise[ipoin]*displ[3*iglob]+
- normal_y_noise[ipoin]*displ[1+3*iglob]+
- normal_z_noise[ipoin]*displ[2+3*iglob]);
- }
-
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
extern "C"
-void FC_FUNC_(compute_kernels_strgth_noise_cu,
- COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
- realw* h_noise_surface_movie,
- realw* deltat) {
+void FC_FUNC_(compute_kernels_ic_cuda,
+ COMPUTE_KERNELS_IC_CUDA)(long* Mesh_pointer,realw* deltat_f) {
-TRACE("compute_kernels_strgth_noise_cu");
+ TRACE("compute_kernels_cm_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
- 3*NGLL2*(mp->num_free_surface_faces)*sizeof(realw),cudaMemcpyHostToDevice);
+ int blocksize = NGLL3;
+ realw deltat = *deltat_f;
-
- int num_blocks_x = mp->num_free_surface_faces;
+ int num_blocks_x = mp->NSPEC_INNER_CORE;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
num_blocks_y = num_blocks_y*2;
}
-
dim3 grid(num_blocks_x,num_blocks_y);
- dim3 threads(NGLL2,1,1);
+ dim3 threads(blocksize,1,1);
- compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ,
- mp->d_free_surface_ispec,
- mp->d_free_surface_ijk,
- mp->d_ibool,
- mp->d_noise_surface_movie,
- mp->d_normal_x_noise,
- mp->d_normal_y_noise,
- mp->d_normal_z_noise,
- mp->d_Sigma_kl,*deltat,
- mp->num_free_surface_faces);
+ // only isotropic kernels in inner core so far implemented
+ int aniso_flag = 0;
+ compute_kernels_cudakernel<<<grid,threads>>>(mp->d_ibool_inner_core,
+ mp->d_accel_inner_core,
+ mp->d_b_displ_inner_core,
+ mp->d_epsilondev_xx_inner_core,
+ mp->d_epsilondev_yy_inner_core,
+ mp->d_epsilondev_xy_inner_core,
+ mp->d_epsilondev_xz_inner_core,
+ mp->d_epsilondev_yz_inner_core,
+ mp->d_eps_trace_over_3_inner_core,
+ mp->d_b_epsilondev_xx_inner_core,
+ mp->d_b_epsilondev_yy_inner_core,
+ mp->d_b_epsilondev_xy_inner_core,
+ mp->d_b_epsilondev_xz_inner_core,
+ mp->d_b_epsilondev_yz_inner_core,
+ mp->d_b_eps_trace_over_3_inner_core,
+ mp->d_rho_kl_inner_core,
+ mp->d_beta_kl_inner_core,
+ mp->d_alpha_kl_inner_core,
+ mp->NSPEC_INNER_CORE,
+ deltat,
+ aniso_flag);
+
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
+ exit_on_cuda_error("compute_kernels_elastic_cuda");
#endif
}
-
/* ----------------------------------------------------------------------------------------------- */
// ACOUSTIC SIMULATIONS
+// for outer core region
+
/* ----------------------------------------------------------------------------------------------- */
@@ -260,14 +358,11 @@
realw* d_etaz,
realw* d_gammax,
realw* d_gammay,
- realw* d_gammaz,
- realw rhol,
- int gravity) {
+ realw* d_gammaz) {
realw temp1l,temp2l,temp3l;
realw hp1,hp2,hp3;
realw xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl;
- realw rho_invl;
int l,offset,offset1,offset2,offset3;
//const int NGLLX = 5;
@@ -299,7 +394,6 @@
hp3 = hprime_zz[l*NGLLX+K];
offset3 = l*NGLL2+J*NGLLX+I;
temp3l += scalar_field[offset3]*hp3;
-
}
offset = ispec*NGLL3_ALIGN + ijk;
@@ -314,24 +408,20 @@
gammayl = d_gammay[offset];
gammazl = d_gammaz[offset];
- if( gravity ){
- // daniel: TODO - check gravity case here
- rho_invl = 1.0f / rhol;
- }else{
- rho_invl = 1.0f / rhol;
- }
+ // note: global version uses a different potential definition, no need to divide by rho
+ //rho_invl = 1.0f / rhol;
+
// derivatives of acoustic scalar potential field on GLL points
- vector_field_element[0] = (temp1l*xixl + temp2l*etaxl + temp3l*gammaxl) * rho_invl;
- vector_field_element[1] = (temp1l*xiyl + temp2l*etayl + temp3l*gammayl) * rho_invl;
- vector_field_element[2] = (temp1l*xizl + temp2l*etazl + temp3l*gammazl) * rho_invl;
+ vector_field_element[0] = temp1l*xixl + temp2l*etaxl + temp3l*gammaxl;
+ vector_field_element[1] = temp1l*xiyl + temp2l*etayl + temp3l*gammayl;
+ vector_field_element[2] = temp1l*xizl + temp2l*etazl + temp3l*gammazl;
}
/* ----------------------------------------------------------------------------------------------- */
-__global__ void compute_kernels_acoustic_kernel(int* ispec_is_acoustic,
- int* ibool,
+__global__ void compute_kernels_acoustic_kernel(int* ibool,
realw* rhostore,
realw* kappastore,
realw* hprime_xx,
@@ -352,62 +442,59 @@
realw* rho_ac_kl,
realw* kappa_ac_kl,
realw deltat,
- int NSPEC_AB,
- int gravity) {
+ int NSPEC) {
int ispec = blockIdx.x + blockIdx.y*gridDim.x;
// handles case when there is 1 extra block (due to rectangular grid)
- if( ispec < NSPEC_AB ){
+ if( ispec < NSPEC ){
- // acoustic elements only
- if( ispec_is_acoustic[ispec] ) {
+ int ijk = threadIdx.x;
- int ijk = threadIdx.x;
+ // local and global indices
+ int ijk_ispec = ijk + NGLL3*ispec;
+ int ijk_ispec_padded = ijk + NGLL3_PADDED*ispec;
+ int iglob = ibool[ijk_ispec] - 1;
- // local and global indices
- int ijk_ispec = ijk + NGLL3*ispec;
- int ijk_ispec_padded = ijk + NGLL3_PADDED*ispec;
- int iglob = ibool[ijk_ispec] - 1;
+ realw accel_elm[3];
+ realw b_displ_elm[3];
+ realw rhol,kappal;
+ realw div_displ,b_div_displ;
- realw accel_elm[3];
- realw b_displ_elm[3];
- realw rhol,kappal;
+ // shared memory between all threads within this block
+ __shared__ realw scalar_field_displ[NGLL3];
+ __shared__ realw scalar_field_accel[NGLL3];
- // shared memory between all threads within this block
- __shared__ realw scalar_field_displ[NGLL3];
- __shared__ realw scalar_field_accel[NGLL3];
+ // copy field values
+ scalar_field_displ[ijk] = b_potential_acoustic[iglob];
+ scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
+ __syncthreads();
- // copy field values
- scalar_field_displ[ijk] = b_potential_acoustic[iglob];
- scalar_field_accel[ijk] = potential_dot_dot_acoustic[iglob];
- __syncthreads();
+ // displacement vector from backward field
+ compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz);
- // gets material parameter
- rhol = rhostore[ijk_ispec_padded];
+ // acceleration vector
+ compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
+ hprime_xx,hprime_yy,hprime_zz,
+ d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz);
- // displacement vector from backward field
- compute_gradient_kernel(ijk,ispec,scalar_field_displ,b_displ_elm,
- hprime_xx,hprime_yy,hprime_zz,
- d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
- rhol,gravity);
+ // gets material parameter
+ rhol = rhostore[ijk_ispec_padded];
- // acceleration vector
- compute_gradient_kernel(ijk,ispec,scalar_field_accel,accel_elm,
- hprime_xx,hprime_yy,hprime_zz,
- d_xix,d_xiy,d_xiz,d_etax,d_etay,d_etaz,d_gammax,d_gammay,d_gammaz,
- rhol,gravity);
+ // density kernel
+ rho_ac_kl[ijk_ispec] += deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
+ accel_elm[1]*b_displ_elm[1] +
+ accel_elm[2]*b_displ_elm[2]);
- // density kernel
- rho_ac_kl[ijk_ispec] -= deltat * rhol * (accel_elm[0]*b_displ_elm[0] +
- accel_elm[1]*b_displ_elm[1] +
- accel_elm[2]*b_displ_elm[2]);
+ // bulk modulus kernel
+ kappal = rhol/ kappastore[ijk_ispec_padded];
- // bulk modulus kernel
- kappal = kappastore[ijk_ispec];
- kappa_ac_kl[ijk_ispec] -= deltat / kappal * potential_dot_dot_acoustic[iglob]
- * b_potential_dot_dot_acoustic[iglob];
- }
+ div_displ = kappal * potential_dot_dot_acoustic[iglob];
+ b_div_displ = kappal * b_potential_dot_dot_acoustic[iglob];
+
+ kappa_ac_kl[ijk_ispec] += deltat * div_displ * b_div_displ;
}
}
@@ -415,19 +502,17 @@
extern "C"
-void FC_FUNC_(compute_kernels_acoustic_cuda,
- COMPUTE_KERNELS_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- realw* deltat_f) {
+void FC_FUNC_(compute_kernels_oc_cuda,
+ COMPUTE_KERNELS_OC_CUDA)(long* Mesh_pointer,realw* deltat_f) {
-TRACE("compute_kernels_acoustic_cuda");
+TRACE("compute_kernels_oc_cuda");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
realw deltat = *deltat_f;
- int num_blocks_x = mp->NSPEC_AB;
+ int num_blocks_x = mp->NSPEC_OUTER_CORE;
int num_blocks_y = 1;
while(num_blocks_x > 65535) {
num_blocks_x = (int) ceil(num_blocks_x*0.5f);
@@ -437,38 +522,123 @@
dim3 grid(num_blocks_x,num_blocks_y);
dim3 threads(blocksize,1,1);
- compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ispec_is_acoustic,
- mp->d_ibool,
- mp->d_rhostore,
- mp->d_kappastore,
+ compute_kernels_acoustic_kernel<<<grid,threads>>>(mp->d_ibool_outer_core,
+ mp->d_rhostore_outer_core,
+ mp->d_kappavstore_outer_core,
mp->d_hprime_xx,
mp->d_hprime_yy,
mp->d_hprime_zz,
- mp->d_xix,
- mp->d_xiy,
- mp->d_xiz,
- mp->d_etax,
- mp->d_etay,
- mp->d_etaz,
- mp->d_gammax,
- mp->d_gammay,
- mp->d_gammaz,
- mp->d_potential_dot_dot_acoustic,
- mp->d_b_potential_acoustic,
- mp->d_b_potential_dot_dot_acoustic,
- mp->d_rho_ac_kl,
- mp->d_kappa_ac_kl,
+ mp->d_xix_outer_core,
+ mp->d_xiy_outer_core,
+ mp->d_xiz_outer_core,
+ mp->d_etax_outer_core,
+ mp->d_etay_outer_core,
+ mp->d_etaz_outer_core,
+ mp->d_gammax_outer_core,
+ mp->d_gammay_outer_core,
+ mp->d_gammaz_outer_core,
+ mp->d_accel_outer_core,
+ mp->d_b_displ_outer_core,
+ mp->d_b_accel_outer_core,
+ mp->d_rho_kl_outer_core,
+ mp->d_alpha_kl_outer_core,
deltat,
- mp->NSPEC_AB,
- mp->gravity);
+ mp->NSPEC_OUTER_CORE);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("compute_kernels_acoustic_kernel");
+ exit_on_cuda_error("compute_kernels_oc_kernel");
#endif
}
/* ----------------------------------------------------------------------------------------------- */
+// NOISE SIMULATIONS
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+__global__ void compute_kernels_strength_noise_cuda_kernel(realw* displ,
+ int* ibelm_top,
+ int* ibool,
+ realw* noise_surface_movie,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* Sigma_kl,
+ realw deltat,
+ int nspec_top) {
+ int iface = blockIdx.x + blockIdx.y*gridDim.x;
+
+ if(iface < nspec_top) {
+
+ int ispec = ibelm_top[iface]-1;
+ int igll = threadIdx.x;
+ int ipoin = igll + NGLL2*iface;
+
+ int k = NGLLX-1;
+ int j = (igll/NGLLX);
+ int i = (igll-j*NGLLX);
+
+ int iglob = ibool[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] - 1 ;
+
+ realw eta = ( noise_surface_movie[INDEX3(NDIM,NGLL2,0,igll,iface)]*normal_x_noise[ipoin]+
+ noise_surface_movie[INDEX3(NDIM,NGLL2,1,igll,iface)]*normal_y_noise[ipoin]+
+ noise_surface_movie[INDEX3(NDIM,NGLL2,2,igll,iface)]*normal_z_noise[ipoin]);
+
+ Sigma_kl[INDEX4(NGLLX,NGLLX,NGLLX,i,j,k,ispec)] += deltat*eta*
+ (normal_x_noise[ipoin]*displ[3*iglob]+
+ normal_y_noise[ipoin]*displ[1+3*iglob]+
+ normal_z_noise[ipoin]*displ[2+3*iglob]);
+ }
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(compute_kernels_strgth_noise_cu,
+ COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
+ realw* h_noise_surface_movie,
+ realw* deltat_f) {
+
+ TRACE("compute_kernels_strgth_noise_cu");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+
+ // copies surface buffer to GPU
+ cudaMemcpy(mp->d_noise_surface_movie,h_noise_surface_movie,
+ NDIM*NGLL2*(mp->nspec_top)*sizeof(realw),cudaMemcpyHostToDevice);
+
+ int num_blocks_x = mp->nspec_top;
+ realw deltat = *deltat_f;
+
+ int num_blocks_y = 1;
+ while(num_blocks_x > 65535) {
+ num_blocks_x = (int) ceil(num_blocks_x*0.5f);
+ num_blocks_y = num_blocks_y*2;
+ }
+
+ dim3 grid(num_blocks_x,num_blocks_y);
+ dim3 threads(NGLL2,1,1);
+
+ compute_kernels_strength_noise_cuda_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
+ mp->d_ibelm_top_crust_mantle,
+ mp->d_ibool_crust_mantle,
+ mp->d_noise_surface_movie,
+ mp->d_normal_x_noise,
+ mp->d_normal_y_noise,
+ mp->d_normal_z_noise,
+ mp->d_Sigma_kl,
+ deltat,
+ mp->nspec_top);
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("compute_kernels_strength_noise_cuda_kernel");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// preconditioner (approximate Hessian kernel)
/* ----------------------------------------------------------------------------------------------- */
@@ -490,8 +660,8 @@
int iglob = ibool[ijk_ispec] - 1 ;
// approximate hessian
- hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob]+
- accel[3*iglob+1]*b_accel[3*iglob+1]+
+ hess_kl[ijk_ispec] += deltat * (accel[3*iglob]*b_accel[3*iglob] +
+ accel[3*iglob+1]*b_accel[3*iglob+1] +
accel[3*iglob+2]*b_accel[3*iglob+2]);
}
}
@@ -506,6 +676,9 @@
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
+ // checks
+ if( ! mp->approximate_hess_kl ){exit_on_cuda_error("approximate_hess_kl flag not properly initialized");}
+
int blocksize = NGLL3; // NGLLX*NGLLY*NGLLZ
realw deltat = *deltat_f;
@@ -520,11 +693,11 @@
dim3 threads(blocksize,1,1);
compute_kernels_hess_cudakernel<<<grid,threads>>>(mp->d_ibool_crust_mantle,
- mp->d_accel_crust_mantle,
- mp->d_b_accel_crust_mantle,
- mp->d_hess_kl_crust_mantle,
- deltat,
- mp->NSPEC_CRUST_MANTLE);
+ mp->d_accel_crust_mantle,
+ mp->d_b_accel_crust_mantle,
+ mp->d_hess_kl_crust_mantle,
+ deltat,
+ mp->NSPEC_CRUST_MANTLE);
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("compute_kernels_hess_cuda");
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h 2012-02-23 06:14:45 UTC (rev 19664)
@@ -250,6 +250,10 @@
realw* d_b_eps_trace_over_3_crust_mantle;
// kernels
+ realw* d_rho_kl_crust_mantle;
+ realw* d_alpha_kl_crust_mantle;
+ realw* d_beta_kl_crust_mantle;
+ realw* d_cijkl_kl_crust_mantle;
realw* d_hess_kl_crust_mantle;
// inner / outer elements
@@ -291,6 +295,10 @@
// backward/reconstructed elastic wavefield
realw* d_b_displ_outer_core; realw* d_b_veloc_outer_core; realw* d_b_accel_outer_core;
+ // kernels
+ realw* d_rho_kl_outer_core;
+ realw* d_alpha_kl_outer_core;
+
// inner / outer elements
int* d_phase_ispec_inner_outer_core;
int num_phase_ispec_outer_core;
@@ -371,6 +379,11 @@
realw* d_eps_trace_over_3_inner_core;
realw* d_b_eps_trace_over_3_inner_core;
+ // kernels
+ realw* d_rho_kl_inner_core;
+ realw* d_alpha_kl_inner_core;
+ realw* d_beta_kl_inner_core;
+
// inner / outer elements
int* d_phase_ispec_inner_inner_core;
int num_phase_ispec_inner_core;
@@ -420,6 +433,8 @@
int rotation;
int anisotropic_inner_core;
int save_boundary_mesh;
+
+ int anisotropic_kl;
int approximate_hess_kl;
// ------------------------------------------------------------------ //
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu 2012-02-23 06:14:45 UTC (rev 19664)
@@ -211,6 +211,7 @@
int* ANISOTROPIC_INNER_CORE_f,
int* SAVE_BOUNDARY_MESH_f,
int* USE_MESH_COLORING_GPU_f,
+ int* ANISOTROPIC_KL_f,
int* APPROXIMATE_HESS_KL_f) {
TRACE("prepare_constants_device");
@@ -259,6 +260,8 @@
mp->anisotropic_3D_mantle = *ANISOTROPIC_3D_MANTLE_f;
mp->anisotropic_inner_core = *ANISOTROPIC_INNER_CORE_f;
mp->save_boundary_mesh = *SAVE_BOUNDARY_MESH_f;
+
+ mp->anisotropic_kl = *ANISOTROPIC_KL_f;
mp->approximate_hess_kl = *APPROXIMATE_HESS_KL_f;
// mpi process rank
@@ -267,7 +270,7 @@
// mesh coloring flag
#ifdef USE_MESH_COLORING_GPU
mp->use_mesh_coloring_gpu = 1;
- if( ! *USE_MESH_COLORING_GPU_f ) exit_on_error("error with USE_MESH_COLORING_GPU constant; please re-compile\n");
+ if( ! *USE_MESH_COLORING_GPU_f ){exit_on_error("error with USE_MESH_COLORING_GPU constant; please re-compile\n");}
#else
// mesh coloring
// note: this here passes the coloring as an option to the kernel routines
@@ -1228,6 +1231,101 @@
/* ----------------------------------------------------------------------------------------------- */
+// for NOISE simulations
+
+/* ----------------------------------------------------------------------------------------------- */
+
+
+extern "C"
+void FC_FUNC_(prepare_fields_noise_device,
+ PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
+ int* nspec_top,
+ int* ibelm_top_crust_mantle,
+ int* NSTEP,
+ realw* noise_sourcearray,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* mask_noise,
+ realw* jacobian2D_top_crust_mantle) {
+
+ TRACE("prepare_fields_noise_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+
+ // free surface
+ mp->nspec_top = *nspec_top;
+ if( mp->nspec_top > 0 ){
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_top_crust_mantle,
+ mp->nspec_top*sizeof(int)),7001);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_crust_mantle,ibelm_top_crust_mantle,
+ mp->nspec_top*sizeof(int),cudaMemcpyHostToDevice),7002);
+
+ // alloc storage for the surface buffer to be copied
+ print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
+ NDIM*NGLL2*(mp->nspec_top)*sizeof(realw)),7005);
+ }else{
+ // for global mesh: each crust/mantle slice should have at top a free surface
+ exit_on_cuda_error("prepare_fields_noise_device nspec_top not properly initialized");
+ }
+
+
+ // prepares noise source array
+ if( mp->noise_tomography == 1 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
+ NDIM*NGLL3*(*NSTEP)*sizeof(realw)),7101);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray,noise_sourcearray,
+ NDIM*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
+ }
+
+ // prepares noise directions
+ if( mp->noise_tomography > 1 ){
+ int nface_size = NGLL2*(mp->nspec_top);
+ // allocates memory on GPU
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
+ nface_size*sizeof(realw)),7301);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
+ nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
+ nface_size*sizeof(realw)),7302);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
+ nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
+ nface_size*sizeof(realw)),7303);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
+ nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
+ nface_size*sizeof(realw)),7304);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
+ nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
+
+ print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_jacobian2D_top_crust_mantle,
+ nface_size*sizeof(realw)),7305);
+ print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_crust_mantle, jacobian2D_top_crust_mantle,
+ nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
+ }
+
+ // prepares noise strength kernel
+ if( mp->noise_tomography == 3 ){
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
+ NGLL3*(mp->NSPEC_CRUST_MANTLE)*sizeof(realw)),7401);
+ // initializes kernel values to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_Sigma_kl,0,
+ NGLL3*mp->NSPEC_CRUST_MANTLE*sizeof(realw)),7403);
+
+ }
+
+#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
+ exit_on_cuda_error("prepare_fields_noise_device");
+#endif
+}
+
+
+/* ----------------------------------------------------------------------------------------------- */
+
// Earth regions
// CRUST / MANTLE
@@ -1488,18 +1586,37 @@
print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_crust_mantle,h_rmass,
sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
-
// kernels
if( mp->simulation_type == 3 ){
+
size = NGLL3*(mp->NSPEC_CRUST_MANTLE);
+ // density kernel
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl_crust_mantle),
+ size*sizeof(realw)),5204);
+ // initializes kernel values to zero
+ print_CUDA_error_if_any(cudaMemset(mp->d_rho_kl_crust_mantle,0,size*sizeof(realw)),5207);
+
+ if( ! mp->anisotropic_kl){
+ // isotropic kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alpha_kl_crust_mantle),
+ size*sizeof(realw)),5205);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_beta_kl_crust_mantle),
+ size*sizeof(realw)),5206);
+ print_CUDA_error_if_any(cudaMemset(mp->d_alpha_kl_crust_mantle,0,size*sizeof(realw)),5208);
+ print_CUDA_error_if_any(cudaMemset(mp->d_beta_kl_crust_mantle,0,size*sizeof(realw)),5209);
+ }else{
+ // anisotropic kernels
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_cijkl_kl_crust_mantle),
+ 21*size*sizeof(realw)),5206);
+ print_CUDA_error_if_any(cudaMemset(mp->d_cijkl_kl_crust_mantle,0,21*size*sizeof(realw)),5209);
+ }
+
// preconditioner
if( mp->approximate_hess_kl ){
print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_kl_crust_mantle),
size*sizeof(realw)),3030);
- // initializes with zeros
- print_CUDA_error_if_any(cudaMemset(mp->d_hess_kl_crust_mantle,0,
- size*sizeof(realw)),3031);
+ print_CUDA_error_if_any(cudaMemset(mp->d_hess_kl_crust_mantle,0,size*sizeof(realw)),3031);
}
}
@@ -1630,7 +1747,22 @@
print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_outer_core,h_rmass,
sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ // kernels
+ if( mp->simulation_type == 3 ){
+ size = NGLL3*(mp->NSPEC_OUTER_CORE);
+
+ // density kernel
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl_outer_core),
+ size*sizeof(realw)),5204);
+ print_CUDA_error_if_any(cudaMemset(mp->d_rho_kl_outer_core,0,size*sizeof(realw)),5207);
+
+ // isotropic kernel
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alpha_kl_outer_core),
+ size*sizeof(realw)),5205);
+ print_CUDA_error_if_any(cudaMemset(mp->d_alpha_kl_outer_core,0,size*sizeof(realw)),5208);
+ }
+
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
exit_on_cuda_error("prepare_outer_core_device");
#endif
@@ -1659,11 +1791,7 @@
int* num_phase_ispec,
int* phase_ispec_inner,
int* nspec_outer,
- int* nspec_inner
- //int* iboolleft_xi, int* iboolright_xi,
- //int* iboolleft_eta, int* iboolright_eta,
- //int* npoin2D_xi, int* npoin2D_eta
- ) {
+ int* nspec_inner) {
TRACE("prepare_inner_core_device");
@@ -1810,229 +1938,35 @@
print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_inner_core,h_rmass,
sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
+ // kernels
+ if( mp->simulation_type == 3 ){
- // mpi communication
-// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolright_xi_inner_core,npoin2D_xi*sizeof(int)),270);
-// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_xi_inner_core,npoin2D_xi*sizeof(int)),280);
-// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolright_eta_inner_core,npoin2D_eta*sizeof(int)),290);
-// print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_iboolleft_eta_inner_core,npoin2D_eta*sizeof(int)),300);
-//
-// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolright_xi_inner_core,iboolright_xi,
-// npoin2D_xi*sizeof(int),cudaMemcpyHostToDevice),91);
-// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_xi_inner_core,iboolleft_xi,
-// npoin2D_xi*sizeof(int),cudaMemcpyHostToDevice),92);
-// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolright_eta_inner_core,iboolright_eta,
-// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),93);
-// print_CUDA_error_if_any(cudaMemcpy(mp->d_iboolleft_eta_inner_core,iboolleft_eta,
-// npoin2D_eta*sizeof(int),cudaMemcpyHostToDevice),94);
+ size = NGLL3*(mp->NSPEC_INNER_CORE);
+ // density kernel
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl_inner_core),
+ size*sizeof(realw)),5204);
+ print_CUDA_error_if_any(cudaMemset(mp->d_rho_kl_inner_core,0,size*sizeof(realw)),5207);
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_inner_core_device");
-#endif
-}
-
-
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// for ACOUSTIC simulations
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(prepare_fields_acoustic_device,
- PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
- realw* rmass_acoustic,
- realw* rhostore,
- realw* kappastore,
- int* num_phase_ispec_acoustic,
- int* phase_ispec_inner_acoustic,
- int* ispec_is_acoustic,
- int* NOISE_TOMOGRAPHY,
- int* num_free_surface_faces,
- int* free_surface_ispec,
- int* free_surface_ijk,
- int* ABSORBING_CONDITIONS,
- int* b_reclen_potential,
- realw* b_absorb_potential,
- int* ELASTIC_SIMULATION,
- int* num_coupling_ac_el_faces,
- int* coupling_ac_el_ispec,
- int* coupling_ac_el_ijk,
- realw* coupling_ac_el_normal,
- realw* coupling_ac_el_jacobian2Dw,
- int* num_colors_outer_acoustic,
- int* num_colors_inner_acoustic,
- int* num_elem_colors_acoustic) {
-
- TRACE("prepare_fields_acoustic_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- // Assuming NGLLX==5. Padded is then 128 (5^3+3)
- int size_padded = NGLL3_PADDED * mp->NSPEC_AB;
- int size_nonpadded = NGLL3 * mp->NSPEC_AB;
- int size_glob = mp->NGLOB_AB;
-
- // allocates arrays on device (GPU)
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_acoustic),sizeof(realw)*size_glob),2001);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_acoustic),sizeof(realw)*size_glob),2002);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_potential_dot_dot_acoustic),sizeof(realw)*size_glob),2003);
-
- // mpi buffer
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_send_potential_dot_dot_buffer),
- (mp->max_nibool_interfaces_ext_mesh)*(mp->num_interfaces_ext_mesh)*sizeof(realw)),2004);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_acoustic),sizeof(realw)*size_glob),2005);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_acoustic,rmass_acoustic,
- sizeof(realw)*size_glob,cudaMemcpyHostToDevice),2100);
-
- // padded array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rhostore),size_padded*sizeof(realw)),2006);
- // transfer constant element data with padding
- for(int i=0; i < mp->NSPEC_AB; i++) {
- print_CUDA_error_if_any(cudaMemcpy(mp->d_rhostore+i*NGLL3_PADDED, &rhostore[i*NGLL3],
- NGLL3*sizeof(realw),cudaMemcpyHostToDevice),2106);
+ // isotropic kernel
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_alpha_kl_inner_core),
+ size*sizeof(realw)),5205);
+ print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_beta_kl_inner_core),
+ size*sizeof(realw)),5206);
+ print_CUDA_error_if_any(cudaMemset(mp->d_alpha_kl_inner_core,0,size*sizeof(realw)),5208);
+ print_CUDA_error_if_any(cudaMemset(mp->d_beta_kl_inner_core,0,size*sizeof(realw)),5209);
}
- // non-padded array
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappastore),size_nonpadded*sizeof(realw)),2007);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_kappastore,kappastore,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),2105);
- // phase elements
- mp->num_phase_ispec_acoustic = *num_phase_ispec_acoustic;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_phase_ispec_inner_acoustic),
- mp->num_phase_ispec_acoustic*2*sizeof(int)),2008);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_phase_ispec_inner_acoustic,phase_ispec_inner_acoustic,
- mp->num_phase_ispec_acoustic*2*sizeof(int),cudaMemcpyHostToDevice),2101);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_ispec_is_acoustic),
- mp->NSPEC_AB*sizeof(int)),2009);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ispec_is_acoustic,ispec_is_acoustic,
- mp->NSPEC_AB*sizeof(int),cudaMemcpyHostToDevice),2102);
-
- // free surface
- if( *NOISE_TOMOGRAPHY == 0 ){
- // allocate surface arrays
- mp->num_free_surface_faces = *num_free_surface_faces;
- if( mp->num_free_surface_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ispec),
- mp->num_free_surface_faces*sizeof(int)),2201);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ispec,free_surface_ispec,
- mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2203);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_free_surface_ijk),
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int)),2202);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_free_surface_ijk,free_surface_ijk,
- 3*NGLL2*mp->num_free_surface_faces*sizeof(int),cudaMemcpyHostToDevice),2204);
- }
- }
-
- // absorbing boundaries
- if( *ABSORBING_CONDITIONS ){
- mp->d_b_reclen_potential = *b_reclen_potential;
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_absorb_potential),mp->d_b_reclen_potential),2301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_absorb_potential,b_absorb_potential,
- mp->d_b_reclen_potential,cudaMemcpyHostToDevice),2302);
- }
-
-
- // for seismograms
- if( mp->nrec_local > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_station_seismo_potential),
- mp->nrec_local*NGLL3*sizeof(realw)),2107);
-
- mp->h_station_seismo_potential = (realw*) malloc( mp->nrec_local*NGLL3*sizeof(realw) );
- if( mp->h_station_seismo_potential == NULL) exit_on_error("error allocating h_station_seismo_potential");
- }
-
-
- // coupling with elastic parts
- if( *ELASTIC_SIMULATION && *num_coupling_ac_el_faces > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ispec),
- (*num_coupling_ac_el_faces)*sizeof(int)),2601);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ispec,coupling_ac_el_ispec,
- (*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2602);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_ijk),
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int)),2603);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_ijk,coupling_ac_el_ijk,
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(int),cudaMemcpyHostToDevice),2604);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_normal),
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2605);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_normal,coupling_ac_el_normal,
- 3*NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2606);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_coupling_ac_el_jacobian2Dw),
- NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw)),2607);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_coupling_ac_el_jacobian2Dw,coupling_ac_el_jacobian2Dw,
- NGLL2*(*num_coupling_ac_el_faces)*sizeof(realw),cudaMemcpyHostToDevice),2608);
-
- }
-
- // mesh coloring
- if( mp->use_mesh_coloring_gpu ){
- mp->num_colors_outer_acoustic = *num_colors_outer_acoustic;
- mp->num_colors_inner_acoustic = *num_colors_inner_acoustic;
- mp->h_num_elem_colors_acoustic = (int*) num_elem_colors_acoustic;
- }
-
#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_acoustic_device");
+ exit_on_cuda_error("prepare_inner_core_device");
#endif
}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(prepare_fields_acoustic_adj_dev,
- PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
- int* APPROXIMATE_HESS_KL) {
- TRACE("prepare_fields_acoustic_adj_dev");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- int size_glob = mp->NGLOB_AB;
-
- // kernel simulations
- if( *SIMULATION_TYPE != 3 ) return;
-
- // allocates backward/reconstructed arrays on device (GPU)
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_acoustic),sizeof(realw)*size_glob),3014);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_acoustic),sizeof(realw)*size_glob),3015);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_potential_dot_dot_acoustic),sizeof(realw)*size_glob),3016);
-
- // allocates kernels
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3017);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3018);
-
- // initializes kernel values to zero
- print_CUDA_error_if_any(cudaMemset(mp->d_rho_ac_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),3019);
- print_CUDA_error_if_any(cudaMemset(mp->d_kappa_ac_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),3020);
-
- // preconditioner
- if( *APPROXIMATE_HESS_KL ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_ac_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),3030);
- // initializes with zeros
- print_CUDA_error_if_any(cudaMemset(mp->d_hess_ac_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),3031);
- }
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_acoustic_adj_dev");
-#endif
-}
-*/
-
/* ----------------------------------------------------------------------------------------------- */
// for ELASTIC simulations
@@ -2385,326 +2319,271 @@
}
*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(prepare_fields_elastic_adj_dev,
- PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* size,
- int* SIMULATION_TYPE,
- int* COMPUTE_AND_STORE_STRAIN,
- realw* epsilon_trace_over_3,
- realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,realw* b_epsilondev_yz,
- realw* b_epsilon_trace_over_3,
- int* ATTENUATION,
- int* R_size,
- realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
- int* APPROXIMATE_HESS_KL){
- TRACE("prepare_fields_elastic_adj_dev");
+/* ----------------------------------------------------------------------------------------------- */
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
+// cleanup
- // checks if kernel simulation
- if( *SIMULATION_TYPE != 3 ) return;
+/* ----------------------------------------------------------------------------------------------- */
- // kernel simulations
- // allocates backward/reconstructed arrays on device (GPU)
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_displ),sizeof(realw)*(*size)),5201);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_veloc),sizeof(realw)*(*size)),5202);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_accel),sizeof(realw)*(*size)),5203);
+extern "C"
+void FC_FUNC_(prepare_cleanup_device,
+ PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f) {
- // allocates kernels
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rho_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5204);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_mu_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5205);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_kappa_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5206);
+TRACE("prepare_cleanup_device");
- // initializes kernel values to zero
- print_CUDA_error_if_any(cudaMemset(mp->d_rho_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5207);
- print_CUDA_error_if_any(cudaMemset(mp->d_mu_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5208);
- print_CUDA_error_if_any(cudaMemset(mp->d_kappa_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5209);
+ // frees allocated memory arrays
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f);
- // strains used for attenuation and kernel simulations
- if( *COMPUTE_AND_STORE_STRAIN ){
- // strains
- int epsilondev_size = NGLL3*mp->NSPEC_AB; // note: non-aligned; if align, check memcpy below and indexing
+ // frees memory on GPU
- // solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_epsilon_trace_over_3),
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5310);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_epsilon_trace_over_3,epsilon_trace_over_3,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5311);
- // backward solid pressure
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilon_trace_over_3),
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5312);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilon_trace_over_3 ,b_epsilon_trace_over_3,
- NGLL3*mp->NSPEC_AB*sizeof(realw),cudaMemcpyHostToDevice),5313);
- // prepares backward strains
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xx),
- epsilondev_size*sizeof(realw)),5321);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yy),
- epsilondev_size*sizeof(realw)),5322);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xy),
- epsilondev_size*sizeof(realw)),5323);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_xz),
- epsilondev_size*sizeof(realw)),5324);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_epsilondev_yz),
- epsilondev_size*sizeof(realw)),5325);
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5326);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5327);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5328);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5329);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,
- epsilondev_size*sizeof(realw),cudaMemcpyHostToDevice),5330);
+ //------------------------------------------
+ // sources
+ //------------------------------------------
+ if( mp->simulation_type == 1 || mp->simulation_type == 3 ){
+ cudaFree(mp->d_sourcearrays);
+ cudaFree(mp->d_stf_pre_compute);
}
- // attenuation memory variables
- if( *ATTENUATION ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xx),
- (*R_size)*sizeof(realw)),5421);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xx,b_R_xx,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5422);
+ cudaFree(mp->d_islice_selected_source);
+ cudaFree(mp->d_ispec_selected_source);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yy),
- (*R_size)*sizeof(realw)),5423);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yy,b_R_yy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5424);
+ //------------------------------------------
+ // receivers
+ //------------------------------------------
+ if( mp->nrec_local > 0 ) {
+ cudaFree(mp->d_number_receiver_global);
+ cudaFree(mp->d_station_seismo_field);
+ free(mp->h_station_seismo_field);
+ }
+ cudaFree(mp->d_ispec_selected_rec);
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xy),
- (*R_size)*sizeof(realw)),5425);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xy,b_R_xy,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5426);
+ if( mp->nadj_rec_local > 0 ){
+ cudaFree(mp->d_adj_sourcearrays);
+ cudaFree(mp->d_pre_computed_irec);
+ free(mp->h_adj_sourcearrays_slice);
+ }
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_xz),
- (*R_size)*sizeof(realw)),5427);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_xz,b_R_xz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5428);
+ //------------------------------------------
+ // rotation arrays
+ //------------------------------------------
+ if( mp->rotation ){
+ cudaFree(mp->d_A_array_rotation);
+ cudaFree(mp->d_B_array_rotation);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_A_array_rotation);
+ cudaFree(mp->d_b_B_array_rotation);
+ }
+ }
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_R_yz),
- (*R_size)*sizeof(realw)),5429);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_R_yz,b_R_yz,(*R_size)*sizeof(realw),
- cudaMemcpyHostToDevice),5420);
-
- // alpha,beta,gamma factors for backward fields
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_alphaval),
- N_SLS*sizeof(realw)),5434);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_alphaval ,b_alphaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5435);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_betaval),
- N_SLS*sizeof(realw)),5436);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_betaval ,b_betaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5437);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_b_gammaval),
- N_SLS*sizeof(realw)),5438);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_gammaval ,b_gammaval,
- N_SLS*sizeof(realw),cudaMemcpyHostToDevice),5439);
+ //------------------------------------------
+ // gravity arrays
+ //------------------------------------------
+ if( ! mp->gravity ){
+ cudaFree(mp->d_d_ln_density_dr_table);
+ }else{
+ cudaFree(mp->d_minus_rho_g_over_kappa_fluid);
+ cudaFree(mp->d_minus_gravity_table);
+ cudaFree(mp->d_minus_deriv_gravity_table);
+ cudaFree(mp->d_density_table);
}
- if( *APPROXIMATE_HESS_KL ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_hess_el_kl),NGLL3*mp->NSPEC_AB*sizeof(realw)),5450);
- // initializes with zeros
- print_CUDA_error_if_any(cudaMemset(mp->d_hess_el_kl,0,
- NGLL3*mp->NSPEC_AB*sizeof(realw)),5451);
+ //------------------------------------------
+ // attenuation arrays
+ //------------------------------------------
+ if( mp->attenuation ){
+ cudaFree(mp->d_one_minus_sum_beta_crust_mantle);
+ cudaFree(mp->d_one_minus_sum_beta_inner_core);
+ if( ! mp->use_attenuation_mimic ){
+ cudaFree(mp->d_factor_common_crust_mantle);
+ cudaFree(mp->d_R_xx_crust_mantle);
+ cudaFree(mp->d_R_yy_crust_mantle);
+ cudaFree(mp->d_R_xy_crust_mantle);
+ cudaFree(mp->d_R_xz_crust_mantle);
+ cudaFree(mp->d_R_yz_crust_mantle);
+ cudaFree(mp->d_factor_common_inner_core);
+ cudaFree(mp->d_R_xx_inner_core);
+ cudaFree(mp->d_R_yy_inner_core);
+ cudaFree(mp->d_R_xy_inner_core);
+ cudaFree(mp->d_R_xz_inner_core);
+ cudaFree(mp->d_R_yz_inner_core);
+ }
+ cudaFree(mp->d_alphaval);
+ cudaFree(mp->d_betaval);
+ cudaFree(mp->d_gammaval);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_alphaval);
+ cudaFree(mp->d_b_betaval);
+ cudaFree(mp->d_b_gammaval);
+ }
}
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_elastic_adj_dev");
-#endif
-}
-*/
+ //------------------------------------------
+ // strain
+ //------------------------------------------
+ if( mp->compute_and_store_strain ){
+ cudaFree(mp->d_epsilondev_xx_crust_mantle);
+ cudaFree(mp->d_epsilondev_yy_crust_mantle);
+ cudaFree(mp->d_epsilondev_xy_crust_mantle);
+ cudaFree(mp->d_epsilondev_xz_crust_mantle);
+ cudaFree(mp->d_epsilondev_yz_crust_mantle);
-/* ----------------------------------------------------------------------------------------------- */
+ cudaFree(mp->d_epsilondev_xx_inner_core);
+ cudaFree(mp->d_epsilondev_yy_inner_core);
+ cudaFree(mp->d_epsilondev_xy_inner_core);
+ cudaFree(mp->d_epsilondev_xz_inner_core);
+ cudaFree(mp->d_epsilondev_yz_inner_core);
-// purely adjoint & kernel simulations
+ cudaFree(mp->d_eps_trace_over_3_crust_mantle);
+ cudaFree(mp->d_eps_trace_over_3_inner_core);
+ if( mp->simulation_type == 3 ){
+ cudaFree(mp->d_b_epsilondev_xx_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_yy_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_xy_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_xz_crust_mantle);
+ cudaFree(mp->d_b_epsilondev_yz_crust_mantle);
-/* ----------------------------------------------------------------------------------------------- */
+ cudaFree(mp->d_b_epsilondev_xx_inner_core);
+ cudaFree(mp->d_b_epsilondev_yy_inner_core);
+ cudaFree(mp->d_b_epsilondev_xy_inner_core);
+ cudaFree(mp->d_b_epsilondev_xz_inner_core);
+ cudaFree(mp->d_b_epsilondev_yz_inner_core);
-/*
-extern "C"
-void FC_FUNC_(prepare_sim2_or_3_const_device,
- PREPARE_SIM2_OR_3_CONST_DEVICE)(
- long* Mesh_pointer_f,
- int* islice_selected_rec,
- int* islice_selected_rec_size,
- int* nadj_rec_local,
- int* nrec,
- int* myrank) {
+ cudaFree(mp->d_b_eps_trace_over_3_crust_mantle);
+ cudaFree(mp->d_b_eps_trace_over_3_inner_core);
+ }
+ }
- TRACE("prepare_sim2_or_3_const_device");
+ //------------------------------------------
+ // absorbing boundaries arrays
+ //------------------------------------------
+ if( mp->absorbing_conditions){
+ cudaFree(mp->d_rho_vp_crust_mantle);
+ cudaFree(mp->d_rho_vs_crust_mantle);
+ cudaFree(mp->d_nkmin_xi_crust_mantle);
+ cudaFree(mp->d_nkmin_eta_crust_mantle);
+ cudaFree(mp->d_njmin_crust_mantle);
+ cudaFree(mp->d_njmax_crust_mantle);
+ cudaFree(mp->d_nimin_crust_mantle);
+ cudaFree(mp->d_nimax_crust_mantle);
+ if( mp->nspec2D_xmin_crust_mantle > 0 ){
+ cudaFree(mp->d_ibelm_xmin_crust_mantle);
+ cudaFree(mp->d_normal_xmin_crust_mantle);
+ cudaFree(mp->d_jacobian2D_xmin_crust_mantle);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_xmin_crust_mantle);
+ }
+ }
+ if( mp->nspec2D_xmax_crust_mantle > 0 ){
+ cudaFree(mp->d_ibelm_xmax_crust_mantle);
+ cudaFree(mp->d_normal_xmax_crust_mantle);
+ cudaFree(mp->d_jacobian2D_xmax_crust_mantle);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_xmax_crust_mantle);
+ }
+ }
+ if( mp->nspec2D_ymin_crust_mantle > 0 ){
+ cudaFree(mp->d_ibelm_ymin_crust_mantle);
+ cudaFree(mp->d_normal_ymin_crust_mantle);
+ cudaFree(mp->d_jacobian2D_ymin_crust_mantle);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_ymin_crust_mantle);
+ }
+ }
+ if( mp->nspec2D_ymax_crust_mantle > 0 ){
+ cudaFree(mp->d_ibelm_ymax_crust_mantle);
+ cudaFree(mp->d_normal_ymax_crust_mantle);
+ cudaFree(mp->d_jacobian2D_ymax_crust_mantle);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_ymax_crust_mantle);
+ }
+ }
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
- // allocates arrays for receivers
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_islice_selected_rec,
- *islice_selected_rec_size*sizeof(int)),6001);
- // copies arrays to GPU device
- print_CUDA_error_if_any(cudaMemcpy(mp->d_islice_selected_rec,islice_selected_rec,
- *islice_selected_rec_size*sizeof(int),cudaMemcpyHostToDevice),6002);
-
- // adjoint source arrays
- mp->nadj_rec_local = *nadj_rec_local;
- if( mp->nadj_rec_local > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_adj_sourcearrays,
- (mp->nadj_rec_local)*3*NGLL3*sizeof(realw)),6003);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_pre_computed_irec,
- (mp->nadj_rec_local)*sizeof(int)),6004);
-
- // prepares local irec array:
- // the irec_local variable needs to be precomputed (as
- // h_pre_comp..), because normally it is in the loop updating accel,
- // and due to how it's incremented, it cannot be parallelized
- int* h_pre_computed_irec = (int*) malloc( (mp->nadj_rec_local)*sizeof(int) );
- if( h_pre_computed_irec == NULL ) exit_on_error("prepare_sim2_or_3_const_device: h_pre_computed_irec not allocated\n");
-
- int irec_local = 0;
- for(int irec = 0; irec < *nrec; irec++) {
- if(*myrank == islice_selected_rec[irec]) {
- irec_local++;
- h_pre_computed_irec[irec_local-1] = irec;
+ cudaFree(mp->d_vp_outer_core);
+ cudaFree(mp->d_nkmin_xi_outer_core);
+ cudaFree(mp->d_nkmin_eta_outer_core);
+ cudaFree(mp->d_njmin_outer_core);
+ cudaFree(mp->d_njmax_outer_core);
+ cudaFree(mp->d_nimin_outer_core);
+ cudaFree(mp->d_nimax_outer_core);
+ if( mp->nspec2D_xmin_outer_core > 0 ){
+ cudaFree(mp->d_ibelm_xmin_outer_core);
+ cudaFree(mp->d_jacobian2D_xmin_outer_core);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_xmin_outer_core);
}
}
- if( irec_local != mp->nadj_rec_local ) exit_on_error("prepare_sim2_or_3_const_device: irec_local not equal\n");
- // copies values onto GPU
- print_CUDA_error_if_any(cudaMemcpy(mp->d_pre_computed_irec,h_pre_computed_irec,
- (mp->nadj_rec_local)*sizeof(int),cudaMemcpyHostToDevice),6010);
- free(h_pre_computed_irec);
+ if( mp->nspec2D_xmax_outer_core > 0 ){
+ cudaFree(mp->d_ibelm_xmax_outer_core);
+ cudaFree(mp->d_jacobian2D_xmax_outer_core);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_xmax_outer_core);
+ }
+ }
+ if( mp->nspec2D_ymin_outer_core > 0 ){
+ cudaFree(mp->d_ibelm_ymin_outer_core);
+ cudaFree(mp->d_jacobian2D_ymin_outer_core);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_ymin_outer_core);
+ }
+ }
+ if( mp->nspec2D_ymax_outer_core > 0 ){
+ cudaFree(mp->d_ibelm_ymax_outer_core);
+ cudaFree(mp->d_jacobian2D_ymax_outer_core);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_ymax_outer_core);
+ }
+ }
+ if( mp->nspec2D_zmin_outer_core > 0 ){
+ cudaFree(mp->d_ibelm_zmin_outer_core);
+ cudaFree(mp->d_jacobian2D_zmin_outer_core);
+ if( (mp->simulation_type == 1 && mp->save_forward ) || (mp->simulation_type == 3) ){
+ cudaFree(mp->d_absorb_zmin_outer_core);
+ }
+ }
- // temporary array to prepare extracted source array values
- mp->h_adj_sourcearrays_slice = (realw*) malloc( (mp->nadj_rec_local)*3*NGLL3*sizeof(realw) );
- if( mp->h_adj_sourcearrays_slice == NULL ) exit_on_error("h_adj_sourcearrays_slice not allocated\n");
-
}
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_sim2_or_3_const_device");
-#endif
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// for NOISE simulations
-
-/* ----------------------------------------------------------------------------------------------- */
-
-
-extern "C"
-void FC_FUNC_(prepare_fields_noise_device,
- PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
- int* nspec_top,
- int* ibelm_top_crust_mantle,
- int* NSTEP,
- realw* noise_sourcearray,
- realw* normal_x_noise,
- realw* normal_y_noise,
- realw* normal_z_noise,
- realw* mask_noise,
- realw* jacobian2D_top_crust_mantle) {
-
- TRACE("prepare_fields_noise_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
- // free surface
- mp->nspec_top = *nspec_top;
- if( mp->nspec_top > 0 ){
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_ibelm_top_crust_mantle,
- mp->nspec_top*sizeof(int)),7001);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_ibelm_top_crust_mantle,ibelm_top_crust_mantle,
- mp->nspec_top*sizeof(int),cudaMemcpyHostToDevice),7002);
-
- // alloc storage for the surface buffer to be copied
- print_CUDA_error_if_any(cudaMalloc((void**) &mp->d_noise_surface_movie,
- NDIM*NGLL2*(mp->nspec_top)*sizeof(realw)),7005);
+ //------------------------------------------
+ // mpi buffers
+ //------------------------------------------
+ if( mp->num_interfaces_crust_mantle > 0 ){
+ cudaFree(mp->d_nibool_interfaces_crust_mantle);
+ cudaFree(mp->d_ibool_interfaces_crust_mantle);
+ cudaFree(mp->d_send_accel_buffer_crust_mantle);
}
-
-
- // prepares noise source array
- if( mp->noise_tomography == 1 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_noise_sourcearray,
- NDIM*NGLL3*(*NSTEP)*sizeof(realw)),7101);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_noise_sourcearray,noise_sourcearray,
- NDIM*NGLL3*(*NSTEP)*sizeof(realw),cudaMemcpyHostToDevice),7102);
+ if( mp->num_interfaces_inner_core > 0 ){
+ cudaFree(mp->d_nibool_interfaces_inner_core);
+ cudaFree(mp->d_ibool_interfaces_inner_core);
+ cudaFree(mp->d_send_accel_buffer_inner_core);
}
-
- // prepares noise directions
- if( mp->noise_tomography > 1 ){
- int nface_size = NGLL2*(mp->nspec_top);
- // allocates memory on GPU
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_x_noise,
- nface_size*sizeof(realw)),7301);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_x_noise, normal_x_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7306);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_y_noise,
- nface_size*sizeof(realw)),7302);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_y_noise, normal_y_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7307);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_normal_z_noise,
- nface_size*sizeof(realw)),7303);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_normal_z_noise, normal_z_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7308);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_mask_noise,
- nface_size*sizeof(realw)),7304);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_mask_noise, mask_noise,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7309);
-
- print_CUDA_error_if_any(cudaMalloc((void**)&mp->d_jacobian2D_top_crust_mantle,
- nface_size*sizeof(realw)),7305);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_jacobian2D_top_crust_mantle, jacobian2D_top_crust_mantle,
- nface_size*sizeof(realw),cudaMemcpyHostToDevice),7310);
+ if( mp->num_interfaces_outer_core > 0 ){
+ cudaFree(mp->d_nibool_interfaces_outer_core);
+ cudaFree(mp->d_ibool_interfaces_outer_core);
+ cudaFree(mp->d_send_accel_buffer_outer_core);
}
- // prepares noise strength kernel
- if( mp->noise_tomography == 3 ){
- print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_Sigma_kl),
- NGLL3*(mp->NSPEC_CRUST_MANTLE)*sizeof(realw)),7401);
- // initializes kernel values to zero
- print_CUDA_error_if_any(cudaMemset(mp->d_Sigma_kl,0,
- NGLL3*mp->NSPEC_CRUST_MANTLE*sizeof(realw)),7403);
-
+ //------------------------------------------
+ // NOISE arrays
+ //------------------------------------------
+ if( mp->noise_tomography > 0 ){
+ cudaFree(mp->d_ibelm_top_crust_mantle);
+ cudaFree(mp->d_noise_surface_movie);
+ if( mp->noise_tomography == 1 ) cudaFree(mp->d_noise_sourcearray);
+ if( mp->noise_tomography > 1 ){
+ cudaFree(mp->d_normal_x_noise);
+ cudaFree(mp->d_normal_y_noise);
+ cudaFree(mp->d_normal_z_noise);
+ cudaFree(mp->d_mask_noise);
+ cudaFree(mp->d_jacobian2D_top_crust_mantle);
+ }
+ if( mp->noise_tomography == 3 ) cudaFree(mp->d_Sigma_kl);
}
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("prepare_fields_noise_device");
-#endif
-}
-
-
-
-/* ----------------------------------------------------------------------------------------------- */
-
-// cleanup
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(prepare_cleanup_device,
- PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f) {
-
-TRACE("prepare_cleanup_device");
-
- // frees allocated memory arrays
- Mesh* mp = (Mesh*)(*Mesh_pointer_f);
-
- // frees memory on GPU
+ //------------------------------------------
// crust_mantle
+ //------------------------------------------
cudaFree(mp->d_xix_crust_mantle);
cudaFree(mp->d_xiy_crust_mantle);
cudaFree(mp->d_xiz_crust_mantle);
@@ -2757,6 +2636,7 @@
if( mp->gravity ){
cudaFree(mp->d_xstore_crust_mantle);
}
+ cudaFree(mp->d_phase_ispec_inner_crust_mantle);
cudaFree(mp->d_displ_crust_mantle);
cudaFree(mp->d_veloc_crust_mantle);
@@ -2765,10 +2645,20 @@
cudaFree(mp->d_b_displ_crust_mantle);
cudaFree(mp->d_b_veloc_crust_mantle);
cudaFree(mp->d_b_accel_crust_mantle);
+ cudaFree(mp->d_rho_kl_crust_mantle);
+ if(mp->anisotropic_kl){
+ cudaFree(mp->d_cijkl_kl_crust_mantle);
+ }else{
+ cudaFree(mp->d_alpha_kl_crust_mantle);
+ cudaFree(mp->d_beta_kl_crust_mantle);
+ }
+ if(mp->approximate_hess_kl){ cudaFree(mp->d_hess_kl_crust_mantle);}
}
+ cudaFree(mp->d_rmass_crust_mantle);
-
+ //------------------------------------------
// outer_core
+ //------------------------------------------
cudaFree(mp->d_xix_outer_core);
cudaFree(mp->d_xiy_outer_core);
cudaFree(mp->d_xiz_outer_core);
@@ -2787,7 +2677,9 @@
cudaFree(mp->d_xstore_outer_core);
cudaFree(mp->d_ystore_outer_core);
cudaFree(mp->d_zstore_outer_core);
+
cudaFree(mp->d_ibool_outer_core);
+ cudaFree(mp->d_phase_ispec_inner_outer_core);
cudaFree(mp->d_displ_outer_core);
cudaFree(mp->d_veloc_outer_core);
@@ -2796,9 +2688,14 @@
cudaFree(mp->d_b_displ_outer_core);
cudaFree(mp->d_b_veloc_outer_core);
cudaFree(mp->d_b_accel_outer_core);
+ cudaFree(mp->d_rho_kl_outer_core);
+ cudaFree(mp->d_alpha_kl_outer_core);
}
+ cudaFree(mp->d_rmass_outer_core);
+ //------------------------------------------
// inner_core
+ //------------------------------------------
cudaFree(mp->d_xix_inner_core);
cudaFree(mp->d_xiy_inner_core);
cudaFree(mp->d_xiz_inner_core);
@@ -2832,6 +2729,13 @@
if( mp->simulation_type == 3 && mp->save_boundary_mesh ){
cudaFree(mp->d_rhostore_inner_core);
}
+ cudaFree(mp->d_idoubling_inner_core);
+ if( mp->gravity ){
+ cudaFree(mp->d_xstore_inner_core);
+ cudaFree(mp->d_ystore_inner_core);
+ cudaFree(mp->d_zstore_inner_core);
+ }
+ cudaFree(mp->d_phase_ispec_inner_inner_core);
cudaFree(mp->d_displ_inner_core);
cudaFree(mp->d_veloc_inner_core);
@@ -2840,259 +2744,15 @@
cudaFree(mp->d_b_displ_inner_core);
cudaFree(mp->d_b_veloc_inner_core);
cudaFree(mp->d_b_accel_inner_core);
- }
- // sources
- if( mp->simulation_type == 1 || mp->simulation_type == 3 ){
- cudaFree(mp->d_sourcearrays);
- cudaFree(mp->d_stf_pre_compute);
+ cudaFree(mp->d_rho_kl_inner_core);
+ cudaFree(mp->d_alpha_kl_inner_core);
+ cudaFree(mp->d_beta_kl_inner_core);
}
+ cudaFree(mp->d_rmass_inner_core);
- cudaFree(mp->d_islice_selected_source);
- cudaFree(mp->d_ispec_selected_source);
-
- // receivers
- if( mp->nrec_local > 0 ) {
- cudaFree(mp->d_number_receiver_global);
- cudaFree(mp->d_station_seismo_field);
- free(mp->h_station_seismo_field);
- }
- cudaFree(mp->d_ispec_selected_rec);
-
- // rotation arrays
- if( mp->rotation ){
- cudaFree(mp->d_A_array_rotation);
- cudaFree(mp->d_B_array_rotation);
- if( mp->simulation_type == 3 ){
- cudaFree(mp->d_b_A_array_rotation);
- cudaFree(mp->d_b_B_array_rotation);
- }
- }
-
- // gravity arrays
- if( ! mp->gravity ){
- cudaFree(mp->d_d_ln_density_dr_table);
- }else{
- cudaFree(mp->d_minus_rho_g_over_kappa_fluid);
- cudaFree(mp->d_minus_gravity_table);
- cudaFree(mp->d_minus_deriv_gravity_table);
- cudaFree(mp->d_density_table);
- }
-
- // attenuation arrays
- if( mp->attenuation ){
- cudaFree(mp->d_one_minus_sum_beta_crust_mantle);
- cudaFree(mp->d_one_minus_sum_beta_inner_core);
- if( ! mp->use_attenuation_mimic ){
- cudaFree(mp->d_R_xx_crust_mantle);
- cudaFree(mp->d_R_yy_crust_mantle);
- cudaFree(mp->d_R_xy_crust_mantle);
- cudaFree(mp->d_R_xz_crust_mantle);
- cudaFree(mp->d_R_yz_crust_mantle);
- cudaFree(mp->d_factor_common_crust_mantle);
- cudaFree(mp->d_R_xx_inner_core);
- cudaFree(mp->d_R_yy_inner_core);
- cudaFree(mp->d_R_xy_inner_core);
- cudaFree(mp->d_R_xz_inner_core);
- cudaFree(mp->d_R_yz_inner_core);
- cudaFree(mp->d_factor_common_inner_core);
- }
- cudaFree(mp->d_alphaval);
- cudaFree(mp->d_betaval);
- cudaFree(mp->d_gammaval);
- if( mp->simulation_type == 3 ){
- cudaFree(mp->d_b_alphaval);
- cudaFree(mp->d_b_betaval);
- cudaFree(mp->d_b_gammaval);
- }
- }
-
- // strain
- if( mp->compute_and_store_strain ){
- cudaFree(mp->d_epsilondev_xx_crust_mantle);
- cudaFree(mp->d_epsilondev_yy_crust_mantle);
- cudaFree(mp->d_epsilondev_xy_crust_mantle);
- cudaFree(mp->d_epsilondev_xz_crust_mantle);
- cudaFree(mp->d_epsilondev_yz_crust_mantle);
-
- cudaFree(mp->d_epsilondev_xx_inner_core);
- cudaFree(mp->d_epsilondev_yy_inner_core);
- cudaFree(mp->d_epsilondev_xy_inner_core);
- cudaFree(mp->d_epsilondev_xz_inner_core);
- cudaFree(mp->d_epsilondev_yz_inner_core);
-
- cudaFree(mp->d_eps_trace_over_3_crust_mantle);
- cudaFree(mp->d_eps_trace_over_3_inner_core);
- if( mp->simulation_type == 3 ){
- cudaFree(mp->d_b_epsilondev_xx_crust_mantle);
- cudaFree(mp->d_b_epsilondev_yy_crust_mantle);
- cudaFree(mp->d_b_epsilondev_xy_crust_mantle);
- cudaFree(mp->d_b_epsilondev_xz_crust_mantle);
- cudaFree(mp->d_b_epsilondev_yz_crust_mantle);
-
- cudaFree(mp->d_b_epsilondev_xx_inner_core);
- cudaFree(mp->d_b_epsilondev_yy_inner_core);
- cudaFree(mp->d_b_epsilondev_xy_inner_core);
- cudaFree(mp->d_b_epsilondev_xz_inner_core);
- cudaFree(mp->d_b_epsilondev_yz_inner_core);
-
- cudaFree(mp->d_b_eps_trace_over_3_crust_mantle);
- cudaFree(mp->d_b_eps_trace_over_3_inner_core);
- }
- }
-
-
/*
- // absorbing boundaries
- if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0 ){
- cudaFree(mp->d_abs_boundary_ispec);
- cudaFree(mp->d_abs_boundary_ijk);
- cudaFree(mp->d_abs_boundary_normal);
- cudaFree(mp->d_abs_boundary_jacobian2Dw);
- }
- // interfaces
- cudaFree(mp->d_nibool_interfaces_ext_mesh);
- cudaFree(mp->d_ibool_interfaces_ext_mesh);
-
- // global indexing
- cudaFree(mp->d_ispec_is_inner);
- cudaFree(mp->d_ibool);
-
- // ACOUSTIC arrays
- if( *ACOUSTIC_SIMULATION ){
- cudaFree(mp->d_potential_acoustic);
- cudaFree(mp->d_potential_dot_acoustic);
- cudaFree(mp->d_potential_dot_dot_acoustic);
- cudaFree(mp->d_send_potential_dot_dot_buffer);
- cudaFree(mp->d_rmass_acoustic);
- cudaFree(mp->d_rhostore);
- cudaFree(mp->d_kappastore);
- cudaFree(mp->d_phase_ispec_inner_acoustic);
- cudaFree(mp->d_ispec_is_acoustic);
-
- if( *NOISE_TOMOGRAPHY == 0 ){
- cudaFree(mp->d_free_surface_ispec);
- cudaFree(mp->d_free_surface_ijk);
- }
-
- if( *ABSORBING_CONDITIONS ) cudaFree(mp->d_b_absorb_potential);
-
- if( *SIMULATION_TYPE == 3 ) {
- cudaFree(mp->d_b_potential_acoustic);
- cudaFree(mp->d_b_potential_dot_acoustic);
- cudaFree(mp->d_b_potential_dot_dot_acoustic);
- cudaFree(mp->d_rho_ac_kl);
- cudaFree(mp->d_kappa_ac_kl);
- if( *APPROXIMATE_HESS_KL) cudaFree(mp->d_hess_ac_kl);
- }
-
-
- if(mp->nrec_local > 0 ){
- cudaFree(mp->d_station_seismo_potential);
- free(mp->h_station_seismo_potential);
- }
-
- } // ACOUSTIC_SIMULATION
-
- // ELASTIC arrays
- if( *ELASTIC_SIMULATION ){
- cudaFree(mp->d_displ);
- cudaFree(mp->d_veloc);
- cudaFree(mp->d_accel);
- cudaFree(mp->d_send_accel_buffer);
- cudaFree(mp->d_rmass);
-
- cudaFree(mp->d_phase_ispec_inner_elastic);
- cudaFree(mp->d_ispec_is_elastic);
-
- if( mp->nrec_local > 0 ){
- cudaFree(mp->d_station_seismo_field);
- free(mp->h_station_seismo_field);
- }
-
- if( *ABSORBING_CONDITIONS && mp->d_num_abs_boundary_faces > 0){
- cudaFree(mp->d_rho_vp);
- cudaFree(mp->d_rho_vs);
-
- if(*SIMULATION_TYPE == 3 || ( *SIMULATION_TYPE == 1 && *SAVE_FORWARD ))
- cudaFree(mp->d_b_absorb_field);
- }
-
- if( *SIMULATION_TYPE == 3 ) {
- cudaFree(mp->d_b_displ);
- cudaFree(mp->d_b_veloc);
- cudaFree(mp->d_b_accel);
- cudaFree(mp->d_rho_kl);
- cudaFree(mp->d_mu_kl);
- cudaFree(mp->d_kappa_kl);
- if( *APPROXIMATE_HESS_KL ) cudaFree(mp->d_hess_el_kl);
- }
-
- if( *COMPUTE_AND_STORE_STRAIN ){
- cudaFree(mp->d_epsilondev_xx);
- cudaFree(mp->d_epsilondev_yy);
- cudaFree(mp->d_epsilondev_xy);
- cudaFree(mp->d_epsilondev_xz);
- cudaFree(mp->d_epsilondev_yz);
- if( *SIMULATION_TYPE == 3 ){
- cudaFree(mp->d_epsilon_trace_over_3);
- cudaFree(mp->d_b_epsilon_trace_over_3);
- cudaFree(mp->d_b_epsilondev_xx);
- cudaFree(mp->d_b_epsilondev_yy);
- cudaFree(mp->d_b_epsilondev_xy);
- cudaFree(mp->d_b_epsilondev_xz);
- cudaFree(mp->d_b_epsilondev_yz);
- }
- }
-
- if( *ATTENUATION ){
- cudaFree(mp->d_factor_common);
- cudaFree(mp->d_one_minus_sum_beta);
- cudaFree(mp->d_alphaval);
- cudaFree(mp->d_betaval);
- cudaFree(mp->d_gammaval);
- cudaFree(mp->d_R_xx);
- cudaFree(mp->d_R_yy);
- cudaFree(mp->d_R_xy);
- cudaFree(mp->d_R_xz);
- cudaFree(mp->d_R_yz);
- if( *SIMULATION_TYPE == 3){
- cudaFree(mp->d_b_R_xx);
- cudaFree(mp->d_b_R_yy);
- cudaFree(mp->d_b_R_xy);
- cudaFree(mp->d_b_R_xz);
- cudaFree(mp->d_b_R_yz);
- cudaFree(mp->d_b_alphaval);
- cudaFree(mp->d_b_betaval);
- cudaFree(mp->d_b_gammaval);
- }
- }
-
- if( *ANISOTROPY ){
- cudaFree(mp->d_c11store);
- cudaFree(mp->d_c12store);
- cudaFree(mp->d_c13store);
- cudaFree(mp->d_c14store);
- cudaFree(mp->d_c15store);
- cudaFree(mp->d_c16store);
- cudaFree(mp->d_c22store);
- cudaFree(mp->d_c23store);
- cudaFree(mp->d_c24store);
- cudaFree(mp->d_c25store);
- cudaFree(mp->d_c26store);
- cudaFree(mp->d_c33store);
- cudaFree(mp->d_c34store);
- cudaFree(mp->d_c35store);
- cudaFree(mp->d_c36store);
- cudaFree(mp->d_c44store);
- cudaFree(mp->d_c45store);
- cudaFree(mp->d_c46store);
- cudaFree(mp->d_c55store);
- cudaFree(mp->d_c56store);
- cudaFree(mp->d_c66store);
- }
-
if( *OCEANS ){
if( mp->num_free_surface_faces > 0 ){
cudaFree(mp->d_rmass_ocean_load);
@@ -3106,34 +2766,11 @@
}
} // ELASTIC_SIMULATION
- // purely adjoint & kernel array
- if( *SIMULATION_TYPE == 2 || *SIMULATION_TYPE == 3 ){
- cudaFree(mp->d_islice_selected_rec);
- if(mp->nadj_rec_local > 0 ){
- cudaFree(mp->d_adj_sourcearrays);
- cudaFree(mp->d_pre_computed_irec);
- free(mp->h_adj_sourcearrays_slice);
- }
- }
-
- // NOISE arrays
- if( *NOISE_TOMOGRAPHY > 0 ){
- cudaFree(mp->d_free_surface_ispec);
- cudaFree(mp->d_free_surface_ijk);
- cudaFree(mp->d_noise_surface_movie);
- if( *NOISE_TOMOGRAPHY == 1 ) cudaFree(mp->d_noise_sourcearray);
- if( *NOISE_TOMOGRAPHY > 1 ){
- cudaFree(mp->d_normal_x_noise);
- cudaFree(mp->d_normal_y_noise);
- cudaFree(mp->d_normal_z_noise);
- cudaFree(mp->d_mask_noise);
- cudaFree(mp->d_free_surface_jacobian2Dw);
- }
- if( *NOISE_TOMOGRAPHY == 3 ) cudaFree(mp->d_Sigma_kl);
- }
-
*/
+ // releases previous contexts
+ cudaThreadExit();
+
// mesh pointer - not needed anymore
free(mp);
}
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/specfem3D_gpu_cuda_method_stubs.c 2012-02-23 06:14:45 UTC (rev 19664)
@@ -205,20 +205,20 @@
// src/cuda/compute_kernels_cuda.cu
//
-void FC_FUNC_(compute_kernels_elastic_cuda,
- COMPUTE_KERNELS_ELASTIC_CUDA)(long* Mesh_pointer,
- realw* deltat_f) {}
+void FC_FUNC_(compute_kernels_cm_cuda,
+ COMPUTE_KERNELS_CM_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+void FC_FUNC_(compute_kernels_ic_cuda,
+ COMPUTE_KERNELS_IC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+
+void FC_FUNC_(compute_kernels_oc_cuda,
+ COMPUTE_KERNELS_OC_CUDA)(long* Mesh_pointer,realw* deltat_f) {}
+
void FC_FUNC_(compute_kernels_strgth_noise_cu,
COMPUTE_KERNELS_STRGTH_NOISE_CU)(long* Mesh_pointer,
- realw* h_noise_surface_movie,
- realw* deltat) {}
+ realw* h_noise_surface_movie,
+ realw* deltat_f) {}
-void FC_FUNC_(compute_kernels_acoustic_cuda,
- COMPUTE_KERNELS_ACOUSTIC_CUDA)(
- long* Mesh_pointer,
- realw* deltat_f) {}
-
void FC_FUNC_(compute_kernels_hess_cuda,
COMPUTE_KERNELS_HESS_CUDA)(long* Mesh_pointer,
realw* deltat_f) {}
@@ -364,6 +364,7 @@
int* ANISOTROPIC_INNER_CORE_f,
int* SAVE_BOUNDARY_MESH_f,
int* USE_MESH_COLORING_GPU_f,
+ int* ANISOTROPIC_KL_f,
int* APPROXIMATE_HESS_KL_f) {}
void FC_FUNC_(prepare_fields_rotation_device,
@@ -486,6 +487,18 @@
int* ibool_interfaces_outer_core
){}
+void FC_FUNC_(prepare_fields_noise_device,
+ PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
+ int* nspec_top,
+ int* ibelm_top_crust_mantle,
+ int* NSTEP,
+ realw* noise_sourcearray,
+ realw* normal_x_noise,
+ realw* normal_y_noise,
+ realw* normal_z_noise,
+ realw* mask_noise,
+ realw* jacobian2D_top_crust_mantle) {}
+
void FC_FUNC_(prepare_crust_mantle_device,
PREPARE_CRUST_MANTLE_DEVICE)(long* Mesh_pointer_f,
realw* h_xix, realw* h_xiy, realw* h_xiz,
@@ -542,42 +555,8 @@
int* num_phase_ispec,
int* phase_ispec_inner,
int* nspec_outer,
- int* nspec_inner
- //int* iboolleft_xi, int* iboolright_xi,
- //int* iboolleft_eta, int* iboolright_eta,
- //int* npoin2D_xi, int* npoin2D_eta
- ) {}
+ int* nspec_inner) {}
-void FC_FUNC_(prepare_fields_acoustic_device,
- PREPARE_FIELDS_ACOUSTIC_DEVICE)(long* Mesh_pointer_f,
- realw* rmass_acoustic,
- realw* rhostore,
- realw* kappastore,
- int* num_phase_ispec_acoustic,
- int* phase_ispec_inner_acoustic,
- int* ispec_is_acoustic,
- int* NOISE_TOMOGRAPHY,
- int* num_free_surface_faces,
- int* free_surface_ispec,
- int* free_surface_ijk,
- int* ABSORBING_CONDITIONS,
- int* b_reclen_potential,
- realw* b_absorb_potential,
- int* ELASTIC_SIMULATION,
- int* num_coupling_ac_el_faces,
- int* coupling_ac_el_ispec,
- int* coupling_ac_el_ijk,
- realw* coupling_ac_el_normal,
- realw* coupling_ac_el_jacobian2Dw,
- int* num_colors_outer_acoustic,
- int* num_colors_inner_acoustic,
- int* num_elem_colors_acoustic) {}
-
-void FC_FUNC_(prepare_fields_acoustic_adj_dev,
- PREPARE_FIELDS_ACOUSTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* SIMULATION_TYPE,
- int* APPROXIMATE_HESS_KL) {}
-
void FC_FUNC_(prepare_fields_elastic_device,
PREPARE_FIELDS_ELASTIC_DEVICE)(long* Mesh_pointer_f,
int* size,
@@ -633,42 +612,6 @@
realw *c56store,
realw *c66store){}
-void FC_FUNC_(prepare_fields_elastic_adj_dev,
- PREPARE_FIELDS_ELASTIC_ADJ_DEV)(long* Mesh_pointer_f,
- int* size,
- int* SIMULATION_TYPE,
- int* COMPUTE_AND_STORE_STRAIN,
- realw* epsilon_trace_over_3,
- realw* b_epsilondev_xx,realw* b_epsilondev_yy,realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,realw* b_epsilondev_yz,
- realw* b_epsilon_trace_over_3,
- int* ATTENUATION,
- int* R_size,
- realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
- realw* b_alphaval,realw* b_betaval,realw* b_gammaval,
- int* APPROXIMATE_HESS_KL){}
-
-void FC_FUNC_(prepare_sim2_or_3_const_device,
- PREPARE_SIM2_OR_3_CONST_DEVICE)(
- long* Mesh_pointer_f,
- int* islice_selected_rec,
- int* islice_selected_rec_size,
- int* nadj_rec_local,
- int* nrec,
- int* myrank) {}
-
-void FC_FUNC_(prepare_fields_noise_device,
- PREPARE_FIELDS_NOISE_DEVICE)(long* Mesh_pointer_f,
- int* nspec_top,
- int* ibelm_top_crust_mantle,
- int* NSTEP,
- realw* noise_sourcearray,
- realw* normal_x_noise,
- realw* normal_y_noise,
- realw* normal_z_noise,
- realw* mask_noise,
- realw* jacobian2D_top_crust_mantle) {}
-
void FC_FUNC_(prepare_cleanup_device,
PREPARE_CLEANUP_DEVICE)(long* Mesh_pointer_f) {}
@@ -734,6 +677,12 @@
void FC_FUNC_(transfer_b_displ_ic_from_device,
TRANSFER_B_DISPL_IC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+void FC_FUNC_(transfer_displ_oc_from_device,
+ TRANSFER_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
+void FC_FUNC_(transfer_b_displ_oc_from_device,
+ TRANSFER_B_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {}
+
void FC_FUNC_(transfer_veloc_cm_from_device,
TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {}
@@ -746,6 +695,9 @@
void FC_FUNC_(transfer_accel_ic_from_device,
TRANSFER_ACCEL_IC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+void FC_FUNC_(transfer_accel_oc_from_device,
+ TRANSFER_ACCEL_OC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {}
+
void FC_FUNC_(transfer_strain_cm_from_device,
TRANSFER_STRAIN_CM_FROM_DEVICE)(long* Mesh_pointer,
realw* eps_trace_over_3,
@@ -790,140 +742,38 @@
realw* A_array_rotation,
realw* B_array_rotation) {}
-void FC_FUNC_(transfer_b_att_cm_to_device,
- TRANSFER_B_ATT_CM_TO_DEVICE)(long* Mesh_pointer,
- realw* R_xx,
- realw* R_yy,
- realw* R_xy,
- realw* R_xz,
- realw* R_yz) {}
+void FC_FUNC_(transfer_kernels_cm_to_host,
+ TRANSFER_KERNELS_CM_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ realw* h_cijkl_kl,
+ int* NSPEC) {}
-void FC_FUNC_(transfer_sigma_from_device,
- TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_b_displ_from_device,
- TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_displ_from_device,
- TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_compute_kernel_answers_from_device,
- TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
- realw* rho_kl,int* size_rho,
- realw* mu_kl, int* size_mu,
- realw* kappa_kl, int* size_kappa) {}
-
-void FC_FUNC_(transfer_compute_kernel_fields_from_device,
- TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
- realw* accel, int* size_accel,
- realw* b_displ, int* size_b_displ,
- realw* epsilondev_xx,
- realw* epsilondev_yy,
- realw* epsilondev_xy,
- realw* epsilondev_xz,
- realw* epsilondev_yz,
- int* size_epsilondev,
- realw* b_epsilondev_xx,
- realw* b_epsilondev_yy,
- realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,
- realw* b_epsilondev_yz,
- int* size_b_epsilondev,
- realw* rho_kl,int* size_rho,
- realw* mu_kl, int* size_mu,
- realw* kappa_kl, int* size_kappa,
- realw* epsilon_trace_over_3,
- realw* b_epsilon_trace_over_3,
- int* size_epsilon_trace_over_3) {}
-
-void FC_FUNC_(transfer_b_fields_att_to_device,
- TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
- realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
- int* size_R,
- realw* b_epsilondev_xx,
- realw* b_epsilondev_yy,
- realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,
- realw* b_epsilondev_yz,
- int* size_epsilondev) {}
-
-void FC_FUNC_(transfer_fields_att_from_device,
- TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
- realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
- int* size_R,
- realw* epsilondev_xx,
- realw* epsilondev_yy,
- realw* epsilondev_xy,
- realw* epsilondev_xz,
- realw* epsilondev_yz,
- int* size_epsilondev) {}
-
-void FC_FUNC_(transfer_kernels_el_to_host,
- TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
+void FC_FUNC_(transfer_kernels_ic_to_host,
+ TRANSFER_KERNELS_IC_TO_HOST)(long* Mesh_pointer,
realw* h_rho_kl,
- realw* h_mu_kl,
- realw* h_kappa_kl,
- int* NSPEC_AB) {}
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ int* NSPEC) {}
+void FC_FUNC_(transfer_kernels_oc_to_host,
+ TRANSFER_KERNELS_OC_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ int* NSPEC) {}
+
void FC_FUNC_(transfer_kernels_noise_to_host,
TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
- realw* h_Sigma_kl,
- int* NSPEC_AB) {}
+ realw* h_Sigma_kl,
+ int* NSPEC) {}
-void FC_FUNC_(transfer_fields_ac_to_device,
- TRANSFER_FIELDS_AC_TO_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_b_fields_ac_to_device,
- TRANSFER_B_FIELDS_AC_TO_DEVICE)(
- int* size,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_b_fields_ac_from_device,
- TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_dot_dot_from_device,
- TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_b_dot_dot_from_device,
- TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {}
-
-void FC_FUNC_(transfer_kernels_ac_to_host,
- TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
- realw* h_rho_ac_kl,
- realw* h_kappa_ac_kl,
- int* NSPEC_AB) {}
-
void FC_FUNC_(transfer_kernels_hess_cm_tohost,
TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
realw* h_hess_kl,
int* NSPEC) {}
-void FC_FUNC_(transfer_kernels_hess_ac_tohost,
- TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
- realw* h_hess_ac_kl,
- int* NSPEC_AB) {}
-
//
// src/cuda/write_seismograms_cuda.cu
//
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/transfer_fields_cuda.cu 2012-02-23 06:14:45 UTC (rev 19664)
@@ -329,6 +329,32 @@
/* ----------------------------------------------------------------------------------------------- */
extern "C"
+void FC_FUNC_(transfer_displ_oc_from_device,
+ TRANSFER_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_displ_oc_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
+void FC_FUNC_(transfer_b_displ_oc_from_device,
+ TRANSFER_B_DISPL_OC_FROM_DEVICE)(int* size, realw* displ, long* Mesh_pointer_f) {
+
+ TRACE("transfer_b_displ_oc_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_b_displ_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40006);
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
+extern "C"
void FC_FUNC_(transfer_veloc_cm_from_device,
TRANSFER_DISPL_CM_FROM_DEVICE)(int* size, realw* veloc, long* Mesh_pointer_f) {
@@ -383,6 +409,20 @@
/* ----------------------------------------------------------------------------------------------- */
+extern "C"
+void FC_FUNC_(transfer_accel_oc_from_device,
+ TRANSFER_ACCEL_OC_FROM_DEVICE)(int* size, realw* accel,long* Mesh_pointer_f) {
+
+ TRACE("transfer_accel_oc_from_device");
+
+ Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+
+ print_CUDA_error_if_any(cudaMemcpy(accel,mp->d_accel_outer_core,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40026);
+
+}
+
+/* ----------------------------------------------------------------------------------------------- */
+
// strain fields
/* ----------------------------------------------------------------------------------------------- */
@@ -563,455 +603,114 @@
/* ----------------------------------------------------------------------------------------------- */
-// attenuation fields
+// KERNEL transfers
/* ----------------------------------------------------------------------------------------------- */
-/*
-// feature not used so far ...
-// crust_mantle
+// crust/mantle
+
extern "C"
-void FC_FUNC_(transfer_b_att_cm_to_device,
- TRANSFER_B_ATT_CM_TO_DEVICE)(long* Mesh_pointer,
- realw* R_xx,
- realw* R_yy,
- realw* R_xy,
- realw* R_xz,
- realw* R_yz) {
- TRACE("transfer_b_att_cm_to_device");
+void FC_FUNC_(transfer_kernels_cm_to_host,
+ TRANSFER_KERNELS_CM_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ realw* h_cijkl_kl,
+ int* NSPEC) {
+ TRACE("transfer_kernels_cm_to_host");
//get mesh pointer out of fortran integer container
Mesh* mp = (Mesh*)(*Mesh_pointer);
- if( ! mp->use_attenuation_mimic){ exit_on_cuda_error("not supported attenuation feature yet");}
+ int size = (*NSPEC)*NGLL3;
- // not used so far...
- // see notes about USE_ATTENUATION_MIMIC
- int size = N_SLS*NGLL3*mp->NSPEC_CRUST_MANTLE;
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl_crust_mantle,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40101);
- cudaMemcpy(mp->d_b_R_xx_crust_mantle,R_xx,size*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_yy_crust_mantle,R_yy,size*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_xy_crust_mantle,R_xy,size*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_xz_crust_mantle,R_xz,size*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_yz_crust_mantle,R_yz,size*sizeof(realw),cudaMemcpyHostToDevice);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_att_cm_to_device");
-#endif
+ if( ! mp->anisotropic_kl){
+ print_CUDA_error_if_any(cudaMemcpy(h_alpha_kl,mp->d_alpha_kl_crust_mantle,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40102);
+ print_CUDA_error_if_any(cudaMemcpy(h_beta_kl,mp->d_beta_kl_crust_mantle,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40103);
+ }else{
+ print_CUDA_error_if_any(cudaMemcpy(h_cijkl_kl,mp->d_cijkl_kl_crust_mantle,
+ 21*size*sizeof(realw),cudaMemcpyDeviceToHost),40102);
+ }
}
-*/
-
-
-
-
-
-
-
-
-//daniel: TODO old code routines...
-
-
-
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_sigma_from_device,
- TRANSFER_SIGMA_FROM_DEVICE)(int* size, realw* sigma_kl,long* Mesh_pointer_f) {
+// inner core
-TRACE("transfer_sigma_from_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(sigma_kl,mp->d_Sigma_kl,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40046);
-
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
extern "C"
-void FC_FUNC_(transfer_b_displ_from_device,
- TRANSFER_B_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
+void FC_FUNC_(transfer_kernels_ic_to_host,
+ TRANSFER_KERNELS_IC_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ realw* h_beta_kl,
+ int* NSPEC) {
+TRACE("transfer_kernels_ic_to_host");
-TRACE("transfer_b_displ_from_device");
+ //get mesh pointer out of fortran integer container
+ Mesh* mp = (Mesh*)(*Mesh_pointer);
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
+ int size = (*NSPEC)*NGLL3;
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40056);
-
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl_inner_core,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40101);
+ print_CUDA_error_if_any(cudaMemcpy(h_alpha_kl,mp->d_alpha_kl_inner_core,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40102);
+ print_CUDA_error_if_any(cudaMemcpy(h_beta_kl,mp->d_beta_kl_inner_core,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),40103);
}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_displ_from_device,
- TRANSFER_DISPL_FROM_DEVICE)(int* size, realw* displ,long* Mesh_pointer_f) {
-TRACE("transfer_displ_from_device");
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(displ,mp->d_displ,sizeof(realw)*(*size),cudaMemcpyDeviceToHost),40066);
-
-}
-*/
/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_compute_kernel_answers_from_device,
- TRANSFER_COMPUTE_KERNEL_ANSWERS_FROM_DEVICE)(long* Mesh_pointer,
- realw* rho_kl,int* size_rho,
- realw* mu_kl, int* size_mu,
- realw* kappa_kl, int* size_kappa) {
-TRACE("transfer_compute_kernel_answers_from_device");
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(realw),cudaMemcpyDeviceToHost);
+// outer core
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
extern "C"
-void FC_FUNC_(transfer_compute_kernel_fields_from_device,
- TRANSFER_COMPUTE_KERNEL_FIELDS_FROM_DEVICE)(long* Mesh_pointer,
- realw* accel, int* size_accel,
- realw* b_displ, int* size_b_displ,
- realw* epsilondev_xx,
- realw* epsilondev_yy,
- realw* epsilondev_xy,
- realw* epsilondev_xz,
- realw* epsilondev_yz,
- int* size_epsilondev,
- realw* b_epsilondev_xx,
- realw* b_epsilondev_yy,
- realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,
- realw* b_epsilondev_yz,
- int* size_b_epsilondev,
- realw* rho_kl,int* size_rho,
- realw* mu_kl, int* size_mu,
- realw* kappa_kl, int* size_kappa,
- realw* epsilon_trace_over_3,
- realw* b_epsilon_trace_over_3,
- int* size_epsilon_trace_over_3) {
-TRACE("transfer_compute_kernel_fields_from_device");
+void FC_FUNC_(transfer_kernels_oc_to_host,
+ TRANSFER_KERNELS_OC_TO_HOST)(long* Mesh_pointer,
+ realw* h_rho_kl,
+ realw* h_alpha_kl,
+ int* NSPEC) {
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- cudaMemcpy(accel,mp->d_accel,*size_accel*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_displ,mp->d_b_displ,*size_b_displ*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xx,mp->d_b_epsilondev_xx,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_yy,mp->d_b_epsilondev_yy,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xy,mp->d_b_epsilondev_xy,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_xz,mp->d_b_epsilondev_xz,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilondev_yz,mp->d_b_epsilondev_yz,*size_b_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(rho_kl,mp->d_rho_kl,*size_rho*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(mu_kl,mp->d_mu_kl,*size_mu*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(kappa_kl,mp->d_kappa_kl,*size_kappa*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilon_trace_over_3,mp->d_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(realw),
- cudaMemcpyDeviceToHost);
- cudaMemcpy(b_epsilon_trace_over_3,mp->d_b_epsilon_trace_over_3,*size_epsilon_trace_over_3*sizeof(realw),
- cudaMemcpyDeviceToHost);
+ TRACE("transfer_kernels_oc_to_host");
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_compute_kernel_fields_from_device");
-#endif
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-// attenuation fields
-
-extern "C"
-void FC_FUNC_(transfer_b_fields_att_to_device,
- TRANSFER_B_FIELDS_ATT_TO_DEVICE)(long* Mesh_pointer,
- realw* b_R_xx,realw* b_R_yy,realw* b_R_xy,realw* b_R_xz,realw* b_R_yz,
- int* size_R,
- realw* b_epsilondev_xx,
- realw* b_epsilondev_yy,
- realw* b_epsilondev_xy,
- realw* b_epsilondev_xz,
- realw* b_epsilondev_yz,
- int* size_epsilondev) {
- TRACE("transfer_b_fields_att_to_device");
//get mesh pointer out of fortran integer container
Mesh* mp = (Mesh*)(*Mesh_pointer);
- cudaMemcpy(mp->d_b_R_xx,b_R_xx,*size_R*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_yy,b_R_yy,*size_R*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_xy,b_R_xy,*size_R*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_xz,b_R_xz,*size_R*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_R_yz,b_R_yz,*size_R*sizeof(realw),cudaMemcpyHostToDevice);
+ int size = (*NSPEC)*NGLL3;
- cudaMemcpy(mp->d_b_epsilondev_xx,b_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_yy,b_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_xy,b_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_xz,b_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice);
- cudaMemcpy(mp->d_b_epsilondev_yz,b_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyHostToDevice);
-
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_fields_att_to_device");
-#endif
+ // copies kernel values over to CPU host
+ print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl_outer_core,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),54101);
+ print_CUDA_error_if_any(cudaMemcpy(h_alpha_kl,mp->d_alpha_kl_outer_core,
+ size*sizeof(realw),cudaMemcpyDeviceToHost),54102);
}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-// attenuation fields
-extern "C"
-void FC_FUNC_(transfer_fields_att_from_device,
- TRANSFER_FIELDS_ATT_FROM_DEVICE)(long* Mesh_pointer,
- realw* R_xx,realw* R_yy,realw* R_xy,realw* R_xz,realw* R_yz,
- int* size_R,
- realw* epsilondev_xx,
- realw* epsilondev_yy,
- realw* epsilondev_xy,
- realw* epsilondev_xz,
- realw* epsilondev_yz,
- int* size_epsilondev) {
- TRACE("transfer_fields_att_from_device");
- //get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
-
- cudaMemcpy(R_xx,mp->d_R_xx,*size_R*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(R_yy,mp->d_R_yy,*size_R*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(R_xy,mp->d_R_xy,*size_R*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(R_xz,mp->d_R_xz,*size_R*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(R_yz,mp->d_R_yz,*size_R*sizeof(realw),cudaMemcpyDeviceToHost);
-
- cudaMemcpy(epsilondev_xx,mp->d_epsilondev_xx,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yy,mp->d_epsilondev_yy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xy,mp->d_epsilondev_xy,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_xz,mp->d_epsilondev_xz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
- cudaMemcpy(epsilondev_yz,mp->d_epsilondev_yz,*size_epsilondev*sizeof(realw),cudaMemcpyDeviceToHost);
-
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_fields_att_from_device");
-#endif
-}
-*/
-
/* ----------------------------------------------------------------------------------------------- */
-extern "C"
-void FC_FUNC_(transfer_kernels_el_to_host,
- TRANSFER_KERNELS_EL_TO_HOST)(long* Mesh_pointer,
- realw* h_rho_kl,
- realw* h_mu_kl,
- realw* h_kappa_kl,
- int* NSPEC_AB) {
-TRACE("transfer_kernels_el_to_host");
- //get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
-
- print_CUDA_error_if_any(cudaMemcpy(h_rho_kl,mp->d_rho_kl,*NSPEC_AB*NGLL3*sizeof(realw),
- cudaMemcpyDeviceToHost),40101);
- print_CUDA_error_if_any(cudaMemcpy(h_mu_kl,mp->d_mu_kl,*NSPEC_AB*NGLL3*sizeof(realw),
- cudaMemcpyDeviceToHost),40102);
- print_CUDA_error_if_any(cudaMemcpy(h_kappa_kl,mp->d_kappa_kl,*NSPEC_AB*NGLL3*sizeof(realw),
- cudaMemcpyDeviceToHost),40103);
-
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
// for NOISE simulations
-/* ----------------------------------------------------------------------------------------------- */
-
extern "C"
void FC_FUNC_(transfer_kernels_noise_to_host,
TRANSFER_KERNELS_NOISE_TO_HOST)(long* Mesh_pointer,
- realw* h_Sigma_kl,
- int* NSPEC_AB) {
-TRACE("transfer_kernels_noise_to_host");
+ realw* h_Sigma_kl,
+ int* NSPEC) {
+ TRACE("transfer_kernels_noise_to_host");
Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
- print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
+ print_CUDA_error_if_any(cudaMemcpy(h_Sigma_kl,mp->d_Sigma_kl,NGLL3*(*NSPEC)*sizeof(realw),
cudaMemcpyDeviceToHost),40201);
-
}
-
/* ----------------------------------------------------------------------------------------------- */
-// for ACOUSTIC simulations
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_fields_ac_to_device,
- TRANSFER_FIELDS_AC_TO_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {
-TRACE("transfer_fields_ac_to_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_acoustic,potential_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),50110);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_acoustic,potential_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),50120);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_potential_dot_dot_acoustic,potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),50130);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_fields_ac_to_device");
-#endif
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_b_fields_ac_to_device,
- TRANSFER_B_FIELDS_AC_TO_DEVICE)(
- int* size,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_ac_to_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_acoustic,b_potential_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),51110);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_acoustic,b_potential_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),51120);
- print_CUDA_error_if_any(cudaMemcpy(mp->d_b_potential_dot_dot_acoustic,b_potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyHostToDevice),51130);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_fields_ac_to_device");
-#endif
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_fields_ac_from_device,TRANSFER_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* potential_acoustic,
- realw* potential_dot_acoustic,
- realw* potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {
-TRACE("transfer_fields_ac_from_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(potential_acoustic,mp->d_potential_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52111);
- print_CUDA_error_if_any(cudaMemcpy(potential_dot_acoustic,mp->d_potential_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52121);
- print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),52131);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_fields_ac_from_device");
-#endif
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_b_fields_ac_from_device,
- TRANSFER_B_FIELDS_AC_FROM_DEVICE)(
- int* size,
- realw* b_potential_acoustic,
- realw* b_potential_dot_acoustic,
- realw* b_potential_dot_dot_acoustic,
- long* Mesh_pointer_f) {
-TRACE("transfer_b_fields_ac_from_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(b_potential_acoustic,mp->d_b_potential_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53111);
- print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_acoustic,mp->d_b_potential_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53121);
- print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),53131);
-
-#ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
- exit_on_cuda_error("after transfer_b_fields_ac_from_device");
-#endif
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_dot_dot_from_device,
- TRNASFER_DOT_DOT_FROM_DEVICE)(int* size, realw* potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-
- TRACE("transfer_dot_dot_from_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(potential_dot_dot_acoustic,mp->d_potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50041);
-
-}
-*/
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_b_dot_dot_from_device,
- TRNASFER_B_DOT_DOT_FROM_DEVICE)(int* size, realw* b_potential_dot_dot_acoustic,long* Mesh_pointer_f) {
-
- TRACE("transfer_b_dot_dot_from_device");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer_f); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(b_potential_dot_dot_acoustic,mp->d_b_potential_dot_dot_acoustic,
- sizeof(realw)*(*size),cudaMemcpyDeviceToHost),50042);
-
-}
-*/
-
-/* ----------------------------------------------------------------------------------------------- */
-
-extern "C"
-void FC_FUNC_(transfer_kernels_ac_to_host,
- TRANSFER_KERNELS_AC_TO_HOST)(long* Mesh_pointer,
- realw* h_rho_ac_kl,
- realw* h_kappa_ac_kl,
- int* NSPEC_AB) {
-
- TRACE("transfer_kernels_ac_to_host");
-
- //get mesh pointer out of fortran integer container
- Mesh* mp = (Mesh*)(*Mesh_pointer);
- int size = *NSPEC_AB*NGLL3;
-
- // copies kernel values over to CPU host
- print_CUDA_error_if_any(cudaMemcpy(h_rho_ac_kl,mp->d_rho_ac_kl,size*sizeof(realw),
- cudaMemcpyDeviceToHost),54101);
- print_CUDA_error_if_any(cudaMemcpy(h_kappa_ac_kl,mp->d_kappa_ac_kl,size*sizeof(realw),
- cudaMemcpyDeviceToHost),54102);
-}
-
-/* ----------------------------------------------------------------------------------------------- */
-
// for Hess kernel calculations
-/* ----------------------------------------------------------------------------------------------- */
-
extern "C"
void FC_FUNC_(transfer_kernels_hess_cm_tohost,
TRANSFER_KERNELS_HESS_CM_TOHOST)(long* Mesh_pointer,
@@ -1025,19 +724,3 @@
cudaMemcpyDeviceToHost),70201);
}
-/* ----------------------------------------------------------------------------------------------- */
-/*
-extern "C"
-void FC_FUNC_(transfer_kernels_hess_ac_tohost,
- TRANSFER_KERNELS_HESS_AC_TOHOST)(long* Mesh_pointer,
- realw* h_hess_ac_kl,
- int* NSPEC_AB) {
- TRACE("transfer_kernels_hess_ac_tohost");
-
- Mesh* mp = (Mesh*)(*Mesh_pointer); //get mesh pointer out of fortran integer container
-
- print_CUDA_error_if_any(cudaMemcpy(h_hess_ac_kl,mp->d_hess_ac_kl,NGLL3*(*NSPEC_AB)*sizeof(realw),
- cudaMemcpyDeviceToHost),70202);
-}
-*/
-
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_boundary_kernel.f90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -25,9 +25,327 @@
!
!=====================================================================
-subroutine compute_boundary_kernel(displ,accel,b_displ,nspec,iregion_code, &
+
+ subroutine compute_boundary_kernels()
+
+! kernel calculations
+
+ use specfem_par
+ use specfem_par_crustmantle
+ use specfem_par_innercore
+ use specfem_par_outercore
+ use specfem_par_movie
+ implicit none
+
+ ! local parameters
+ ! dummy array that does not need to be actually read
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
+ logical,dimension(:),allocatable:: dummy_ispec_is_tiso
+ integer:: ispec,i,j,k,l,iglob
+ real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl
+ real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l
+
+ ! transfers wavefields onto CPU
+ if( GPU_MODE ) then
+ ! crust/mantle
+ call transfer_accel_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
+ call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+ call transfer_b_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle,Mesh_pointer)
+ ! inner core
+ call transfer_accel_ic_from_device(NDIM*NGLOB_INNER_CORE,accel_inner_core,Mesh_pointer)
+ call transfer_displ_ic_from_device(NDIM*NGLOB_INNER_CORE,displ_inner_core,Mesh_pointer)
+ call transfer_b_displ_ic_from_device(NDIM*NGLOB_INNER_CORE,b_displ_inner_core,Mesh_pointer)
+ ! outer core
+ call transfer_accel_oc_from_device(NGLOB_OUTER_CORE,accel_outer_core,Mesh_pointer)
+ call transfer_displ_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core,Mesh_pointer)
+ call transfer_b_displ_oc_from_device(NGLOB_OUTER_CORE,b_displ_outer_core,Mesh_pointer)
+
+ ! pre-calculates gradients on CPU
+ do ispec = 1, NSPEC_OUTER_CORE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_outer_core(i,j,k,ispec)
+
+ xixl = xix_outer_core(i,j,k,ispec)
+ xiyl = xiy_outer_core(i,j,k,ispec)
+ xizl = xiz_outer_core(i,j,k,ispec)
+ etaxl = etax_outer_core(i,j,k,ispec)
+ etayl = etay_outer_core(i,j,k,ispec)
+ etazl = etaz_outer_core(i,j,k,ispec)
+ gammaxl = gammax_outer_core(i,j,k,ispec)
+ gammayl = gammay_outer_core(i,j,k,ispec)
+ gammazl = gammaz_outer_core(i,j,k,ispec)
+
+ ! calculates gradient grad(b_displ)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ b_vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ b_vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ b_vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ ! calculates gradient grad(accel)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ vector_accel_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ vector_accel_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ vector_accel_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ ! calculates gradient grad(displ)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ enddo
+ enddo
+ enddo
+ enddo
+ endif
+
+ ! updates kernels on CPU
+ fluid_solid_boundary = .false.
+ iregion_code = IREGION_CRUST_MANTLE
+
+ ! Moho
+ if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
+
+ moho_kl = moho_kl + (moho_kl_top - moho_kl_bot) * deltat
+ endif
+
+ ! 400
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
+
+ d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
+
+ ! 670
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
+
+ d670_kl = d670_kl + (d670_kl_top - d670_kl_bot) * deltat
+
+ ! CMB
+ fluid_solid_boundary = .true.
+ iregion_code = IREGION_CRUST_MANTLE
+
+ call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
+ b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
+ ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
+ kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
+ c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
+ c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
+ c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
+ c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
+ cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
+
+ iregion_code = IREGION_OUTER_CORE
+
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
+ dummy_ispec_is_tiso(:) = .false.
+
+ call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ b_vector_displ_outer_core,nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_outer_core,normal_top_outer_core, &
+ cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
+
+ cmb_kl = cmb_kl + (cmb_kl_top - cmb_kl_bot) * deltat
+
+ ! ICB
+ fluid_solid_boundary = .true.
+ call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
+ b_vector_displ_outer_core,nspec_outer_core, &
+ iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
+ xix_outer_core,xiy_outer_core,xiz_outer_core, &
+ etax_outer_core,etay_outer_core,etaz_outer_core,&
+ gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_outer_core,kappavstore_outer_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
+ icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
+
+ deallocate(dummy_ispec_is_tiso)
+
+ ! ICB
+ iregion_code = IREGION_INNER_CORE
+
+ ! dummy allocation
+ allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
+ dummy_ispec_is_tiso(:) = .false.
+
+ call compute_boundary_kernel(displ_inner_core,accel_inner_core, &
+ b_displ_inner_core,nspec_inner_core,iregion_code, &
+ ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core,&
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
+ rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
+ dummy_array,dummy_array,dummy_array, &
+ c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array,dummy_array, &
+ c33store_inner_core,dummy_array,dummy_array, &
+ dummy_array,c44store_inner_core,dummy_array,dummy_array, &
+ dummy_array,dummy_array,dummy_array, &
+ k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
+ icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
+ deallocate(dummy_ispec_is_tiso)
+
+ icb_kl = icb_kl + (icb_kl_top - icb_kl_bot) * deltat
+
+ end subroutine compute_boundary_kernels
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine compute_boundary_kernel(displ,accel,b_displ,nspec,iregion_code, &
ystore,zstore,ibool,ispec_is_tiso, &
- !--- idoubling, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_yy,hprime_zz, &
rhostore,kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -37,488 +37,169 @@
use specfem_par_movie
implicit none
- ! local parameters
- ! dummy array that does not need to be actually read
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,1) :: dummy_array
- logical,dimension(:),allocatable:: dummy_ispec_is_tiso
-
! crust mantle
- call compute_kernels_crust_mantle(ibool_crust_mantle, &
- rho_kl_crust_mantle,beta_kl_crust_mantle, &
- alpha_kl_crust_mantle,cijkl_kl_crust_mantle, &
- accel_crust_mantle,b_displ_crust_mantle, &
- epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
- epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
- b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
- b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
- eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle, &
- deltat)
+ call compute_kernels_crust_mantle()
! outer core
- call compute_kernels_outer_core(ibool_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- hprime_xx,hprime_yy,hprime_zz, &
- displ_outer_core,accel_outer_core, &
- b_displ_outer_core,b_accel_outer_core, &
- vector_accel_outer_core,vector_displ_outer_core, &
- b_vector_displ_outer_core, &
- div_displ_outer_core,b_div_displ_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- rho_kl_outer_core,alpha_kl_outer_core, &
- deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
- deltat)
+ call compute_kernels_outer_core()
! inner core
- call compute_kernels_inner_core(ibool_inner_core, &
- rho_kl_inner_core,beta_kl_inner_core, &
- alpha_kl_inner_core, &
- accel_inner_core,b_displ_inner_core, &
- epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
- epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
- b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
- b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
- eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core, &
- deltat)
+ call compute_kernels_inner_core()
-!<YANGL
! NOISE TOMOGRAPHY --- source strength kernel
- if (NOISE_TOMOGRAPHY == 3) &
- call compute_kernels_strength_noise(nmovie_points,ibool_crust_mantle, &
- Sigma_kl_crust_mantle,displ_crust_mantle,deltat,it, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- NSPEC2D_TOP(IREGION_CRUST_MANTLE),noise_surface_movie, &
- ibelm_top_crust_mantle)
-!>YANGL
+ if( NOISE_TOMOGRAPHY == 3 ) call compute_kernels_strength_noise()
- ! --- boundary kernels ------
- if (SAVE_BOUNDARY_MESH) then
+ ! boundary kernels
+ if( SAVE_BOUNDARY_MESH ) call compute_boundary_kernels()
- ! transfers wavefields onto CPU
- if( GPU_MODE ) then
- ! crust/mantle
- call transfer_accel_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
- call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
- call transfer_b_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_crust_mantle,Mesh_pointer)
- ! inner core
- call transfer_accel_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_inner_core,Mesh_pointer)
- call transfer_displ_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_inner_core,Mesh_pointer)
- call transfer_b_displ_ic_from_device(NDIM*NGLOB_CRUST_MANTLE,b_displ_inner_core,Mesh_pointer)
- endif
-
- ! updates kernels on CPU
- fluid_solid_boundary = .false.
- iregion_code = IREGION_CRUST_MANTLE
-
- ! Moho
- if (.not. SUPPRESS_CRUSTAL_MESH .and. HONOR_1D_SPHERICAL_MOHO) then
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_moho_top,normal_moho,moho_kl_top,fluid_solid_boundary,NSPEC2D_MOHO)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_moho_bot,normal_moho,moho_kl_bot,fluid_solid_boundary,NSPEC2D_MOHO)
-
- moho_kl = moho_kl + (moho_kl_top - moho_kl_bot) * deltat
- endif
-
- ! 400
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_400_top,normal_400,d400_kl_top,fluid_solid_boundary,NSPEC2D_400)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_400_bot,normal_400,d400_kl_bot,fluid_solid_boundary,NSPEC2D_400)
-
- d400_kl = d400_kl + (d400_kl_top - d400_kl_bot) * deltat
-
- ! 670
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_670_top,normal_670,d670_kl_top,fluid_solid_boundary,NSPEC2D_670)
-
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_bot,ibelm_670_bot,normal_670,d670_kl_bot,fluid_solid_boundary,NSPEC2D_670)
-
- d670_kl = d670_kl + (d670_kl_top - d670_kl_bot) * deltat
-
- ! CMB
- fluid_solid_boundary = .true.
- iregion_code = IREGION_CRUST_MANTLE
- call compute_boundary_kernel(displ_crust_mantle,accel_crust_mantle, &
- b_displ_crust_mantle,nspec_crust_mantle,iregion_code, &
- ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! -- idoubling_crust_mantle, &
- xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
- etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle,&
- gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_crust_mantle,kappavstore_crust_mantle, muvstore_crust_mantle, &
- kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle, &
- c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle,c14store_crust_mantle, &
- c15store_crust_mantle,c16store_crust_mantle,c22store_crust_mantle, &
- c23store_crust_mantle,c24store_crust_mantle,c25store_crust_mantle,c26store_crust_mantle, &
- c33store_crust_mantle,c34store_crust_mantle,c35store_crust_mantle, &
- c36store_crust_mantle,c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
- c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
- k_top,ibelm_bottom_crust_mantle,normal_top_outer_core, &
- cmb_kl_top,fluid_solid_boundary,NSPEC2D_CMB)
-
- iregion_code = IREGION_OUTER_CORE
- ! dummy allocation
- allocate(dummy_ispec_is_tiso(NSPEC_OUTER_CORE))
- dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
- ! --idoubling_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_outer_core,normal_top_outer_core, &
- cmb_kl_bot,fluid_solid_boundary,NSPEC2D_CMB)
-
- cmb_kl = cmb_kl + (cmb_kl_top - cmb_kl_bot) * deltat
-
- ! ICB
- fluid_solid_boundary = .true.
- call compute_boundary_kernel(vector_displ_outer_core,vector_accel_outer_core, &
- b_vector_displ_outer_core,nspec_outer_core, &
- iregion_code,ystore_outer_core,zstore_outer_core,ibool_outer_core,dummy_ispec_is_tiso, &
- ! --idoubling_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core,&
- gammax_outer_core,gammay_outer_core,gammaz_outer_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_outer_core,kappavstore_outer_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_top,ibelm_bottom_outer_core,normal_bottom_outer_core, &
- icb_kl_top,fluid_solid_boundary,NSPEC2D_ICB)
-
- deallocate(dummy_ispec_is_tiso)
-
- ! ICB
- iregion_code = IREGION_INNER_CORE
- ! dummy allocation
- allocate(dummy_ispec_is_tiso(NSPEC_INNER_CORE))
- dummy_ispec_is_tiso(:) = .false.
- call compute_boundary_kernel(displ_inner_core,accel_inner_core, &
- b_displ_inner_core,nspec_inner_core,iregion_code, &
- ystore_inner_core,zstore_inner_core,ibool_inner_core,dummy_ispec_is_tiso, &
- ! -- idoubling_inner_core, &
- xix_inner_core,xiy_inner_core,xiz_inner_core, &
- etax_inner_core,etay_inner_core,etaz_inner_core,&
- gammax_inner_core,gammay_inner_core,gammaz_inner_core,hprime_xx,hprime_yy,hprime_zz, &
- rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core, &
- dummy_array,dummy_array,dummy_array, &
- c11store_inner_core,c12store_inner_core,c13store_inner_core,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array,dummy_array, &
- c33store_inner_core,dummy_array,dummy_array, &
- dummy_array,c44store_inner_core,dummy_array,dummy_array, &
- dummy_array,dummy_array,dummy_array, &
- k_bot,ibelm_top_inner_core,normal_bottom_outer_core, &
- icb_kl_bot,fluid_solid_boundary,NSPEC2D_ICB)
- deallocate(dummy_ispec_is_tiso)
- icb_kl = icb_kl + (icb_kl_top - icb_kl_bot) * deltat
- endif
-
! approximate hessian
- if( APPROXIMATE_HESS_KL ) then
- call compute_kernels_hessian(ibool_crust_mantle, &
- hess_kl_crust_mantle,&
- accel_crust_mantle,b_accel_crust_mantle, &
- deltat)
- endif
+ if( APPROXIMATE_HESS_KL ) call compute_kernels_hessian()
-
end subroutine compute_kernels
!
!-------------------------------------------------------------------------------------------------
!
- subroutine compute_kernels_crust_mantle(ibool_crust_mantle, &
- rho_kl_crust_mantle,beta_kl_crust_mantle, &
- alpha_kl_crust_mantle,cijkl_kl_crust_mantle, &
- accel_crust_mantle,b_displ_crust_mantle, &
- epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
- epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
- b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
- b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
- eps_trace_over_3_crust_mantle,b_eps_trace_over_3_crust_mantle, &
- deltat)
+ subroutine compute_kernels_crust_mantle()
+ use constants
+ use specfem_par,only: deltat
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+ use specfem_par_crustmantle
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- rho_kl_crust_mantle, beta_kl_crust_mantle, alpha_kl_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- cijkl_kl_crust_mantle
-
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_displ_crust_mantle
-
-! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
-! epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
- epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
- epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
-
-! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
-! b_epsilondev_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
- b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
- eps_trace_over_3_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- b_eps_trace_over_3_crust_mantle
-
- real(kind=CUSTOM_REAL) deltat
-
! local parameters
- real(kind=CUSTOM_REAL),dimension(21) :: prod !, cijkl_kl_local
+ real(kind=CUSTOM_REAL),dimension(21) :: prod
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
integer :: i,j,k,ispec,iglob
- ! crust_mantle
- do ispec = 1, NSPEC_CRUST_MANTLE
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool_crust_mantle(i,j,k,ispec)
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ ! crust_mantle
+ do ispec = 1, NSPEC_CRUST_MANTLE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec)
- ! density kernel: see e.g. Tromp et al.(2005), equation (14)
- ! b_displ_crust_mantle is the backward/reconstructed wavefield, that is s(x,t) in eq. (14),
- ! accel_crust_mantle is the adjoint wavefield, that corresponds to s_dagger(x,T-t)
- !
- ! note with respect to eq. (14) the second time derivative is applied to the
- ! adjoint wavefield here rather than the backward/reconstructed wavefield.
- ! this is a valid operation and the resultant kernel identical to the eq. (14).
- !
- ! reason for this is that the adjoint wavefield is in general smoother
- ! since the adjoint sources normally are obtained for filtered traces.
- ! numerically, the time derivative by a finite-difference scheme should
- ! behave better for smoother wavefields, thus containing less numerical artefacts.
- rho_kl_crust_mantle(i,j,k,ispec) = rho_kl_crust_mantle(i,j,k,ispec) &
- + deltat * (accel_crust_mantle(1,iglob) * b_displ_crust_mantle(1,iglob) &
- + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
- + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
+ ! density kernel: see e.g. Tromp et al.(2005), equation (14)
+ ! b_displ_crust_mantle is the backward/reconstructed wavefield, that is s(x,t) in eq. (14),
+ ! accel_crust_mantle is the adjoint wavefield, that corresponds to s_dagger(x,T-t)
+ !
+ ! note with respect to eq. (14) the second time derivative is applied to the
+ ! adjoint wavefield here rather than the backward/reconstructed wavefield.
+ ! this is a valid operation and the resultant kernel identical to the eq. (14).
+ !
+ ! reason for this is that the adjoint wavefield is in general smoother
+ ! since the adjoint sources normally are obtained for filtered traces.
+ ! numerically, the time derivative by a finite-difference scheme should
+ ! behave better for smoother wavefields, thus containing less numerical artefacts.
+ rho_kl_crust_mantle(i,j,k,ispec) = rho_kl_crust_mantle(i,j,k,ispec) &
+ + deltat * (accel_crust_mantle(1,iglob) * b_displ_crust_mantle(1,iglob) &
+ + accel_crust_mantle(2,iglob) * b_displ_crust_mantle(2,iglob) &
+ + accel_crust_mantle(3,iglob) * b_displ_crust_mantle(3,iglob) )
- epsilondev_loc(1) = epsilondev_xx_crust_mantle(i,j,k,ispec)
- epsilondev_loc(2) = epsilondev_yy_crust_mantle(i,j,k,ispec)
- epsilondev_loc(3) = epsilondev_xy_crust_mantle(i,j,k,ispec)
- epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
- epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
+ ! isotropic kernels
+ if (.not. ANISOTROPIC_KL) then
- b_epsilondev_loc(1) = b_epsilondev_xx_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(2) = b_epsilondev_yy_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(3) = b_epsilondev_xy_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(4) = b_epsilondev_xz_crust_mantle(i,j,k,ispec)
- b_epsilondev_loc(5) = b_epsilondev_yz_crust_mantle(i,j,k,ispec)
+ ! temporary arrays
+ epsilondev_loc(1) = epsilondev_xx_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
- ! For anisotropic kernels
- if (ANISOTROPIC_KL) then
+ b_epsilondev_loc(1) = b_epsilondev_xx_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz_crust_mantle(i,j,k,ispec)
- call compute_strain_product(prod,eps_trace_over_3_crust_mantle(i,j,k,ispec),epsilondev_loc, &
- b_eps_trace_over_3_crust_mantle(i,j,k,ispec),b_epsilondev_loc)
- cijkl_kl_crust_mantle(:,i,j,k,ispec) = cijkl_kl_crust_mantle(:,i,j,k,ispec) + deltat * prod(:)
+ ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
+ ! note: multiplication with 2*mu(x) will be done after the time loop
+ beta_kl_crust_mantle(i,j,k,ispec) = beta_kl_crust_mantle(i,j,k,ispec) &
+ + deltat * &
+ (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+ + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
+ epsilondev_loc(5)*b_epsilondev_loc(5)) )
- else
- ! kernel for shear modulus, see e.g. Tromp et al. (2005), equation (17)
- ! note: multiplication with 2*mu(x) will be done after the time loop
- beta_kl_crust_mantle(i,j,k,ispec) = beta_kl_crust_mantle(i,j,k,ispec) &
- + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
- + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
- + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
- epsilondev_loc(5)*b_epsilondev_loc(5)) )
+ ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
+ ! note: multiplication with kappa(x) will be done after the time loop
+ alpha_kl_crust_mantle(i,j,k,ispec) = alpha_kl_crust_mantle(i,j,k,ispec) &
+ + deltat * (9 * eps_trace_over_3_crust_mantle(i,j,k,ispec) &
+ * b_eps_trace_over_3_crust_mantle(i,j,k,ispec))
+ endif
- ! kernel for bulk modulus, see e.g. Tromp et al. (2005), equation (18)
- ! note: multiplication with kappa(x) will be done after the time loop
- alpha_kl_crust_mantle(i,j,k,ispec) = alpha_kl_crust_mantle(i,j,k,ispec) &
- + deltat * (9 * eps_trace_over_3_crust_mantle(i,j,k,ispec) &
- * b_eps_trace_over_3_crust_mantle(i,j,k,ispec))
-
- endif
-
+ enddo
enddo
enddo
enddo
- enddo
+ ! For anisotropic kernels
+ if (ANISOTROPIC_KL) then
- end subroutine compute_kernels_crust_mantle
+ ! computes fully anisotropic kernel cijkl_kl
+ do ispec = 1, NSPEC_CRUST_MANTLE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_crust_mantle(i,j,k,ispec)
+ ! temporary arrays
+ epsilondev_loc(1) = epsilondev_xx_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz_crust_mantle(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz_crust_mantle(i,j,k,ispec)
-!
-!-------------------------------------------------------------------------------------------------
-!
+ b_epsilondev_loc(1) = b_epsilondev_xx_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz_crust_mantle(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz_crust_mantle(i,j,k,ispec)
+ call compute_strain_product(prod,eps_trace_over_3_crust_mantle(i,j,k,ispec),epsilondev_loc, &
+ b_eps_trace_over_3_crust_mantle(i,j,k,ispec),b_epsilondev_loc)
- subroutine compute_kernels_outer_core(ibool_outer_core, &
- xix_outer_core,xiy_outer_core,xiz_outer_core, &
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core, &
- hprime_xx,hprime_yy,hprime_zz, &
- displ_outer_core,accel_outer_core, &
- b_displ_outer_core,b_accel_outer_core, &
- vector_accel_outer_core,vector_displ_outer_core, &
- b_vector_displ_outer_core, &
- div_displ_outer_core,b_div_displ_outer_core, &
- rhostore_outer_core,kappavstore_outer_core, &
- rho_kl_outer_core,alpha_kl_outer_core, &
- deviatoric_outercore,nspec_beta_kl_outer_core,beta_kl_outer_core, &
- deltat)
+ cijkl_kl_crust_mantle(:,i,j,k,ispec) = cijkl_kl_crust_mantle(:,i,j,k,ispec) + deltat * prod(:)
- implicit none
+ enddo
+ enddo
+ enddo
+ enddo
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+ endif
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
+ else
+ ! updates kernel contribution on GPU
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- xix_outer_core,xiy_outer_core,xiz_outer_core,&
- etax_outer_core,etay_outer_core,etaz_outer_core, &
- gammax_outer_core,gammay_outer_core,gammaz_outer_core
+ ! computes contribution to density and isotropic/anisotropic kernels
+ call compute_kernels_cm_cuda(Mesh_pointer,deltat)
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz
+ endif
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
- displ_outer_core,accel_outer_core
+ end subroutine compute_kernels_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
- b_displ_outer_core,b_accel_outer_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_OUTER_CORE) :: vector_accel_outer_core,&
- vector_displ_outer_core, b_vector_displ_outer_core
+!
+!-------------------------------------------------------------------------------------------------
+!
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displ_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: b_div_displ_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
- rhostore_outer_core,kappavstore_outer_core
+ subroutine compute_kernels_outer_core()
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
- rho_kl_outer_core,alpha_kl_outer_core
+ use constants
+ use specfem_par,only: deltat,hprime_xx,hprime_yy,hprime_zz,myrank
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+ use specfem_par_outercore
- integer nspec_beta_kl_outer_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_beta_kl_outer_core) :: &
- beta_kl_outer_core
- logical deviatoric_outercore
+ implicit none
- real(kind=CUSTOM_REAL) deltat
-
! local parameters
real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,kappal
real(kind=CUSTOM_REAL) :: tempx1l,tempx2l,tempx3l
@@ -530,235 +211,228 @@
integer :: i,j,k,l,ispec,iglob
! outer_core -- compute the actual displacement and acceleration (NDIM,NGLOBMAX_OUTER_CORE)
- do ispec = 1, NSPEC_OUTER_CORE
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool_outer_core(i,j,k,ispec)
- xixl = xix_outer_core(i,j,k,ispec)
- xiyl = xiy_outer_core(i,j,k,ispec)
- xizl = xiz_outer_core(i,j,k,ispec)
- etaxl = etax_outer_core(i,j,k,ispec)
- etayl = etay_outer_core(i,j,k,ispec)
- etazl = etaz_outer_core(i,j,k,ispec)
- gammaxl = gammax_outer_core(i,j,k,ispec)
- gammayl = gammay_outer_core(i,j,k,ispec)
- gammazl = gammaz_outer_core(i,j,k,ispec)
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ do ispec = 1, NSPEC_OUTER_CORE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_outer_core(i,j,k,ispec)
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
+ xixl = xix_outer_core(i,j,k,ispec)
+ xiyl = xiy_outer_core(i,j,k,ispec)
+ xizl = xiz_outer_core(i,j,k,ispec)
+ etaxl = etax_outer_core(i,j,k,ispec)
+ etayl = etay_outer_core(i,j,k,ispec)
+ etazl = etaz_outer_core(i,j,k,ispec)
+ gammaxl = gammax_outer_core(i,j,k,ispec)
+ gammayl = gammay_outer_core(i,j,k,ispec)
+ gammazl = gammaz_outer_core(i,j,k,ispec)
-
- do l=1,NGLLX
- tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
-
- do l=1,NGLLY
- tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
-
- do l=1,NGLLZ
- tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
-
- b_vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- b_vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- b_vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
-
-
- !deviatoric kernel check
- if( deviatoric_outercore ) then
-
+ ! calculates gradient grad(b_displ)
tempx1l = 0._CUSTOM_REAL
tempx2l = 0._CUSTOM_REAL
tempx3l = 0._CUSTOM_REAL
-
- tempy1l = 0._CUSTOM_REAL
- tempy2l = 0._CUSTOM_REAL
- tempy3l = 0._CUSTOM_REAL
-
- tempz1l = 0._CUSTOM_REAL
- tempz2l = 0._CUSTOM_REAL
- tempz3l = 0._CUSTOM_REAL
-
- ! assumes NGLLX = NGLLY = NGLLZ
do l=1,NGLLX
- tempx1l = tempx1l + b_vector_displ_outer_core(1,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- tempy1l = tempy1l + b_vector_displ_outer_core(2,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- tempz1l = tempz1l + b_vector_displ_outer_core(3,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempx1l = tempx1l + b_displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + b_displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + b_displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ b_vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ b_vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ b_vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
- tempx2l = tempx2l + b_vector_displ_outer_core(1,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- tempy2l = tempy2l + b_vector_displ_outer_core(2,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- tempz2l = tempz2l + b_vector_displ_outer_core(3,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
-
- tempx3l = tempx3l + b_vector_displ_outer_core(1,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- tempy3l = tempy3l + b_vector_displ_outer_core(2,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- tempz3l = tempz3l + b_vector_displ_outer_core(3,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ ! calculates gradient grad(accel)
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ vector_accel_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ vector_accel_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ vector_accel_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ ! density kernel
+ rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
+ + deltat * dot_product(vector_accel_outer_core(:,iglob), b_vector_displ_outer_core(:,iglob))
- !deviatoric strain
- b_epsilondev_loc(1) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ ! bulk modulus kernel
+ kappal = rhostore_outer_core(i,j,k,ispec)/kappavstore_outer_core(i,j,k,ispec)
- b_epsilondev_loc(2) = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ div_displ_outer_core(i,j,k,ispec) = kappal * accel_outer_core(iglob)
+ b_div_displ_outer_core(i,j,k,ispec) = kappal * b_accel_outer_core(iglob)
- b_epsilondev_loc(3) = 0.5*( xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l &
- + xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ alpha_kl_outer_core(i,j,k,ispec) = alpha_kl_outer_core(i,j,k,ispec) &
+ + deltat * div_displ_outer_core(i,j,k,ispec) * b_div_displ_outer_core(i,j,k,ispec)
- b_epsilondev_loc(4) = 0.5*( xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l &
- + xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ ! calculates gradient grad(displ) (also needed for boundary kernels)
+ if(SAVE_BOUNDARY_MESH .or. deviatoric_outercore) then
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ do l=1,NGLLX
+ tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ enddo
+ do l=1,NGLLY
+ tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ enddo
+ do l=1,NGLLZ
+ tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
+ vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ endif
- b_epsilondev_loc(5) = 0.5*( xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l &
- + xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ !deviatoric kernel check
+ if( deviatoric_outercore ) then
- endif !deviatoric kernel check
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ tempy1l = 0._CUSTOM_REAL
+ tempy2l = 0._CUSTOM_REAL
+ tempy3l = 0._CUSTOM_REAL
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
+ tempz1l = 0._CUSTOM_REAL
+ tempz2l = 0._CUSTOM_REAL
+ tempz3l = 0._CUSTOM_REAL
- do l=1,NGLLX
- tempx1l = tempx1l + accel_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
+ ! assumes NGLLX = NGLLY = NGLLZ
+ do l=1,NGLLX
+ tempx1l = tempx1l + b_vector_displ_outer_core(1,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempy1l = tempy1l + b_vector_displ_outer_core(2,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempz1l = tempz1l + b_vector_displ_outer_core(3,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- do l=1,NGLLY
- tempx2l = tempx2l + accel_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
+ tempx2l = tempx2l + b_vector_displ_outer_core(1,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ tempy2l = tempy2l + b_vector_displ_outer_core(2,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ tempz2l = tempz2l + b_vector_displ_outer_core(3,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- do l=1,NGLLZ
- tempx3l = tempx3l + accel_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
+ tempx3l = tempx3l + b_vector_displ_outer_core(1,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ tempy3l = tempy3l + b_vector_displ_outer_core(2,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ tempz3l = tempz3l + b_vector_displ_outer_core(3,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
- vector_accel_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_accel_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_accel_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ !deviatoric strain
+ b_epsilondev_loc(1) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
+ b_epsilondev_loc(2) = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- do l=1,NGLLX
- tempx1l = tempx1l + displ_outer_core(ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- enddo
+ b_epsilondev_loc(3) = 0.5*( xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l &
+ + xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- do l=1,NGLLY
- tempx2l = tempx2l + displ_outer_core(ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- enddo
+ b_epsilondev_loc(4) = 0.5*( xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l &
+ + xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- do l=1,NGLLZ
- tempx3l = tempx3l + displ_outer_core(ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
+ b_epsilondev_loc(5) = 0.5*( xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l &
+ + xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- vector_displ_outer_core(1,iglob) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- vector_displ_outer_core(2,iglob) = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- vector_displ_outer_core(3,iglob) = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ tempx1l = 0._CUSTOM_REAL
+ tempx2l = 0._CUSTOM_REAL
+ tempx3l = 0._CUSTOM_REAL
+ tempy1l = 0._CUSTOM_REAL
+ tempy2l = 0._CUSTOM_REAL
+ tempy3l = 0._CUSTOM_REAL
- !deviatoric kernel check
- if( deviatoric_outercore ) then
+ tempz1l = 0._CUSTOM_REAL
+ tempz2l = 0._CUSTOM_REAL
+ tempz3l = 0._CUSTOM_REAL
- tempx1l = 0._CUSTOM_REAL
- tempx2l = 0._CUSTOM_REAL
- tempx3l = 0._CUSTOM_REAL
+ ! assumes NGLLX = NGLLY = NGLLZ
+ do l=1,NGLLX
+ tempx1l = tempx1l + vector_displ_outer_core(1,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempy1l = tempy1l + vector_displ_outer_core(2,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ tempz1l = tempz1l + vector_displ_outer_core(3,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- tempy1l = 0._CUSTOM_REAL
- tempy2l = 0._CUSTOM_REAL
- tempy3l = 0._CUSTOM_REAL
+ tempx2l = tempx2l + vector_displ_outer_core(1,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ tempy2l = tempy2l + vector_displ_outer_core(2,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ tempz2l = tempz2l + vector_displ_outer_core(3,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- tempz1l = 0._CUSTOM_REAL
- tempz2l = 0._CUSTOM_REAL
- tempz3l = 0._CUSTOM_REAL
+ tempx3l = tempx3l + vector_displ_outer_core(1,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ tempy3l = tempy3l + vector_displ_outer_core(2,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ tempz3l = tempz3l + vector_displ_outer_core(3,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
+ enddo
- ! assumes NGLLX = NGLLY = NGLLZ
- do l=1,NGLLX
- tempx1l = tempx1l + vector_displ_outer_core(1,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- tempy1l = tempy1l + vector_displ_outer_core(2,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
- tempz1l = tempz1l + vector_displ_outer_core(3,ibool_outer_core(l,j,k,ispec)) * hprime_xx(i,l)
+ !deviatoric strain
+ epsilondev_loc(1) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- tempx2l = tempx2l + vector_displ_outer_core(1,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- tempy2l = tempy2l + vector_displ_outer_core(2,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
- tempz2l = tempz2l + vector_displ_outer_core(3,ibool_outer_core(i,l,k,ispec)) * hprime_yy(j,l)
+ epsilondev_loc(2) = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- tempx3l = tempx3l + vector_displ_outer_core(1,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- tempy3l = tempy3l + vector_displ_outer_core(2,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- tempz3l = tempz3l + vector_displ_outer_core(3,ibool_outer_core(i,j,l,ispec)) * hprime_zz(k,l)
- enddo
+ epsilondev_loc(3) = 0.5*( xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l &
+ + xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ epsilondev_loc(4) = 0.5*( xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l &
+ + xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- !deviatoric strain
- epsilondev_loc(1) = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ epsilondev_loc(5) = 0.5*( xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l &
+ + xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l ) &
+ - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
+ + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
+ + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
- epsilondev_loc(2) = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ beta_kl_outer_core(i,j,k,ispec) = beta_kl_outer_core(i,j,k,ispec) &
+ + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+ + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
+ epsilondev_loc(5)*b_epsilondev_loc(5)) )
- epsilondev_loc(3) = 0.5*( xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l &
- + xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
+ endif !deviatoric kernel check
- epsilondev_loc(4) = 0.5*( xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l &
- + xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
-
- epsilondev_loc(5) = 0.5*( xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l &
- + xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l ) &
- - ONE_THIRD* (xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l &
- + xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l &
- + xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l )
-
- beta_kl_outer_core(i,j,k,ispec) = beta_kl_outer_core(i,j,k,ispec) &
- + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
- + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
- + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) + &
- epsilondev_loc(5)*b_epsilondev_loc(5)) )
-
- endif !deviatoric kernel check
-
-
-
- rho_kl_outer_core(i,j,k,ispec) = rho_kl_outer_core(i,j,k,ispec) &
- + deltat * dot_product(vector_accel_outer_core(:,iglob), b_vector_displ_outer_core(:,iglob))
-
- kappal = rhostore_outer_core(i,j,k,ispec)/kappavstore_outer_core(i,j,k,ispec)
-
- div_displ_outer_core(i,j,k,ispec) = kappal * accel_outer_core(iglob)
- b_div_displ_outer_core(i,j,k,ispec) = kappal * b_accel_outer_core(iglob)
-
- alpha_kl_outer_core(i,j,k,ispec) = alpha_kl_outer_core(i,j,k,ispec) &
- + deltat * div_displ_outer_core(i,j,k,ispec) * b_div_displ_outer_core(i,j,k,ispec)
-
-
+ enddo
enddo
enddo
enddo
- enddo
+ else
+ ! updates kernel contribution on GPU
+ if( deviatoric_outercore ) call exit_mpi(myrank,'deviatoric kernel on GPU not supported yet')
+
+ ! computes contribution to density and bulk modulus kernel
+ call compute_kernels_oc_cuda(Mesh_pointer,deltat)
+
+ endif
+
end subroutine compute_kernels_outer_core
@@ -767,110 +441,82 @@
!
- subroutine compute_kernels_inner_core(ibool_inner_core, &
- rho_kl_inner_core,beta_kl_inner_core, &
- alpha_kl_inner_core, &
- accel_inner_core,b_displ_inner_core, &
- epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
- epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
- b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
- b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
- eps_trace_over_3_inner_core,b_eps_trace_over_3_inner_core, &
- deltat)
+ subroutine compute_kernels_inner_core()
+ use constants
+ use specfem_par,only: deltat
+ use specfem_par,only: GPU_MODE,Mesh_pointer
+ use specfem_par_innercore
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
-
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- rho_kl_inner_core, beta_kl_inner_core, alpha_kl_inner_core
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
- accel_inner_core
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
- b_displ_inner_core
-
-! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
-! epsilondev_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
- epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
- epsilondev_xz_inner_core,epsilondev_yz_inner_core
-
-
-! real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
-! b_epsilondev_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
- b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core
-
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: &
- eps_trace_over_3_inner_core
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
- b_eps_trace_over_3_inner_core
-
- real(kind=CUSTOM_REAL) deltat
-
! local parameters
real(kind=CUSTOM_REAL), dimension(5) :: b_epsilondev_loc
real(kind=CUSTOM_REAL), dimension(5) :: epsilondev_loc
integer :: i,j,k,ispec,iglob
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ ! inner_core
+ do ispec = 1, NSPEC_INNER_CORE
+ do k = 1, NGLLZ
+ do j = 1, NGLLY
+ do i = 1, NGLLX
+ iglob = ibool_inner_core(i,j,k,ispec)
- ! inner_core
- do ispec = 1, NSPEC_INNER_CORE
- do k = 1, NGLLZ
- do j = 1, NGLLY
- do i = 1, NGLLX
- iglob = ibool_inner_core(i,j,k,ispec)
+ rho_kl_inner_core(i,j,k,ispec) = rho_kl_inner_core(i,j,k,ispec) &
+ + deltat * (accel_inner_core(1,iglob) * b_displ_inner_core(1,iglob) + &
+ accel_inner_core(2,iglob) * b_displ_inner_core(2,iglob) + &
+ accel_inner_core(3,iglob) * b_displ_inner_core(3,iglob) )
- rho_kl_inner_core(i,j,k,ispec) = rho_kl_inner_core(i,j,k,ispec) &
- + deltat * (accel_inner_core(1,iglob) * b_displ_inner_core(1,iglob) &
- + accel_inner_core(2,iglob) * b_displ_inner_core(2,iglob) &
- + accel_inner_core(3,iglob) * b_displ_inner_core(3,iglob) )
+ epsilondev_loc(1) = epsilondev_xx_inner_core(i,j,k,ispec)
+ epsilondev_loc(2) = epsilondev_yy_inner_core(i,j,k,ispec)
+ epsilondev_loc(3) = epsilondev_xy_inner_core(i,j,k,ispec)
+ epsilondev_loc(4) = epsilondev_xz_inner_core(i,j,k,ispec)
+ epsilondev_loc(5) = epsilondev_yz_inner_core(i,j,k,ispec)
- epsilondev_loc(1) = epsilondev_xx_inner_core(i,j,k,ispec)
- epsilondev_loc(2) = epsilondev_yy_inner_core(i,j,k,ispec)
- epsilondev_loc(3) = epsilondev_xy_inner_core(i,j,k,ispec)
- epsilondev_loc(4) = epsilondev_xz_inner_core(i,j,k,ispec)
- epsilondev_loc(5) = epsilondev_yz_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(1) = b_epsilondev_xx_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(2) = b_epsilondev_yy_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(3) = b_epsilondev_xy_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(4) = b_epsilondev_xz_inner_core(i,j,k,ispec)
+ b_epsilondev_loc(5) = b_epsilondev_yz_inner_core(i,j,k,ispec)
- b_epsilondev_loc(1) = b_epsilondev_xx_inner_core(i,j,k,ispec)
- b_epsilondev_loc(2) = b_epsilondev_yy_inner_core(i,j,k,ispec)
- b_epsilondev_loc(3) = b_epsilondev_xy_inner_core(i,j,k,ispec)
- b_epsilondev_loc(4) = b_epsilondev_xz_inner_core(i,j,k,ispec)
- b_epsilondev_loc(5) = b_epsilondev_yz_inner_core(i,j,k,ispec)
+ beta_kl_inner_core(i,j,k,ispec) = beta_kl_inner_core(i,j,k,ispec) &
+ + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
+ + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
+ + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + &
+ epsilondev_loc(4)*b_epsilondev_loc(4) + &
+ epsilondev_loc(5)*b_epsilondev_loc(5)) )
- beta_kl_inner_core(i,j,k,ispec) = beta_kl_inner_core(i,j,k,ispec) &
- + deltat * (epsilondev_loc(1)*b_epsilondev_loc(1) + epsilondev_loc(2)*b_epsilondev_loc(2) &
- + (epsilondev_loc(1)+epsilondev_loc(2)) * (b_epsilondev_loc(1)+b_epsilondev_loc(2)) &
- + 2 * (epsilondev_loc(3)*b_epsilondev_loc(3) + epsilondev_loc(4)*b_epsilondev_loc(4) &
- + epsilondev_loc(5)*b_epsilondev_loc(5)) )
-
- alpha_kl_inner_core(i,j,k,ispec) = alpha_kl_inner_core(i,j,k,ispec) &
- + deltat * (9 * eps_trace_over_3_inner_core(i,j,k,ispec) * b_eps_trace_over_3_inner_core(i,j,k,ispec))
+ alpha_kl_inner_core(i,j,k,ispec) = alpha_kl_inner_core(i,j,k,ispec) &
+ + deltat * (9 * eps_trace_over_3_inner_core(i,j,k,ispec) * &
+ b_eps_trace_over_3_inner_core(i,j,k,ispec))
+ enddo
enddo
enddo
enddo
- enddo
+ else
+ ! updates kernel contribution on GPU
+
+ ! computes contribution to density and bulk and shear modulus kernel
+ call compute_kernels_ic_cuda(Mesh_pointer,deltat)
+
+ endif
+
end subroutine compute_kernels_inner_core
!
!-------------------------------------------------------------------------------------------------
!
+
! Subroutines to compute the kernels for the 21 elastic coefficients
! Last modified 19/04/2007
-!-------------------------------------------------------------------
subroutine compute_strain_product(prod,eps_trace_over_3,epsdev,&
- b_eps_trace_over_3,b_epsdev)
+ b_eps_trace_over_3,b_epsdev)
! Purpose : compute the 21 strain products at a grid point
! (ispec,i,j,k fixed) and at a time t to compute then the kernels cij_kl (Voigt notation)
@@ -910,15 +556,15 @@
! Computing the 21 strain products without assuming eps(i)*b_eps(j) = eps(j)*b_eps(i)
p=1
do i=1,6
- do j=i,6
- prod(p)=eps(i)*b_eps(j)
- if(j>i) then
- prod(p)=prod(p)+eps(j)*b_eps(i)
- if(j>3 .and. i<4) prod(p)=prod(p)*2
- endif
- if(i>3) prod(p)=prod(p)*4
- p=p+1
- enddo
+ do j=i,6
+ prod(p)=eps(i)*b_eps(j)
+ if(j>i) then
+ prod(p)=prod(p)+eps(j)*b_eps(i)
+ if(j>3 .and. i<4) prod(p)=prod(p)*2
+ endif
+ if(i>3) prod(p)=prod(p)*4
+ p=p+1
+ enddo
enddo
end subroutine compute_strain_product
@@ -927,403 +573,15 @@
!-------------------------------------------------------------------------------------------------
!
- subroutine rotate_kernels_dble(cij_kl,cij_kll,theta_in,phi_in)
+ subroutine compute_kernels_hessian()
-! Purpose : compute the kernels in r,theta,phi (cij_kll)
-! from the kernels in x,y,z (cij_kl) (x,y,z <-> r,theta,phi)
-! At r,theta,phi fixed
-! theta and phi are in radians
-
-! Coeff from Min's routine rotate_anisotropic_tensor
-! with the help of Collect[Expand[cij],{dij}] in Mathematica
-
-! Definition of the output array cij_kll :
-! cij_kll(1) = C11 ; cij_kll(2) = C12 ; cij_kll(3) = C13
-! cij_kll(4) = C14 ; cij_kll(5) = C15 ; cij_kll(6) = C16
-! cij_kll(7) = C22 ; cij_kll(8) = C23 ; cij_kll(9) = C24
-! cij_kll(10) = C25 ; cij_kll(11) = C26 ; cij_kll(12) = C33
-! cij_kll(13) = C34 ; cij_kll(14) = C35 ; cij_kll(15) = C36
-! cij_kll(16) = C44 ; cij_kll(17) = C45 ; cij_kll(18) = C46
-! cij_kll(19) = C55 ; cij_kll(20) = C56 ; cij_kll(21) = C66
-! where the Cij (Voigt's notation) are defined as function of
-! the components of the elastic tensor in spherical coordinates
-! by eq. (A.1) of Chen & Tromp, GJI 168 (2007)
-
- implicit none
- include "constants.h"
-
- real(kind=CUSTOM_REAL) :: theta_in,phi_in
- real(kind=CUSTOM_REAL),dimension(21) :: cij_kll,cij_kl
-
- double precision :: theta,phi
- double precision :: costheta,sintheta,cosphi,sinphi
- double precision :: costhetasq,sinthetasq,cosphisq,sinphisq
- double precision :: costwotheta,sintwotheta,costwophi,sintwophi
- double precision :: cosfourtheta,sinfourtheta,cosfourphi,sinfourphi
- double precision :: costhetafour,sinthetafour,cosphifour,sinphifour
- double precision :: sintwophisq,sintwothetasq
- double precision :: costhreetheta,sinthreetheta,costhreephi,sinthreephi
-
-
- if (CUSTOM_REAL == SIZE_REAL) then
- theta = dble(theta_in)
- phi = dble(phi_in)
- else
- theta = theta_in
- phi = phi_in
- endif
-
- costheta = dcos(theta)
- sintheta = dsin(theta)
- cosphi = dcos(phi)
- sinphi = dsin(phi)
-
- costhetasq = costheta * costheta
- sinthetasq = sintheta * sintheta
- cosphisq = cosphi * cosphi
- sinphisq = sinphi * sinphi
-
- costhetafour = costhetasq * costhetasq
- sinthetafour = sinthetasq * sinthetasq
- cosphifour = cosphisq * cosphisq
- sinphifour = sinphisq * sinphisq
-
- costwotheta = dcos(2.d0*theta)
- sintwotheta = dsin(2.d0*theta)
- costwophi = dcos(2.d0*phi)
- sintwophi = dsin(2.d0*phi)
-
- costhreetheta=dcos(3.d0*theta)
- sinthreetheta=dsin(3.d0*theta)
- costhreephi=dcos(3.d0*phi)
- sinthreephi=dsin(3.d0*phi)
-
- cosfourtheta = dcos(4.d0*theta)
- sinfourtheta = dsin(4.d0*theta)
- cosfourphi = dcos(4.d0*phi)
- sinfourphi = dsin(4.d0*phi)
- sintwothetasq = sintwotheta * sintwotheta
- sintwophisq = sintwophi * sintwophi
-
-
- cij_kll(1) = 1.d0/16.d0* (cij_kl(16) - cij_kl(16)* costwophi + &
- 16.d0* cosphi*cosphisq* costhetafour* (cij_kl(1)* cosphi + cij_kl(6)* sinphi) + &
- 2.d0* (cij_kl(15) + cij_kl(17))* sintwophi* sintwothetasq - &
- 2.d0* (cij_kl(16)* cosfourtheta* sinphisq + &
- 2.d0* costhetafour* (-4* cij_kl(7)* sinphifour - &
- (cij_kl(2) + cij_kl(21))* sintwophisq) + &
- 8.d0* cij_kl(5)* cosphi*cosphisq* costheta*costhetasq* sintheta - &
- 8.d0* cij_kl(8)* costhetasq* sinphisq* sinthetasq - &
- 8.d0* cij_kl(12)* sinthetafour + &
- 8.d0* cosphisq* costhetasq* sintheta* ((cij_kl(4) + &
- cij_kl(20))* costheta* sinphi - &
- (cij_kl(3) + cij_kl(19))*sintheta) + &
- 8.d0* cosphi* costheta* (-cij_kl(11)* costheta*costhetasq* &
- sinphi*sinphisq + (cij_kl(10) + cij_kl(18))* costhetasq* sinphisq* sintheta + &
- cij_kl(14)* sintheta*sinthetasq) + 2.d0* sinphi* (cij_kl(13) + &
- cij_kl(9)* sinphisq)* sintwotheta + &
- sinphi* (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta))
-
- cij_kll(2) = 1.d0/4.d0* (costhetasq* (cij_kl(1) + 3.d0* cij_kl(2) + cij_kl(7) - &
- cij_kl(21) + (-cij_kl(1) + cij_kl(2) - cij_kl(7) + &
- cij_kl(21))* cosfourphi + (-cij_kl(6) + cij_kl(11))* sinfourphi) + &
- 4.d0* (cij_kl(8)* cosphisq - cij_kl(15)* cosphi* sinphi + &
- cij_kl(3)* sinphisq)* sinthetasq - &
- 2.d0* (cij_kl(10)* cosphisq*cosphi + &
- (cij_kl(9) - cij_kl(20))* cosphisq* sinphi + &
- (cij_kl(5) - cij_kl(18))* cosphi* sinphisq + &
- cij_kl(4)* sinphisq*sinphi)* sintwotheta)
-
- cij_kll(3) = 1.d0/8.d0* (sintwophi* (3.d0* cij_kl(15) - cij_kl(17) + &
- 4.d0* (cij_kl(2) + cij_kl(21))* costhetasq* sintwophi* sinthetasq) + &
- 4.d0* cij_kl(12)* sintwothetasq + 4.d0* cij_kl(1)* cosphifour* sintwothetasq + &
- 2.d0* cosphi*cosphisq* (8.d0* cij_kl(6)* costhetasq* sinphi* sinthetasq + &
- cij_kl(5)* sinfourtheta) + 2.d0* cosphisq* (3.d0* cij_kl(3) - cij_kl(19) + &
- (cij_kl(3) + cij_kl(19))* cosfourtheta + &
- (cij_kl(4) + cij_kl(20))* sinphi* sinfourtheta) + &
- 2.d0* sinphi* (sinphi* (3.d0* cij_kl(8) - &
- cij_kl(16) + (cij_kl(8) + cij_kl(16))* cosfourtheta + &
- 2.d0* cij_kl(7)* sinphisq* sintwothetasq)+ &
- (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta)+ &
- 2.d0* cosphi* ((cij_kl(15) + cij_kl(17))* cosfourtheta* sinphi + &
- 8.d0* cij_kl(11)* costhetasq* sinphi*sinphisq* sinthetasq + &
- (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq)*sinfourtheta))
-
- cij_kll(4) = 1.d0/8.d0* (cosphi* costheta *(5.d0* cij_kl(4) - &
- cij_kl(9) + 4.d0* cij_kl(13) - &
- 3.d0* cij_kl(20) + (cij_kl(4) + 3.d0* cij_kl(9) - &
- 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + &
- 1.d0/2.d0* (cij_kl(4) - cij_kl(9) + &
- cij_kl(20))* costhreephi * (costheta + 3.d0* costhreetheta) - &
- costheta* (-cij_kl(5) + 5.d0* cij_kl(10) + &
- 4.d0* cij_kl(14) - 3.d0* cij_kl(18) + &
- (3.d0* cij_kl(5) + cij_kl(10) - &
- 4.d0* cij_kl(14) + cij_kl(18))* costwotheta)* sinphi - &
- 1.d0/2.d0* (cij_kl(5) - cij_kl(10) - cij_kl(18))* (costheta + &
- 3.d0* costhreetheta)* sinthreephi + &
- 4.d0* (cij_kl(6) - cij_kl(11))* cosfourphi* costhetasq* sintheta - &
- 4.d0* (cij_kl(1) + cij_kl(3) - cij_kl(7) - cij_kl(8) + cij_kl(16) - cij_kl(19) + &
- (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + &
- cij_kl(16) - cij_kl(19))* costwotheta)* sintwophi* sintheta - &
- 4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
- cij_kl(21))* costhetasq* sinfourphi* sintheta + &
- costwophi* ((cij_kl(6) + cij_kl(11) + 6.d0* cij_kl(15) - &
- 2.d0* cij_kl(17))* sintheta + &
- (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + cij_kl(17)))* sinthreetheta))
-
- cij_kll(5) = 1.d0/4.d0* (2.d0* (cij_kl(4) + &
- cij_kl(20))* cosphisq* (costwotheta + cosfourtheta)* sinphi + &
- 2.d0* cij_kl(9)* (costwotheta + cosfourtheta)* sinphi*sinphisq + &
- 16.d0* cij_kl(1)* cosphifour* costheta*costhetasq* sintheta + &
- 4.d0* costheta*costhetasq* (-2.d0* cij_kl(8)* sinphisq + &
- 4.d0* cij_kl(7)* sinphifour + &
- (cij_kl(2) + cij_kl(21))* sintwophisq)* sintheta + &
- 4.d0* cij_kl(13)* (1.d0 + 2.d0* costwotheta)* sinphi* sinthetasq + &
- 8.d0* costheta* (-2.d0* cij_kl(12) + cij_kl(8)* sinphisq)* sintheta*sinthetasq + &
- 2.d0* cosphi*cosphisq* (cij_kl(5)* (costwotheta + cosfourtheta) + &
- 8.d0* cij_kl(6)* costheta*costhetasq* sinphi* sintheta) + &
- 2.d0* cosphi* (cosfourtheta* (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
- costwotheta* (cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
- 8.d0* cij_kl(11)* costheta*costhetasq* sinphi*sinphisq* sintheta) - &
- (cij_kl(3) + cij_kl(16) + cij_kl(19) + &
- (cij_kl(3) - cij_kl(16) + cij_kl(19))* costwophi + &
- (cij_kl(15) + cij_kl(17))* sintwophi)* sinfourtheta)
-
- cij_kll(6) = 1.d0/2.d0* costheta*costhetasq* ((cij_kl(6) + cij_kl(11))* costwophi + &
- (cij_kl(6) - cij_kl(11))* cosfourphi + 2.d0* (-cij_kl(1) + cij_kl(7))* sintwophi + &
- (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* sinfourphi) + &
- 1.d0/4.d0* costhetasq* (-(cij_kl(4) + 3* cij_kl(9) + cij_kl(20))* cosphi - &
- 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi + &
- (3.d0* cij_kl(5) + cij_kl(10) + cij_kl(18))* sinphi + &
- 3.d0* (cij_kl(5) - cij_kl(10) - cij_kl(18))* sinthreephi)* sintheta + &
- costheta* ((cij_kl(15) + cij_kl(17))* costwophi + &
- (-cij_kl(3) + cij_kl(8) + cij_kl(16) - cij_kl(19))* sintwophi)* sinthetasq + &
- (-cij_kl(13)* cosphi + cij_kl(14)* sinphi)* sintheta*sinthetasq
-
- cij_kll(7) = cij_kl(7)* cosphifour - cij_kl(11)* cosphi*cosphisq* sinphi + &
- (cij_kl(2) + cij_kl(21))* cosphisq* sinphisq - &
- cij_kl(6)* cosphi* sinphi*sinphisq + &
- cij_kl(1)* sinphifour
-
- cij_kll(8) = 1.d0/2.d0* (2.d0* costhetasq* sinphi* (-cij_kl(15)* cosphi + &
- cij_kl(3)* sinphi) + 2.d0* cij_kl(2)* cosphifour* sinthetasq + &
- (2.d0* cij_kl(2)* sinphifour + &
- (cij_kl(1) + cij_kl(7) - cij_kl(21))* sintwophisq)* sinthetasq + &
- cij_kl(4)* sinphi*sinphisq* sintwotheta + &
- cosphi*cosphisq* (2.d0* (-cij_kl(6) + cij_kl(11))* sinphi* sinthetasq + &
- cij_kl(10)* sintwotheta) + cosphi* sinphisq* (2.d0* (cij_kl(6) - &
- cij_kl(11))* sinphi* sinthetasq + &
- (cij_kl(5) - cij_kl(18))* sintwotheta) + &
- cosphisq* (2.d0* cij_kl(8)* costhetasq + &
- (cij_kl(9) - cij_kl(20))* sinphi* sintwotheta))
-
- cij_kll(9) = cij_kl(11)* cosphifour* sintheta - sinphi*sinphisq* (cij_kl(5)* costheta + &
- cij_kl(6)* sinphi* sintheta) + cosphisq* sinphi* (-(cij_kl(10) + &
- cij_kl(18))* costheta + &
- 3.d0* (cij_kl(6) - cij_kl(11))* sinphi* sintheta) + &
- cosphi* sinphisq* ((cij_kl(4) + cij_kl(20))* costheta + &
- 2.d0* (-2.d0* cij_kl(1) + cij_kl(2) + cij_kl(21))* sinphi* sintheta) + &
- cosphi*cosphisq* (cij_kl(9)* costheta - 2.d0* (cij_kl(2) - 2.d0* cij_kl(7) + &
- cij_kl(21))* sinphi* sintheta)
-
- cij_kll(10) = 1.d0/4.d0* (4.d0* costwotheta* (cij_kl(10)* cosphi*cosphisq + &
- (cij_kl(9) - cij_kl(20))* cosphisq* sinphi + &
- (cij_kl(5) - cij_kl(18))* cosphi* sinphisq + &
- cij_kl(4)* sinphi*sinphisq) + (cij_kl(1) + 3.d0* cij_kl(2) - &
- 2.d0* cij_kl(3) + cij_kl(7) - &
- 2.d0* cij_kl(8) - cij_kl(21) + 2.d0* (cij_kl(3) - cij_kl(8))* costwophi + &
- (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
- 2.d0* cij_kl(15)* sintwophi + &
- (-cij_kl(6) + cij_kl(11))* sinfourphi)* sintwotheta)
-
- cij_kll(11) = 1.d0/4.d0* (2.d0* costheta* ((cij_kl(6) + cij_kl(11))* costwophi + &
- (-cij_kl(6) + cij_kl(11))* cosfourphi + &
- 2.d0* (-cij_kl(1) + cij_kl(7))* sintwophi + &
- (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(21))* sinfourphi) + &
- (-(cij_kl(4) + 3.d0* cij_kl(9) + cij_kl(20))* cosphi + &
- (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi + &
- (3.d0* cij_kl(5) + cij_kl(10) + cij_kl(18))* sinphi + &
- (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sintheta)
-
- cij_kll(12) = 1.d0/16.d0* (cij_kl(16) - 2.d0* cij_kl(16)* cosfourtheta* sinphisq + &
- costwophi* (-cij_kl(16) + 8.d0* costheta* sinthetasq* ((cij_kl(3) - &
- cij_kl(8) + cij_kl(19))* costheta + &
- (cij_kl(5) - cij_kl(10) - cij_kl(18))* cosphi* sintheta)) + &
- 2.d0* (cij_kl(15) + cij_kl(17))* sintwophi* sintwothetasq + &
- 2.d0* (8.d0* cij_kl(12)* costhetafour + &
- 8.d0* cij_kl(14)* cosphi* costheta*costhetasq* sintheta + &
- 4.d0* cosphi* costheta* (cij_kl(5) + cij_kl(10) + cij_kl(18) + &
- (cij_kl(4) + cij_kl(20))* sintwophi)* &
- sintheta*sinthetasq + 8.d0* cij_kl(1)* cosphifour* sinthetafour + &
- 8.d0* cij_kl(6)* cosphi*cosphisq* sinphi* sinthetafour + &
- 8.d0* cij_kl(11)* cosphi* sinphi*sinphisq* sinthetafour + &
- 8.d0* cij_kl(7)* sinphifour* sinthetafour + &
- 2.d0* cij_kl(2)* sintwophisq* sinthetafour + &
- 2.d0* cij_kl(21)* sintwophisq* sinthetafour + &
- 2.d0* cij_kl(13)* sinphi* sintwotheta + &
- 2.d0* cij_kl(9)* sinphi*sinphisq* sintwotheta + &
- cij_kl(3)* sintwothetasq + cij_kl(8)* sintwothetasq + &
- cij_kl(19)* sintwothetasq + cij_kl(13)* sinphi* sinfourtheta - &
- cij_kl(9)* sinphi*sinphisq* sinfourtheta))
-
- cij_kll(13) = 1.d0/8.d0* (cosphi* costheta* (cij_kl(4) + 3.d0* cij_kl(9) + &
- 4.d0* cij_kl(13) + cij_kl(20) - (cij_kl(4) + 3.d0* cij_kl(9) - &
- 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + 4.d0* (-cij_kl(1) - &
- cij_kl(3) + cij_kl(7) + cij_kl(8) + cij_kl(16) - cij_kl(19) + &
- (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
- cij_kl(19))* costwotheta)* sintwophi* sintheta + &
- 4.d0* (cij_kl(6) - cij_kl(11))* cosfourphi* sinthetasq*sintheta - &
- 4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
- cij_kl(21))* sinfourphi* sinthetasq*sintheta + &
- costheta* ((-3.d0* cij_kl(5) - cij_kl(10) - 4.d0* cij_kl(14) - &
- cij_kl(18) + (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + &
- cij_kl(18))* costwotheta)* sinphi + 6.d0* ((cij_kl(4) - cij_kl(9) + &
- cij_kl(20))* costhreephi + (-cij_kl(5) + cij_kl(10) + &
- cij_kl(18))* sinthreephi)* sinthetasq) + costwophi* ((3* cij_kl(6) + &
- 3.d0* cij_kl(11) + 2.d0* (cij_kl(15) + cij_kl(17)))* sintheta - &
- (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + &
- cij_kl(17)))* sinthreetheta))
-
- cij_kll(14) = 1.d0/4.d0* (2.d0* cij_kl(13)* (costwotheta + cosfourtheta)* sinphi + &
- 8.d0* costheta*costhetasq* (-2.d0* cij_kl(12) + cij_kl(8)* sinphisq)* sintheta + &
- 4.d0* (cij_kl(4) + cij_kl(20))* cosphisq* (1.d0 + &
- 2.d0* costwotheta)* sinphi* sinthetasq + &
- 4.d0* cij_kl(9)* (1.d0 + 2.d0* costwotheta)* sinphi*sinphisq* sinthetasq + &
- 16.d0* cij_kl(1)* cosphifour* costheta* sintheta*sinthetasq + &
- 4.d0* costheta* (-2.d0* cij_kl(8)* sinphisq + 4.d0* cij_kl(7)* sinphifour + &
- (cij_kl(2) + cij_kl(21))* sintwophisq)* sintheta*sinthetasq + &
- 4.d0* cosphi*cosphisq* sinthetasq* (cij_kl(5) + 2.d0* cij_kl(5)* costwotheta + &
- 4.d0* cij_kl(6)* costheta* sinphi* sintheta) + &
- 2.d0* cosphi* (cosfourtheta* (cij_kl(14) - (cij_kl(10) + cij_kl(18))* sinphisq) + &
- costwotheta* (cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
- 8.d0* cij_kl(11)* costheta* sinphi*sinphisq* sintheta*sinthetasq) + &
- (cij_kl(3) + cij_kl(16) + cij_kl(19) + (cij_kl(3) - cij_kl(16) + &
- cij_kl(19))* costwophi + (cij_kl(15) + cij_kl(17))* sintwophi)* sinfourtheta)
-
- cij_kll(15) = costwophi* costheta* (-cij_kl(17) + (cij_kl(15) + cij_kl(17))* costhetasq) + &
- 1.d0/16.d0* (-((11.d0* cij_kl(4) + cij_kl(9) + 4.d0* cij_kl(13) - &
- 5.d0* cij_kl(20))* cosphi + (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
- (cij_kl(5) + 11.d0* cij_kl(10) + 4.d0* cij_kl(14) - &
- 5.d0* cij_kl(18))* sinphi + (-cij_kl(5) + cij_kl(10) + &
- cij_kl(18))* sinthreephi)* sintheta + &
- 8.d0* costheta* ((-cij_kl(1) - cij_kl(3) + cij_kl(7) + cij_kl(8) - cij_kl(16) +&
- cij_kl(19) + (cij_kl(1) - cij_kl(3) - &
- cij_kl(7) + cij_kl(8) + cij_kl(16) - cij_kl(19))* costwotheta)* sintwophi +&
- ((cij_kl(6) + cij_kl(11))* costwophi + &
- (cij_kl(6) - cij_kl(11))* cosfourphi + (-cij_kl(1) + cij_kl(2) - cij_kl(7) +&
- cij_kl(21))* sinfourphi)* sinthetasq) +&
- ((cij_kl(4) + 3.d0* cij_kl(9) - 4.d0* cij_kl(13) + cij_kl(20))* cosphi + &
- 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
- (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))* sinphi + &
- 3.d0* (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sinthreetheta)
-
- cij_kll(16) = 1.d0/4.d0*(cij_kl(1) - cij_kl(2) + cij_kl(7) + cij_kl(16) + &
- cij_kl(19) + cij_kl(21) + 2.d0*(cij_kl(16) - cij_kl(19))*costwophi* costhetasq + &
- (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(16) + &
- cij_kl(19) - cij_kl(21))*costwotheta - 2.d0* cij_kl(17)* costhetasq* sintwophi + &
- 2.d0* ((-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
- (-cij_kl(6) + cij_kl(11))* sinfourphi)* sinthetasq + ((cij_kl(5) - cij_kl(10) +&
- cij_kl(18))* cosphi + (-cij_kl(5) + cij_kl(10) + cij_kl(18))* costhreephi +&
- (-cij_kl(4) + cij_kl(9) + cij_kl(20))* sinphi - &
- (cij_kl(4) - cij_kl(9) + cij_kl(20))* sinthreephi)* sintwotheta)
-
- cij_kll(17) = 1.d0/8.d0* (4.d0* costwophi* costheta* (cij_kl(6) + cij_kl(11) - &
- 2.d0* cij_kl(15) - (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + &
- cij_kl(17)))* costwotheta) - (2.d0* cosphi* (-3.d0* cij_kl(4) +&
- cij_kl(9) + 2.d0* cij_kl(13) + cij_kl(20) + (cij_kl(4) - cij_kl(9) + &
- cij_kl(20))* costwophi) - (cij_kl(5) - 5.d0* cij_kl(10) + &
- 4.d0* cij_kl(14) + 3.d0* cij_kl(18))* sinphi + (-cij_kl(5) + cij_kl(10) + &
- cij_kl(18))* sinthreephi)* sintheta + &
- 8.d0* costheta* ((-cij_kl(1) + cij_kl(3) + cij_kl(7) - cij_kl(8) + &
- (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
- cij_kl(19))* costwotheta)* sintwophi + ((cij_kl(6) - cij_kl(11))* cosfourphi + &
- (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* sinfourphi)* sinthetasq) +&
- ((cij_kl(4) + 3.d0* cij_kl(9) - 4.d0* cij_kl(13) + cij_kl(20))* cosphi + &
- 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
- (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))* sinphi + &
- 3.d0* (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sinthreetheta)
-
- cij_kll(18) = 1.d0/2.d0* ((cij_kl(5) - cij_kl(10) + cij_kl(18))* cosphi* costwotheta - &
- (cij_kl(5) - cij_kl(10) - cij_kl(18))* costhreephi* costwotheta - &
- 2.d0* (cij_kl(4) - cij_kl(9) + &
- (cij_kl(4) - cij_kl(9) + cij_kl(20))* costwophi)* costwotheta* sinphi + &
- (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(16) - cij_kl(19) + cij_kl(21) + &
- (-cij_kl(16) + cij_kl(19))* costwophi + &
- (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
- cij_kl(17)* sintwophi + &
- (-cij_kl(6) + cij_kl(11))* sinfourphi)* sintwotheta)
-
- cij_kll(19) = 1.d0/4.d0* (cij_kl(16) - cij_kl(16)* costwophi + &
- (-cij_kl(15) + cij_kl(17))* sintwophi + &
- 4.d0* cij_kl(12)* sintwothetasq + &
- 2.d0* (2.d0* cij_kl(1)* cosphifour* sintwothetasq + &
- cosphi*cosphisq* (8.d0* cij_kl(6)* costhetasq* sinphi* sinthetasq + &
- cij_kl(5)* sinfourtheta) + cosphisq* (-cij_kl(3) + cij_kl(19) + (cij_kl(3) +&
- cij_kl(19))* cosfourtheta + (cij_kl(4) + cij_kl(20))* sinphi* sinfourtheta) + &
- sinphi* (cosfourtheta* ((cij_kl(15) + cij_kl(17))* cosphi + &
- cij_kl(16)* sinphi) + (cij_kl(2) + cij_kl(7) - 2.d0* cij_kl(8) + cij_kl(21) + &
- (cij_kl(2) - cij_kl(7) + cij_kl(21))* costwophi)* sinphi* sintwothetasq + &
- (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta) + &
- cosphi* (8.d0* cij_kl(11)* costhetasq* sinphi*sinphisq* sinthetasq + &
- (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq)* sinfourtheta)))
-
- cij_kll(20) = 1.d0/8.d0* (2.d0* cosphi* costheta* (-3.d0* cij_kl(4) - cij_kl(9) + &
- 4.d0* cij_kl(13) + cij_kl(20) + (cij_kl(4) + 3.d0* cij_kl(9) - &
- 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + &
- (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi* (costheta + &
- 3.d0* costhreetheta) - &
- 2.d0* costheta* (-cij_kl(5) - 3.d0* cij_kl(10) + 4.d0* cij_kl(14) + &
- cij_kl(18) + (3.d0* cij_kl(5) + &
- cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))*costwotheta)* sinphi - &
- (cij_kl(5) - cij_kl(10) - cij_kl(18))* &
- (costheta + 3.d0* costhreetheta)* sinthreephi + 8.d0* (cij_kl(6) - &
- cij_kl(11))* cosfourphi* costhetasq* sintheta - 8.d0* (cij_kl(1) - &
- cij_kl(3) - cij_kl(7) + cij_kl(8) + &
- (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
- cij_kl(19))* costwotheta)* sintwophi* sintheta - &
- 8.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
- cij_kl(21))* costhetasq* sinfourphi* sintheta + &
- 2.d0* costwophi* ((cij_kl(6) + cij_kl(11) - 2.d0* cij_kl(15) + &
- 2.d0* cij_kl(17))* sintheta + &
- (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + cij_kl(17)))* sinthreetheta))
-
- cij_kll(21) = 1.d0/4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) + cij_kl(16) + &
- cij_kl(19) + cij_kl(21) - 2.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
- cij_kl(21))* cosfourphi* costhetasq + &
- (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(16) - cij_kl(19) + &
- cij_kl(21))* costwotheta + &
- 2.d0* (-cij_kl(6) + cij_kl(11))* costhetasq* sinfourphi - &
- 2.d0* ((-cij_kl(16) + cij_kl(19))* costwophi + cij_kl(17)* sintwophi)* sinthetasq - &
- ((cij_kl(5) - cij_kl(10) + cij_kl(18))* cosphi + (-cij_kl(5) + cij_kl(10) +&
- cij_kl(18))* costhreephi + &
- (-cij_kl(4) + cij_kl(9) + cij_kl(20))* sinphi - (cij_kl(4) - cij_kl(9) + &
- cij_kl(20))* sinthreephi)* sintwotheta)
-
- end subroutine rotate_kernels_dble
-
-!-----------------------------------------------------------------------------
-
- subroutine compute_kernels_hessian(ibool_crust_mantle, &
- hess_kl_crust_mantle, &
- accel_crust_mantle,b_accel_crust_mantle, &
- deltat)
-
use constants
+ use specfem_par,only: deltat
use specfem_par,only: GPU_MODE,Mesh_pointer
+ use specfem_par_crustmantle
+
implicit none
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- hess_kl_crust_mantle
-
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
- accel_crust_mantle
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
- b_accel_crust_mantle
-
- real(kind=CUSTOM_REAL) deltat
-
! local parameters
integer :: i,j,k,ispec,iglob
@@ -1349,13 +607,11 @@
enddo
else
- ! updates kernels on GPU
+ ! updates kernel contribution on GPU
! computes contribution to density and bulk modulus kernel
call compute_kernels_hess_cuda(Mesh_pointer,deltat)
+
endif
end subroutine compute_kernels_hessian
-
-
-
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -545,12 +545,22 @@
else if (SIMULATION_TYPE == 3) then
! to store kernels
- !call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
- !call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
+ call transfer_kernels_oc_to_host(Mesh_pointer, &
+ rho_kl_outer_core,&
+ alpha_kl_outer_core,NSPEC_OUTER_CORE)
+ call transfer_kernels_cm_to_host(Mesh_pointer, &
+ rho_kl_crust_mantle, &
+ alpha_kl_crust_mantle, &
+ beta_kl_crust_mantle, &
+ cijkl_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+ call transfer_kernels_ic_to_host(Mesh_pointer, &
+ rho_kl_inner_core, &
+ alpha_kl_inner_core, &
+ beta_kl_inner_core,NSPEC_INNER_CORE)
! specific noise strength kernel
if( NOISE_TOMOGRAPHY == 3 ) then
- !call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
+ call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl_crust_mantle,NSPEC_CRUST_MANTLE)
endif
! approximative hessian for preconditioning kernels
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/noise_tomography.f90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -25,15 +25,12 @@
!
!=====================================================================
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
-
! subroutine for NOISE TOMOGRAPHY
! chracterize noise statistics
! for a given point (xcoord,ycoord,zcoord), specify the noise direction "normal_x/y/z_noise"
! and noise distribution "mask_noise"
! USERS need to modify this subroutine for their own noise characteristics
+
subroutine noise_distribution_direction(xcoord_in,ycoord_in,zcoord_in, &
normal_x_noise_out,normal_y_noise_out,normal_z_noise_out, &
mask_noise_out)
@@ -69,12 +66,13 @@
end subroutine noise_distribution_direction
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! read parameters
+
subroutine read_parameters_noise(myrank,nrec,NSTEP,nmovie_points, &
islice_selected_rec,xi_receiver,eta_receiver,gamma_receiver,nu, &
noise_sourcearray,xigll,yigll,zigll,nspec_top, &
@@ -218,12 +216,13 @@
end subroutine read_parameters_noise
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! check for consistency of the parameters
+
subroutine check_parameters_noise(myrank,NOISE_TOMOGRAPHY,SIMULATION_TYPE,SAVE_FORWARD, &
NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN,ROTATE_SEISMOGRAMS_RT, &
SAVE_ALL_SEISMOS_IN_ONE_FILE, USE_BINARY_FOR_LARGE_FILE, &
@@ -313,12 +312,14 @@
end subroutine check_parameters_noise
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
-! read and construct the "source" (source time function based upon noise spectrum) for "ensemble forward source"
+! read and construct the "source" (source time function based upon noise spectrum)
+! for "ensemble forward source"
+
subroutine compute_arrays_source_noise(myrank, &
xi_noise,eta_noise,gamma_noise,nu_single,noise_sourcearray, &
xigll,yigll,zigll,NSTEP)
@@ -403,13 +404,14 @@
end subroutine compute_arrays_source_noise
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! step 1: calculate the "ensemble forward source"
! add noise spectrum to the location of master receiver
+
subroutine noise_add_source_master_rec()
! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
@@ -447,9 +449,9 @@
end subroutine noise_add_source_master_rec
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! step 1: calculate the "ensemble forward source"
@@ -496,9 +498,9 @@
end subroutine noise_save_surface_movie
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! step 2/3: calculate/reconstructe the "ensemble forward wavefield"
@@ -518,7 +520,6 @@
use specfem_par
use specfem_par_crustmantle
-
implicit none
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE),intent(inout) :: accel
@@ -574,9 +575,9 @@
end subroutine noise_read_add_surface_movie
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
+!
+!-------------------------------------------------------------------------------------------------
+!
! subroutine for NOISE TOMOGRAPHY
! step 3: constructing noise source strength kernel
@@ -589,29 +590,28 @@
! by this modification, the efficiency is greatly improved
! and now, it should be OK to run NOISE_TOMOGRAPHY on a cluster with global storage
-!!!!! improved version !!!!!
- subroutine compute_kernels_strength_noise(nmovie_points,ibool_crust_mantle, &
- Sigma_kl_crust_mantle,displ_crust_mantle,deltat,it, &
- normal_x_noise,normal_y_noise,normal_z_noise, &
- nspec_top,noise_surface_movie, &
- ibelm_top_crust_mantle)
+ subroutine compute_kernels_strength_noise()
+
+ use specfem_par
+ use specfem_par_crustmantle
+
implicit none
- include "constants.h"
- include "OUTPUT_FILES/values_from_mesher.h"
+! include "constants.h"
+! include "OUTPUT_FILES/values_from_mesher.h"
! input parameters
- integer :: it,nspec_top,nmovie_points
- integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
- real(kind=CUSTOM_REAL) :: deltat
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: displ_crust_mantle
- real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise
+! integer :: it,nspec_top,nmovie_points
+! integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
+! integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+! real(kind=CUSTOM_REAL) :: deltat
+! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: displ_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise
! output parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
- Sigma_kl_crust_mantle
+! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+! Sigma_kl_crust_mantle
! local parameters
integer :: i,j,k,ispec,iglob,ipoin,ispec2D
real(kind=CUSTOM_REAL) :: eta
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,nspec_top) :: noise_surface_movie
+! real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,nspec_top) :: noise_surface_movie
! read surface movie, needed for Sigma_kl_crust_mantle
call read_abs(9,noise_surface_movie,CUSTOM_REAL*NDIM*NGLLX*NGLLY*nspec_top,it)
@@ -619,105 +619,46 @@
! noise source strength kernel
! to keep similar structure to other kernels, the source strength kernel is saved as a volumetric kernel
! but only updated at the surface, because the noise is generated there
- ipoin = 0
- do ispec2D = 1, nspec_top
- ispec = ibelm_top_crust_mantle(ispec2D)
+ if( .not. GPU_MODE ) then
+ ! on CPU
+ ipoin = 0
+ do ispec2D = 1, nspec_top
+ ispec = ibelm_top_crust_mantle(ispec2D)
- k = NGLLZ
+ k = NGLLZ
- ! loop on all the points inside the element
- do j = 1,NGLLY
- do i = 1,NGLLX
- ipoin = ipoin + 1
- iglob = ibool_crust_mantle(i,j,k,ispec)
+ ! loop on all the points inside the element
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ ipoin = ipoin + 1
+ iglob = ibool_crust_mantle(i,j,k,ispec)
- eta = noise_surface_movie(1,i,j,ispec2D) * normal_x_noise(ipoin) + &
- noise_surface_movie(2,i,j,ispec2D) * normal_y_noise(ipoin) + &
- noise_surface_movie(3,i,j,ispec2D) * normal_z_noise(ipoin)
+ eta = noise_surface_movie(1,i,j,ispec2D) * normal_x_noise(ipoin) + &
+ noise_surface_movie(2,i,j,ispec2D) * normal_y_noise(ipoin) + &
+ noise_surface_movie(3,i,j,ispec2D) * normal_z_noise(ipoin)
- Sigma_kl_crust_mantle(i,j,k,ispec) = Sigma_kl_crust_mantle(i,j,k,ispec) &
- + deltat * eta * ( normal_x_noise(ipoin) * displ_crust_mantle(1,iglob) &
- + normal_y_noise(ipoin) * displ_crust_mantle(2,iglob) &
- + normal_z_noise(ipoin) * displ_crust_mantle(3,iglob) )
+ Sigma_kl_crust_mantle(i,j,k,ispec) = Sigma_kl_crust_mantle(i,j,k,ispec) &
+ + deltat * eta * ( normal_x_noise(ipoin) * displ_crust_mantle(1,iglob) &
+ + normal_y_noise(ipoin) * displ_crust_mantle(2,iglob) &
+ + normal_z_noise(ipoin) * displ_crust_mantle(3,iglob) )
+ enddo
enddo
enddo
- enddo
+ else
+ ! on GPU
+ call compute_kernels_strgth_noise_cu(Mesh_pointer,noise_surface_movie,deltat)
+ endif
end subroutine compute_kernels_strength_noise
-!!!!! original implementation, not used anymore (but kept here for references) !!!!!
-! subroutine compute_kernels_strength_noise_original(myrank,ibool_crust_mantle, &
-! Sigma_kl_crust_mantle,displ_crust_mantle,deltat,it, &
-! nmovie_points,normal_x_noise,normal_y_noise,normal_z_noise, &
-! nspec_top,ibelm_top_crust_mantle,LOCAL_PATH)
-! implicit none
-! include "constants.h"
-! include "OUTPUT_FILES/values_from_mesher.h"
-! ! input parameters
-! integer :: myrank,nmovie_points,it,nspec_top
-! integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
-! integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
-! real(kind=CUSTOM_REAL) :: deltat
-! real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: displ_crust_mantle
-! real(kind=CUSTOM_REAL), dimension(nmovie_points) :: normal_x_noise,normal_y_noise,normal_z_noise
-! character(len=150) :: LOCAL_PATH
-! ! output parameters
-! real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
-! Sigma_kl_crust_mantle
-! ! local parameters
-! integer :: i,j,k,ispec,iglob,ipoin,ispec2D,ios
-! real(kind=CUSTOM_REAL) :: eta
-! real(kind=CUSTOM_REAL), dimension(nmovie_points) :: store_val_ux,store_val_uy,store_val_uz
-! character(len=150) :: outputname
!
+!-------------------------------------------------------------------------------------------------
!
-! ! read surface movie, needed for Sigma_kl_crust_mantle
-! write(outputname,"('/proc',i6.6,'_surface_movie',i6.6)") myrank, it
-! open(unit=IIN_NOISE,file=trim(LOCAL_PATH)//outputname,status='old',form='unformatted',action='read',iostat=ios)
-! if( ios /= 0) call exit_MPI(myrank,'file '//trim(outputname)//' does NOT exist!')
-!
-! read(IIN_NOISE) store_val_ux
-! read(IIN_NOISE) store_val_uy
-! read(IIN_NOISE) store_val_uz
-! close(IIN_NOISE)
-!
-! ! noise source strength kernel
-! ! to keep similar structure to other kernels, the source strength kernel is saved as a volumetric kernel
-! ! but only updated at the surface, because the noise is generated there
-! ipoin = 0
-! do ispec2D = 1, nspec_top
-! ispec = ibelm_top_crust_mantle(ispec2D)
-!
-! k = NGLLZ
-!
-! ! loop on all the points inside the element
-! do j = 1,NGLLY
-! do i = 1,NGLLX
-! ipoin = ipoin + 1
-! iglob = ibool_crust_mantle(i,j,k,ispec)
-!
-! eta = store_val_ux(ipoin) * normal_x_noise(ipoin) + &
-! store_val_uy(ipoin) * normal_y_noise(ipoin) + &
-! store_val_uz(ipoin) * normal_z_noise(ipoin)
-!
-! Sigma_kl_crust_mantle(i,j,k,ispec) = Sigma_kl_crust_mantle(i,j,k,ispec) &
-! + deltat * eta * ( normal_x_noise(ipoin) * displ_crust_mantle(1,iglob) &
-! + normal_y_noise(ipoin) * displ_crust_mantle(2,iglob) &
-! + normal_z_noise(ipoin) * displ_crust_mantle(3,iglob) )
-! enddo
-! enddo
-!
-! enddo
-!
-! end subroutine compute_kernels_strength_noise_original
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
-
! subroutine for NOISE TOMOGRAPHY
! step 3: save noise source strength kernel
+
subroutine save_kernels_strength_noise(myrank,LOCAL_PATH,Sigma_kl_crust_mantle)
implicit none
include "constants.h"
@@ -741,7 +682,3 @@
close(IOUT_NOISE)
end subroutine save_kernels_strength_noise
-
-! =============================================================================================================
-! =============================================================================================================
-! =============================================================================================================
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -1072,18 +1072,15 @@
NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
- SIMULATION_TYPE, &
- NOISE_TOMOGRAPHY, &
- SAVE_FORWARD, &
- ABSORBING_CONDITIONS, &
+ SIMULATION_TYPE,NOISE_TOMOGRAPHY, &
+ SAVE_FORWARD,ABSORBING_CONDITIONS, &
GRAVITY_VAL,ROTATION_VAL, &
ATTENUATION_VAL,USE_ATTENUATION_MIMIC, &
COMPUTE_AND_STORE_STRAIN, &
- ANISOTROPIC_3D_MANTLE_VAL, &
- ANISOTROPIC_INNER_CORE_VAL, &
+ ANISOTROPIC_3D_MANTLE_VAL,ANISOTROPIC_INNER_CORE_VAL, &
SAVE_BOUNDARY_MESH, &
USE_MESH_COLORING_GPU, &
- APPROXIMATE_HESS_KL)
+ ANISOTROPIC_KL,APPROXIMATE_HESS_KL)
call sync_all()
! prepares rotation arrays
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_kernels.f90 2012-02-23 04:58:54 UTC (rev 19663)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_kernels.f90 2012-02-23 06:14:45 UTC (rev 19664)
@@ -34,7 +34,6 @@
kappavstore_crust_mantle,ibool_crust_mantle, &
kappahstore_crust_mantle,muhstore_crust_mantle, &
eta_anisostore_crust_mantle,ispec_is_tiso_crust_mantle, &
- ! --idoubling_crust_mantle, &
LOCAL_PATH)
implicit none
@@ -61,7 +60,6 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
-! integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
@@ -104,22 +102,22 @@
if( SAVE_TRANSVERSE_KL ) then
! transverse isotropic kernel arrays for file output
allocate(alphav_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- alphah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- betav_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- betah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- eta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
+ alphah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
+ betav_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
+ betah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
+ eta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
! isotropic kernel arrays for file output
allocate(bulk_c_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- bulk_betav_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- bulk_betah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- bulk_beta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
+ bulk_betav_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
+ bulk_betah_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
+ bulk_beta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
endif
if( .not. ANISOTROPIC_KL ) then
! allocates temporary isotropic kernel arrays for file output
allocate(bulk_c_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT), &
- bulk_beta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
+ bulk_beta_kl_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT))
endif
! crust_mantle
@@ -802,3 +800,381 @@
close(27)
end subroutine save_kernels_hessian
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine rotate_kernels_dble(cij_kl,cij_kll,theta_in,phi_in)
+
+! Purpose : compute the kernels in r,theta,phi (cij_kll)
+! from the kernels in x,y,z (cij_kl) (x,y,z <-> r,theta,phi)
+! At r,theta,phi fixed
+! theta and phi are in radians
+
+! Coeff from Min's routine rotate_anisotropic_tensor
+! with the help of Collect[Expand[cij],{dij}] in Mathematica
+
+! Definition of the output array cij_kll :
+! cij_kll(1) = C11 ; cij_kll(2) = C12 ; cij_kll(3) = C13
+! cij_kll(4) = C14 ; cij_kll(5) = C15 ; cij_kll(6) = C16
+! cij_kll(7) = C22 ; cij_kll(8) = C23 ; cij_kll(9) = C24
+! cij_kll(10) = C25 ; cij_kll(11) = C26 ; cij_kll(12) = C33
+! cij_kll(13) = C34 ; cij_kll(14) = C35 ; cij_kll(15) = C36
+! cij_kll(16) = C44 ; cij_kll(17) = C45 ; cij_kll(18) = C46
+! cij_kll(19) = C55 ; cij_kll(20) = C56 ; cij_kll(21) = C66
+! where the Cij (Voigt's notation) are defined as function of
+! the components of the elastic tensor in spherical coordinates
+! by eq. (A.1) of Chen & Tromp, GJI 168 (2007)
+
+ implicit none
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL) :: theta_in,phi_in
+ real(kind=CUSTOM_REAL),dimension(21) :: cij_kll,cij_kl
+
+ double precision :: theta,phi
+ double precision :: costheta,sintheta,cosphi,sinphi
+ double precision :: costhetasq,sinthetasq,cosphisq,sinphisq
+ double precision :: costwotheta,sintwotheta,costwophi,sintwophi
+ double precision :: cosfourtheta,sinfourtheta,cosfourphi,sinfourphi
+ double precision :: costhetafour,sinthetafour,cosphifour,sinphifour
+ double precision :: sintwophisq,sintwothetasq
+ double precision :: costhreetheta,sinthreetheta,costhreephi,sinthreephi
+
+
+ if (CUSTOM_REAL == SIZE_REAL) then
+ theta = dble(theta_in)
+ phi = dble(phi_in)
+ else
+ theta = theta_in
+ phi = phi_in
+ endif
+
+ costheta = dcos(theta)
+ sintheta = dsin(theta)
+ cosphi = dcos(phi)
+ sinphi = dsin(phi)
+
+ costhetasq = costheta * costheta
+ sinthetasq = sintheta * sintheta
+ cosphisq = cosphi * cosphi
+ sinphisq = sinphi * sinphi
+
+ costhetafour = costhetasq * costhetasq
+ sinthetafour = sinthetasq * sinthetasq
+ cosphifour = cosphisq * cosphisq
+ sinphifour = sinphisq * sinphisq
+
+ costwotheta = dcos(2.d0*theta)
+ sintwotheta = dsin(2.d0*theta)
+ costwophi = dcos(2.d0*phi)
+ sintwophi = dsin(2.d0*phi)
+
+ costhreetheta=dcos(3.d0*theta)
+ sinthreetheta=dsin(3.d0*theta)
+ costhreephi=dcos(3.d0*phi)
+ sinthreephi=dsin(3.d0*phi)
+
+ cosfourtheta = dcos(4.d0*theta)
+ sinfourtheta = dsin(4.d0*theta)
+ cosfourphi = dcos(4.d0*phi)
+ sinfourphi = dsin(4.d0*phi)
+ sintwothetasq = sintwotheta * sintwotheta
+ sintwophisq = sintwophi * sintwophi
+
+
+ cij_kll(1) = 1.d0/16.d0* (cij_kl(16) - cij_kl(16)* costwophi + &
+ 16.d0* cosphi*cosphisq* costhetafour* (cij_kl(1)* cosphi + cij_kl(6)* sinphi) + &
+ 2.d0* (cij_kl(15) + cij_kl(17))* sintwophi* sintwothetasq - &
+ 2.d0* (cij_kl(16)* cosfourtheta* sinphisq + &
+ 2.d0* costhetafour* (-4* cij_kl(7)* sinphifour - &
+ (cij_kl(2) + cij_kl(21))* sintwophisq) + &
+ 8.d0* cij_kl(5)* cosphi*cosphisq* costheta*costhetasq* sintheta - &
+ 8.d0* cij_kl(8)* costhetasq* sinphisq* sinthetasq - &
+ 8.d0* cij_kl(12)* sinthetafour + &
+ 8.d0* cosphisq* costhetasq* sintheta* ((cij_kl(4) + &
+ cij_kl(20))* costheta* sinphi - &
+ (cij_kl(3) + cij_kl(19))*sintheta) + &
+ 8.d0* cosphi* costheta* (-cij_kl(11)* costheta*costhetasq* &
+ sinphi*sinphisq + (cij_kl(10) + cij_kl(18))* costhetasq* sinphisq* sintheta + &
+ cij_kl(14)* sintheta*sinthetasq) + 2.d0* sinphi* (cij_kl(13) + &
+ cij_kl(9)* sinphisq)* sintwotheta + &
+ sinphi* (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta))
+
+ cij_kll(2) = 1.d0/4.d0* (costhetasq* (cij_kl(1) + 3.d0* cij_kl(2) + cij_kl(7) - &
+ cij_kl(21) + (-cij_kl(1) + cij_kl(2) - cij_kl(7) + &
+ cij_kl(21))* cosfourphi + (-cij_kl(6) + cij_kl(11))* sinfourphi) + &
+ 4.d0* (cij_kl(8)* cosphisq - cij_kl(15)* cosphi* sinphi + &
+ cij_kl(3)* sinphisq)* sinthetasq - &
+ 2.d0* (cij_kl(10)* cosphisq*cosphi + &
+ (cij_kl(9) - cij_kl(20))* cosphisq* sinphi + &
+ (cij_kl(5) - cij_kl(18))* cosphi* sinphisq + &
+ cij_kl(4)* sinphisq*sinphi)* sintwotheta)
+
+ cij_kll(3) = 1.d0/8.d0* (sintwophi* (3.d0* cij_kl(15) - cij_kl(17) + &
+ 4.d0* (cij_kl(2) + cij_kl(21))* costhetasq* sintwophi* sinthetasq) + &
+ 4.d0* cij_kl(12)* sintwothetasq + 4.d0* cij_kl(1)* cosphifour* sintwothetasq + &
+ 2.d0* cosphi*cosphisq* (8.d0* cij_kl(6)* costhetasq* sinphi* sinthetasq + &
+ cij_kl(5)* sinfourtheta) + 2.d0* cosphisq* (3.d0* cij_kl(3) - cij_kl(19) + &
+ (cij_kl(3) + cij_kl(19))* cosfourtheta + &
+ (cij_kl(4) + cij_kl(20))* sinphi* sinfourtheta) + &
+ 2.d0* sinphi* (sinphi* (3.d0* cij_kl(8) - &
+ cij_kl(16) + (cij_kl(8) + cij_kl(16))* cosfourtheta + &
+ 2.d0* cij_kl(7)* sinphisq* sintwothetasq)+ &
+ (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta)+ &
+ 2.d0* cosphi* ((cij_kl(15) + cij_kl(17))* cosfourtheta* sinphi + &
+ 8.d0* cij_kl(11)* costhetasq* sinphi*sinphisq* sinthetasq + &
+ (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq)*sinfourtheta))
+
+ cij_kll(4) = 1.d0/8.d0* (cosphi* costheta *(5.d0* cij_kl(4) - &
+ cij_kl(9) + 4.d0* cij_kl(13) - &
+ 3.d0* cij_kl(20) + (cij_kl(4) + 3.d0* cij_kl(9) - &
+ 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + &
+ 1.d0/2.d0* (cij_kl(4) - cij_kl(9) + &
+ cij_kl(20))* costhreephi * (costheta + 3.d0* costhreetheta) - &
+ costheta* (-cij_kl(5) + 5.d0* cij_kl(10) + &
+ 4.d0* cij_kl(14) - 3.d0* cij_kl(18) + &
+ (3.d0* cij_kl(5) + cij_kl(10) - &
+ 4.d0* cij_kl(14) + cij_kl(18))* costwotheta)* sinphi - &
+ 1.d0/2.d0* (cij_kl(5) - cij_kl(10) - cij_kl(18))* (costheta + &
+ 3.d0* costhreetheta)* sinthreephi + &
+ 4.d0* (cij_kl(6) - cij_kl(11))* cosfourphi* costhetasq* sintheta - &
+ 4.d0* (cij_kl(1) + cij_kl(3) - cij_kl(7) - cij_kl(8) + cij_kl(16) - cij_kl(19) + &
+ (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + &
+ cij_kl(16) - cij_kl(19))* costwotheta)* sintwophi* sintheta - &
+ 4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
+ cij_kl(21))* costhetasq* sinfourphi* sintheta + &
+ costwophi* ((cij_kl(6) + cij_kl(11) + 6.d0* cij_kl(15) - &
+ 2.d0* cij_kl(17))* sintheta + &
+ (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + cij_kl(17)))* sinthreetheta))
+
+ cij_kll(5) = 1.d0/4.d0* (2.d0* (cij_kl(4) + &
+ cij_kl(20))* cosphisq* (costwotheta + cosfourtheta)* sinphi + &
+ 2.d0* cij_kl(9)* (costwotheta + cosfourtheta)* sinphi*sinphisq + &
+ 16.d0* cij_kl(1)* cosphifour* costheta*costhetasq* sintheta + &
+ 4.d0* costheta*costhetasq* (-2.d0* cij_kl(8)* sinphisq + &
+ 4.d0* cij_kl(7)* sinphifour + &
+ (cij_kl(2) + cij_kl(21))* sintwophisq)* sintheta + &
+ 4.d0* cij_kl(13)* (1.d0 + 2.d0* costwotheta)* sinphi* sinthetasq + &
+ 8.d0* costheta* (-2.d0* cij_kl(12) + cij_kl(8)* sinphisq)* sintheta*sinthetasq + &
+ 2.d0* cosphi*cosphisq* (cij_kl(5)* (costwotheta + cosfourtheta) + &
+ 8.d0* cij_kl(6)* costheta*costhetasq* sinphi* sintheta) + &
+ 2.d0* cosphi* (cosfourtheta* (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
+ costwotheta* (cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
+ 8.d0* cij_kl(11)* costheta*costhetasq* sinphi*sinphisq* sintheta) - &
+ (cij_kl(3) + cij_kl(16) + cij_kl(19) + &
+ (cij_kl(3) - cij_kl(16) + cij_kl(19))* costwophi + &
+ (cij_kl(15) + cij_kl(17))* sintwophi)* sinfourtheta)
+
+ cij_kll(6) = 1.d0/2.d0* costheta*costhetasq* ((cij_kl(6) + cij_kl(11))* costwophi + &
+ (cij_kl(6) - cij_kl(11))* cosfourphi + 2.d0* (-cij_kl(1) + cij_kl(7))* sintwophi + &
+ (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* sinfourphi) + &
+ 1.d0/4.d0* costhetasq* (-(cij_kl(4) + 3* cij_kl(9) + cij_kl(20))* cosphi - &
+ 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi + &
+ (3.d0* cij_kl(5) + cij_kl(10) + cij_kl(18))* sinphi + &
+ 3.d0* (cij_kl(5) - cij_kl(10) - cij_kl(18))* sinthreephi)* sintheta + &
+ costheta* ((cij_kl(15) + cij_kl(17))* costwophi + &
+ (-cij_kl(3) + cij_kl(8) + cij_kl(16) - cij_kl(19))* sintwophi)* sinthetasq + &
+ (-cij_kl(13)* cosphi + cij_kl(14)* sinphi)* sintheta*sinthetasq
+
+ cij_kll(7) = cij_kl(7)* cosphifour - cij_kl(11)* cosphi*cosphisq* sinphi + &
+ (cij_kl(2) + cij_kl(21))* cosphisq* sinphisq - &
+ cij_kl(6)* cosphi* sinphi*sinphisq + &
+ cij_kl(1)* sinphifour
+
+ cij_kll(8) = 1.d0/2.d0* (2.d0* costhetasq* sinphi* (-cij_kl(15)* cosphi + &
+ cij_kl(3)* sinphi) + 2.d0* cij_kl(2)* cosphifour* sinthetasq + &
+ (2.d0* cij_kl(2)* sinphifour + &
+ (cij_kl(1) + cij_kl(7) - cij_kl(21))* sintwophisq)* sinthetasq + &
+ cij_kl(4)* sinphi*sinphisq* sintwotheta + &
+ cosphi*cosphisq* (2.d0* (-cij_kl(6) + cij_kl(11))* sinphi* sinthetasq + &
+ cij_kl(10)* sintwotheta) + cosphi* sinphisq* (2.d0* (cij_kl(6) - &
+ cij_kl(11))* sinphi* sinthetasq + &
+ (cij_kl(5) - cij_kl(18))* sintwotheta) + &
+ cosphisq* (2.d0* cij_kl(8)* costhetasq + &
+ (cij_kl(9) - cij_kl(20))* sinphi* sintwotheta))
+
+ cij_kll(9) = cij_kl(11)* cosphifour* sintheta - sinphi*sinphisq* (cij_kl(5)* costheta + &
+ cij_kl(6)* sinphi* sintheta) + cosphisq* sinphi* (-(cij_kl(10) + &
+ cij_kl(18))* costheta + &
+ 3.d0* (cij_kl(6) - cij_kl(11))* sinphi* sintheta) + &
+ cosphi* sinphisq* ((cij_kl(4) + cij_kl(20))* costheta + &
+ 2.d0* (-2.d0* cij_kl(1) + cij_kl(2) + cij_kl(21))* sinphi* sintheta) + &
+ cosphi*cosphisq* (cij_kl(9)* costheta - 2.d0* (cij_kl(2) - 2.d0* cij_kl(7) + &
+ cij_kl(21))* sinphi* sintheta)
+
+ cij_kll(10) = 1.d0/4.d0* (4.d0* costwotheta* (cij_kl(10)* cosphi*cosphisq + &
+ (cij_kl(9) - cij_kl(20))* cosphisq* sinphi + &
+ (cij_kl(5) - cij_kl(18))* cosphi* sinphisq + &
+ cij_kl(4)* sinphi*sinphisq) + (cij_kl(1) + 3.d0* cij_kl(2) - &
+ 2.d0* cij_kl(3) + cij_kl(7) - &
+ 2.d0* cij_kl(8) - cij_kl(21) + 2.d0* (cij_kl(3) - cij_kl(8))* costwophi + &
+ (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
+ 2.d0* cij_kl(15)* sintwophi + &
+ (-cij_kl(6) + cij_kl(11))* sinfourphi)* sintwotheta)
+
+ cij_kll(11) = 1.d0/4.d0* (2.d0* costheta* ((cij_kl(6) + cij_kl(11))* costwophi + &
+ (-cij_kl(6) + cij_kl(11))* cosfourphi + &
+ 2.d0* (-cij_kl(1) + cij_kl(7))* sintwophi + &
+ (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(21))* sinfourphi) + &
+ (-(cij_kl(4) + 3.d0* cij_kl(9) + cij_kl(20))* cosphi + &
+ (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi + &
+ (3.d0* cij_kl(5) + cij_kl(10) + cij_kl(18))* sinphi + &
+ (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sintheta)
+
+ cij_kll(12) = 1.d0/16.d0* (cij_kl(16) - 2.d0* cij_kl(16)* cosfourtheta* sinphisq + &
+ costwophi* (-cij_kl(16) + 8.d0* costheta* sinthetasq* ((cij_kl(3) - &
+ cij_kl(8) + cij_kl(19))* costheta + &
+ (cij_kl(5) - cij_kl(10) - cij_kl(18))* cosphi* sintheta)) + &
+ 2.d0* (cij_kl(15) + cij_kl(17))* sintwophi* sintwothetasq + &
+ 2.d0* (8.d0* cij_kl(12)* costhetafour + &
+ 8.d0* cij_kl(14)* cosphi* costheta*costhetasq* sintheta + &
+ 4.d0* cosphi* costheta* (cij_kl(5) + cij_kl(10) + cij_kl(18) + &
+ (cij_kl(4) + cij_kl(20))* sintwophi)* &
+ sintheta*sinthetasq + 8.d0* cij_kl(1)* cosphifour* sinthetafour + &
+ 8.d0* cij_kl(6)* cosphi*cosphisq* sinphi* sinthetafour + &
+ 8.d0* cij_kl(11)* cosphi* sinphi*sinphisq* sinthetafour + &
+ 8.d0* cij_kl(7)* sinphifour* sinthetafour + &
+ 2.d0* cij_kl(2)* sintwophisq* sinthetafour + &
+ 2.d0* cij_kl(21)* sintwophisq* sinthetafour + &
+ 2.d0* cij_kl(13)* sinphi* sintwotheta + &
+ 2.d0* cij_kl(9)* sinphi*sinphisq* sintwotheta + &
+ cij_kl(3)* sintwothetasq + cij_kl(8)* sintwothetasq + &
+ cij_kl(19)* sintwothetasq + cij_kl(13)* sinphi* sinfourtheta - &
+ cij_kl(9)* sinphi*sinphisq* sinfourtheta))
+
+ cij_kll(13) = 1.d0/8.d0* (cosphi* costheta* (cij_kl(4) + 3.d0* cij_kl(9) + &
+ 4.d0* cij_kl(13) + cij_kl(20) - (cij_kl(4) + 3.d0* cij_kl(9) - &
+ 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + 4.d0* (-cij_kl(1) - &
+ cij_kl(3) + cij_kl(7) + cij_kl(8) + cij_kl(16) - cij_kl(19) + &
+ (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
+ cij_kl(19))* costwotheta)* sintwophi* sintheta + &
+ 4.d0* (cij_kl(6) - cij_kl(11))* cosfourphi* sinthetasq*sintheta - &
+ 4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
+ cij_kl(21))* sinfourphi* sinthetasq*sintheta + &
+ costheta* ((-3.d0* cij_kl(5) - cij_kl(10) - 4.d0* cij_kl(14) - &
+ cij_kl(18) + (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + &
+ cij_kl(18))* costwotheta)* sinphi + 6.d0* ((cij_kl(4) - cij_kl(9) + &
+ cij_kl(20))* costhreephi + (-cij_kl(5) + cij_kl(10) + &
+ cij_kl(18))* sinthreephi)* sinthetasq) + costwophi* ((3* cij_kl(6) + &
+ 3.d0* cij_kl(11) + 2.d0* (cij_kl(15) + cij_kl(17)))* sintheta - &
+ (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + &
+ cij_kl(17)))* sinthreetheta))
+
+ cij_kll(14) = 1.d0/4.d0* (2.d0* cij_kl(13)* (costwotheta + cosfourtheta)* sinphi + &
+ 8.d0* costheta*costhetasq* (-2.d0* cij_kl(12) + cij_kl(8)* sinphisq)* sintheta + &
+ 4.d0* (cij_kl(4) + cij_kl(20))* cosphisq* (1.d0 + &
+ 2.d0* costwotheta)* sinphi* sinthetasq + &
+ 4.d0* cij_kl(9)* (1.d0 + 2.d0* costwotheta)* sinphi*sinphisq* sinthetasq + &
+ 16.d0* cij_kl(1)* cosphifour* costheta* sintheta*sinthetasq + &
+ 4.d0* costheta* (-2.d0* cij_kl(8)* sinphisq + 4.d0* cij_kl(7)* sinphifour + &
+ (cij_kl(2) + cij_kl(21))* sintwophisq)* sintheta*sinthetasq + &
+ 4.d0* cosphi*cosphisq* sinthetasq* (cij_kl(5) + 2.d0* cij_kl(5)* costwotheta + &
+ 4.d0* cij_kl(6)* costheta* sinphi* sintheta) + &
+ 2.d0* cosphi* (cosfourtheta* (cij_kl(14) - (cij_kl(10) + cij_kl(18))* sinphisq) + &
+ costwotheta* (cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq) + &
+ 8.d0* cij_kl(11)* costheta* sinphi*sinphisq* sintheta*sinthetasq) + &
+ (cij_kl(3) + cij_kl(16) + cij_kl(19) + (cij_kl(3) - cij_kl(16) + &
+ cij_kl(19))* costwophi + (cij_kl(15) + cij_kl(17))* sintwophi)* sinfourtheta)
+
+ cij_kll(15) = costwophi* costheta* (-cij_kl(17) + (cij_kl(15) + cij_kl(17))* costhetasq) + &
+ 1.d0/16.d0* (-((11.d0* cij_kl(4) + cij_kl(9) + 4.d0* cij_kl(13) - &
+ 5.d0* cij_kl(20))* cosphi + (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
+ (cij_kl(5) + 11.d0* cij_kl(10) + 4.d0* cij_kl(14) - &
+ 5.d0* cij_kl(18))* sinphi + (-cij_kl(5) + cij_kl(10) + &
+ cij_kl(18))* sinthreephi)* sintheta + &
+ 8.d0* costheta* ((-cij_kl(1) - cij_kl(3) + cij_kl(7) + cij_kl(8) - cij_kl(16) +&
+ cij_kl(19) + (cij_kl(1) - cij_kl(3) - &
+ cij_kl(7) + cij_kl(8) + cij_kl(16) - cij_kl(19))* costwotheta)* sintwophi +&
+ ((cij_kl(6) + cij_kl(11))* costwophi + &
+ (cij_kl(6) - cij_kl(11))* cosfourphi + (-cij_kl(1) + cij_kl(2) - cij_kl(7) +&
+ cij_kl(21))* sinfourphi)* sinthetasq) +&
+ ((cij_kl(4) + 3.d0* cij_kl(9) - 4.d0* cij_kl(13) + cij_kl(20))* cosphi + &
+ 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
+ (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))* sinphi + &
+ 3.d0* (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sinthreetheta)
+
+ cij_kll(16) = 1.d0/4.d0*(cij_kl(1) - cij_kl(2) + cij_kl(7) + cij_kl(16) + &
+ cij_kl(19) + cij_kl(21) + 2.d0*(cij_kl(16) - cij_kl(19))*costwophi* costhetasq + &
+ (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(16) + &
+ cij_kl(19) - cij_kl(21))*costwotheta - 2.d0* cij_kl(17)* costhetasq* sintwophi + &
+ 2.d0* ((-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
+ (-cij_kl(6) + cij_kl(11))* sinfourphi)* sinthetasq + ((cij_kl(5) - cij_kl(10) +&
+ cij_kl(18))* cosphi + (-cij_kl(5) + cij_kl(10) + cij_kl(18))* costhreephi +&
+ (-cij_kl(4) + cij_kl(9) + cij_kl(20))* sinphi - &
+ (cij_kl(4) - cij_kl(9) + cij_kl(20))* sinthreephi)* sintwotheta)
+
+ cij_kll(17) = 1.d0/8.d0* (4.d0* costwophi* costheta* (cij_kl(6) + cij_kl(11) - &
+ 2.d0* cij_kl(15) - (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + &
+ cij_kl(17)))* costwotheta) - (2.d0* cosphi* (-3.d0* cij_kl(4) +&
+ cij_kl(9) + 2.d0* cij_kl(13) + cij_kl(20) + (cij_kl(4) - cij_kl(9) + &
+ cij_kl(20))* costwophi) - (cij_kl(5) - 5.d0* cij_kl(10) + &
+ 4.d0* cij_kl(14) + 3.d0* cij_kl(18))* sinphi + (-cij_kl(5) + cij_kl(10) + &
+ cij_kl(18))* sinthreephi)* sintheta + &
+ 8.d0* costheta* ((-cij_kl(1) + cij_kl(3) + cij_kl(7) - cij_kl(8) + &
+ (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
+ cij_kl(19))* costwotheta)* sintwophi + ((cij_kl(6) - cij_kl(11))* cosfourphi + &
+ (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* sinfourphi)* sinthetasq) +&
+ ((cij_kl(4) + 3.d0* cij_kl(9) - 4.d0* cij_kl(13) + cij_kl(20))* cosphi + &
+ 3.d0* (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi - &
+ (3.d0* cij_kl(5) + cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))* sinphi + &
+ 3.d0* (-cij_kl(5) + cij_kl(10) + cij_kl(18))* sinthreephi)* sinthreetheta)
+
+ cij_kll(18) = 1.d0/2.d0* ((cij_kl(5) - cij_kl(10) + cij_kl(18))* cosphi* costwotheta - &
+ (cij_kl(5) - cij_kl(10) - cij_kl(18))* costhreephi* costwotheta - &
+ 2.d0* (cij_kl(4) - cij_kl(9) + &
+ (cij_kl(4) - cij_kl(9) + cij_kl(20))* costwophi)* costwotheta* sinphi + &
+ (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(16) - cij_kl(19) + cij_kl(21) + &
+ (-cij_kl(16) + cij_kl(19))* costwophi + &
+ (-cij_kl(1) + cij_kl(2) - cij_kl(7) + cij_kl(21))* cosfourphi + &
+ cij_kl(17)* sintwophi + &
+ (-cij_kl(6) + cij_kl(11))* sinfourphi)* sintwotheta)
+
+ cij_kll(19) = 1.d0/4.d0* (cij_kl(16) - cij_kl(16)* costwophi + &
+ (-cij_kl(15) + cij_kl(17))* sintwophi + &
+ 4.d0* cij_kl(12)* sintwothetasq + &
+ 2.d0* (2.d0* cij_kl(1)* cosphifour* sintwothetasq + &
+ cosphi*cosphisq* (8.d0* cij_kl(6)* costhetasq* sinphi* sinthetasq + &
+ cij_kl(5)* sinfourtheta) + cosphisq* (-cij_kl(3) + cij_kl(19) + (cij_kl(3) +&
+ cij_kl(19))* cosfourtheta + (cij_kl(4) + cij_kl(20))* sinphi* sinfourtheta) + &
+ sinphi* (cosfourtheta* ((cij_kl(15) + cij_kl(17))* cosphi + &
+ cij_kl(16)* sinphi) + (cij_kl(2) + cij_kl(7) - 2.d0* cij_kl(8) + cij_kl(21) + &
+ (cij_kl(2) - cij_kl(7) + cij_kl(21))* costwophi)* sinphi* sintwothetasq + &
+ (-cij_kl(13) + cij_kl(9)* sinphisq)* sinfourtheta) + &
+ cosphi* (8.d0* cij_kl(11)* costhetasq* sinphi*sinphisq* sinthetasq + &
+ (-cij_kl(14) + (cij_kl(10) + cij_kl(18))* sinphisq)* sinfourtheta)))
+
+ cij_kll(20) = 1.d0/8.d0* (2.d0* cosphi* costheta* (-3.d0* cij_kl(4) - cij_kl(9) + &
+ 4.d0* cij_kl(13) + cij_kl(20) + (cij_kl(4) + 3.d0* cij_kl(9) - &
+ 4.d0* cij_kl(13) + cij_kl(20))* costwotheta) + &
+ (cij_kl(4) - cij_kl(9) + cij_kl(20))* costhreephi* (costheta + &
+ 3.d0* costhreetheta) - &
+ 2.d0* costheta* (-cij_kl(5) - 3.d0* cij_kl(10) + 4.d0* cij_kl(14) + &
+ cij_kl(18) + (3.d0* cij_kl(5) + &
+ cij_kl(10) - 4.d0* cij_kl(14) + cij_kl(18))*costwotheta)* sinphi - &
+ (cij_kl(5) - cij_kl(10) - cij_kl(18))* &
+ (costheta + 3.d0* costhreetheta)* sinthreephi + 8.d0* (cij_kl(6) - &
+ cij_kl(11))* cosfourphi* costhetasq* sintheta - 8.d0* (cij_kl(1) - &
+ cij_kl(3) - cij_kl(7) + cij_kl(8) + &
+ (cij_kl(1) - cij_kl(3) - cij_kl(7) + cij_kl(8) + cij_kl(16) - &
+ cij_kl(19))* costwotheta)* sintwophi* sintheta - &
+ 8.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
+ cij_kl(21))* costhetasq* sinfourphi* sintheta + &
+ 2.d0* costwophi* ((cij_kl(6) + cij_kl(11) - 2.d0* cij_kl(15) + &
+ 2.d0* cij_kl(17))* sintheta + &
+ (cij_kl(6) + cij_kl(11) - 2.d0* (cij_kl(15) + cij_kl(17)))* sinthreetheta))
+
+ cij_kll(21) = 1.d0/4.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) + cij_kl(16) + &
+ cij_kl(19) + cij_kl(21) - 2.d0* (cij_kl(1) - cij_kl(2) + cij_kl(7) - &
+ cij_kl(21))* cosfourphi* costhetasq + &
+ (cij_kl(1) - cij_kl(2) + cij_kl(7) - cij_kl(16) - cij_kl(19) + &
+ cij_kl(21))* costwotheta + &
+ 2.d0* (-cij_kl(6) + cij_kl(11))* costhetasq* sinfourphi - &
+ 2.d0* ((-cij_kl(16) + cij_kl(19))* costwophi + cij_kl(17)* sintwophi)* sinthetasq - &
+ ((cij_kl(5) - cij_kl(10) + cij_kl(18))* cosphi + (-cij_kl(5) + cij_kl(10) +&
+ cij_kl(18))* costhreephi + &
+ (-cij_kl(4) + cij_kl(9) + cij_kl(20))* sinphi - (cij_kl(4) - cij_kl(9) + &
+ cij_kl(20))* sinthreephi)* sintwotheta)
+
+ end subroutine rotate_kernels_dble
More information about the CIG-COMMITS
mailing list