[cig-commits] [commit] devel: updates deville routines to work with NGLLX == 5, 6 or 7 (2c2e45f)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Mon Dec 22 11:29:12 PST 2014
Repository : https://github.com/geodynamics/specfem3d
On branch : devel
Link : https://github.com/geodynamics/specfem3d/compare/8409ff46e1df08c2033322539a2dbb07ba86390c...2c2e45f29579c2e5be06c967890aba4311722b93
>---------------------------------------------------------------
commit 2c2e45f29579c2e5be06c967890aba4311722b93
Author: daniel peter <peterda at ethz.ch>
Date: Mon Dec 22 17:45:26 2014 +0100
updates deville routines to work with NGLLX == 5, 6 or 7
>---------------------------------------------------------------
2c2e45f29579c2e5be06c967890aba4311722b93
setup/constants.h.in | 2 +-
src/specfem3D/compute_forces_acoustic_Dev.F90 | 210 ++++++++++-
src/specfem3D/compute_forces_viscoelastic_Dev.F90 | 399 ++++++++++++++++++++-
...compute_forces_viscoelastic_calling_routine.F90 | 8 +-
src/specfem3D/initialize_simulation.f90 | 6 +-
5 files changed, 590 insertions(+), 35 deletions(-)
diff --git a/setup/constants.h.in b/setup/constants.h.in
index 633f5f0..cd9f996 100644
--- a/setup/constants.h.in
+++ b/setup/constants.h.in
@@ -65,7 +65,7 @@
integer, parameter :: NGLLZ = NGLLX
! use inlined products of Deville et al. (2002) to speedup the calculations to compute internal forces
-! Deville routines only implemented for NGLLX = NGLLY = NGLLZ = 5
+! Deville routines only implemented for NGLLX = NGLLY = NGLLZ = 5, 6 or 7
logical, parameter :: USE_DEVILLE_PRODUCTS = .true.
!!-----------------------------------------------------------
diff --git a/src/specfem3D/compute_forces_acoustic_Dev.F90 b/src/specfem3D/compute_forces_acoustic_Dev.F90
index 65330e1..9b2c8d5 100644
--- a/src/specfem3D/compute_forces_acoustic_Dev.F90
+++ b/src/specfem3D/compute_forces_acoustic_Dev.F90
@@ -139,11 +139,11 @@
! pages 386 and 389 and Figure 8.3.1
! computes 1. matrix multiplication for tempx1,..
- call mxm5_single(hprime_xx,m1,chi_elem,tempx1,m2)
+ call mxm_single(hprime_xx,m1,chi_elem,tempx1,m2)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3dmat_single(chi_elem,m1,hprime_xxT,m1,tempx2,NGLLX)
+ call mxm_3dmat_single(chi_elem,m1,hprime_xxT,m1,tempx2,NGLLX)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_single(chi_elem,m2,hprime_xxT,tempx3,m1)
+ call mxm_single(chi_elem,m2,hprime_xxT,tempx3,m1)
if (PML_CONDITIONS .and. (.not. backward_simulation) .and. NSPEC_CPML > 0) then
! do not merge this second line with the first using an ".and." statement
@@ -160,18 +160,18 @@
! pages 386 and 389 and Figure 8.3.1
! computes 1. matrix multiplication for tempx1,..
- call mxm5_single(hprime_xx,m1,chi_elem_old,tempx1_old,m2)
+ call mxm_single(hprime_xx,m1,chi_elem_old,tempx1_old,m2)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3dmat_single(chi_elem_old,m1,hprime_xxT,m1,tempx2_old,NGLLX)
+ call mxm_3dmat_single(chi_elem_old,m1,hprime_xxT,m1,tempx2_old,NGLLX)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_single(chi_elem_old,m2,hprime_xxT,tempx3_old,m1)
+ call mxm_single(chi_elem_old,m2,hprime_xxT,tempx3_old,m1)
! computes 1. matrix multiplication for tempx1,..
- call mxm5_single(hprime_xx,m1,chi_elem_new,tempx1_new,m2)
+ call mxm_single(hprime_xx,m1,chi_elem_new,tempx1_new,m2)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3dmat_single(chi_elem_new,m1,hprime_xxT,m1,tempx2_new,NGLLX)
+ call mxm_3dmat_single(chi_elem_new,m1,hprime_xxT,m1,tempx2_new,NGLLX)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_single(chi_elem_new,m2,hprime_xxT,tempx3_new,m1)
+ call mxm_single(chi_elem_new,m2,hprime_xxT,tempx3_new,m1)
endif ! is_CPML
endif ! PML_CONDITIONS
@@ -253,11 +253,11 @@
! pages 386 and 389 and Figure 8.3.1
! computes 1. matrix multiplication for newtempx1,..
- call mxm5_single(hprimewgll_xxT,m1,tempx1,newtempx1,m2)
+ call mxm_single(hprimewgll_xxT,m1,tempx1,newtempx1,m2)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3dmat_single(tempx2,m1,hprimewgll_xx,m1,newtempx2,NGLLX)
+ call mxm_3dmat_single(tempx2,m1,hprimewgll_xx,m1,newtempx2,NGLLX)
! computes 3. matrix multiplication for newtempx3,..
- call mxm5_single(tempx3,m2,hprimewgll_xx,newtempx3,m1)
+ call mxm_single(tempx3,m2,hprimewgll_xx,newtempx3,m1)
! second double-loop over GLL to compute all the terms
#ifdef FORCE_VECTORIZATION
@@ -312,6 +312,32 @@
!
! please leave the routines here to help compilers inlining the code
+ subroutine mxm_single(A,n1,B,C,n3)
+
+! 2-dimensional arrays e.g. (25,5)/(5,25)
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,NGLLX),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(NGLLX,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+
+ ! matrix-matrix multiplication wrapper
+ if (NGLLX == 5) then
+ call mxm5_single(A,n1,B,C,n3)
+ else if (NGLLX == 6) then
+ call mxm6_single(A,n1,B,C,n3)
+ else if (NGLLX == 7) then
+ call mxm7_single(A,n1,B,C,n3)
+ endif
+
+ end subroutine mxm_single
+
+ !-------------
+
subroutine mxm5_single(A,n1,B,C,n3)
! 2-dimensional arrays (25,5)/(5,25)
@@ -341,9 +367,100 @@
end subroutine mxm5_single
+ !-------------
+
+ subroutine mxm6_single(A,n1,B,C,n3)
+
+! 2-dimensional arrays (36,6)/(6,36)
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(6,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j) &
+ + A(i,6) * B(6,j)
+ enddo
+ enddo
+
+ end subroutine mxm6_single
+
+ !-------------
+
+ subroutine mxm7_single(A,n1,B,C,n3)
+
+! 2-dimensional arrays (49,7)/(7,49)
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(7,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C(i,j) = A(i,1) * B(1,j) &
+ + A(i,2) * B(2,j) &
+ + A(i,3) * B(3,j) &
+ + A(i,4) * B(4,j) &
+ + A(i,5) * B(5,j) &
+ + A(i,6) * B(6,j) &
+ + A(i,7) * B(7,j)
+ enddo
+ enddo
+
+ end subroutine mxm7_single
+
!--------------------------------------------------------------------------------------------
+ subroutine mxm_3dmat_single(A,n1,B,n2,C,n3)
+
+! 3-dimensional arrays e.g. (5,5,5) for A and C
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,NGLLX,n3),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(NGLLX,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+
+ ! matrix-matrix multiplication wrapper
+ if (NGLLX == 5) then
+ call mxm5_3dmat_single(A,n1,B,n2,C,n3)
+ else if (NGLLX == 6) then
+ call mxm6_3dmat_single(A,n1,B,n2,C,n3)
+ else if (NGLLX == 7) then
+ call mxm7_3dmat_single(A,n1,B,n2,C,n3)
+ endif
+
+ end subroutine mxm_3dmat_single
+
+ !-------------
+
subroutine mxm5_3dmat_single(A,n1,B,n2,C,n3)
! 3-dimensional arrays (5,5,5) for A and C
@@ -375,5 +492,74 @@
end subroutine mxm5_3dmat_single
+ !-------------
+
+ subroutine mxm6_3dmat_single(A,n1,B,n2,C,n3)
+
+! 3-dimensional arrays (6,6,6) for A and C
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6,n3),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(6,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do k = 1,n3
+ do j = 1,n2
+ do i = 1,n1
+ C(i,j,k) = A(i,1,k) * B(1,j) &
+ + A(i,2,k) * B(2,j) &
+ + A(i,3,k) * B(3,j) &
+ + A(i,4,k) * B(4,j) &
+ + A(i,5,k) * B(5,j) &
+ + A(i,6,k) * B(6,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm6_3dmat_single
+
+ !-------------
+
+ subroutine mxm7_3dmat_single(A,n1,B,n2,C,n3)
+
+! 3-dimensional arrays (6,6,6) for A and C
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7,n3),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(7,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do k = 1,n3
+ do j = 1,n2
+ do i = 1,n1
+ C(i,j,k) = A(i,1,k) * B(1,j) &
+ + A(i,2,k) * B(2,j) &
+ + A(i,3,k) * B(3,j) &
+ + A(i,4,k) * B(4,j) &
+ + A(i,5,k) * B(5,j) &
+ + A(i,6,k) * B(6,j) &
+ + A(i,7,k) * B(7,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm7_3dmat_single
+
end subroutine compute_forces_acoustic_Dev
diff --git a/src/specfem3D/compute_forces_viscoelastic_Dev.F90 b/src/specfem3D/compute_forces_viscoelastic_Dev.F90
index 5c1a9f0..8f1c286 100644
--- a/src/specfem3D/compute_forces_viscoelastic_Dev.F90
+++ b/src/specfem3D/compute_forces_viscoelastic_Dev.F90
@@ -30,9 +30,9 @@
#include "config.fh"
-! Deville routine for NGLL == 5 (default)
+! Deville routine for NGLL == 5, 6 or 7 (default in constants.h is 5)
- subroutine compute_forces_viscoelastic_Dev_5p(iphase,NSPEC_AB,NGLOB_AB, &
+ subroutine compute_forces_viscoelastic_Dev(iphase,NSPEC_AB,NGLOB_AB, &
displ,veloc,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT, &
@@ -314,7 +314,7 @@
! pages 386 and 389 and Figure 8.3.1
! computes 1. matrix multiplication for tempx1,..
- call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
+ call mxm_3comp_singleA(hprime_xx,m1,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1,m2)
! counts:
! + m1 * m2 * 3 * 9 = 5 * 25 * 3 * 9 = 3375 FLOP
@@ -322,7 +322,7 @@
! + m1 * 5 float = 100 BYTE (hprime_xx once, assuming B3_** in cache)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
+ call mxm_3comp_3dmat_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m1,hprime_xxT,m1,tempx2,tempy2,tempz2,NGLLX)
! counts:
! + m1 * m1 * NGLLX * 3 * 9 = 5 * 5 * 5 * 3 * 9 = 3375 FLOP
@@ -330,7 +330,7 @@
! + m1 * 5 float = 100 BYTE (hprime_xxT once, assuming dummy*_** in cache)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
+ call mxm_3comp_singleB(dummyx_loc,dummyy_loc,dummyz_loc,m2,hprime_xxT,tempx3,tempy3,tempz3,m1)
! counts:
! + m1 * m2 * 3 * 9 = 5 * 25 * 3 * 9 = 3375 FLOP
@@ -342,7 +342,7 @@
! temporary variables used for fixing attenuation in a consistent way
! computes 1. matrix multiplication for tempx1,..
- call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc_att,dummyy_loc_att,dummyz_loc_att, &
+ call mxm_3comp_singleA(hprime_xx,m1,dummyx_loc_att,dummyy_loc_att,dummyz_loc_att, &
tempx1_att,tempy1_att,tempz1_att,m2)
tempx1_att(:,:,:) = tempx1_att(:,:,:) + tempx1(:,:,:)
@@ -350,7 +350,7 @@
tempz1_att(:,:,:) = tempz1_att(:,:,:) + tempz1(:,:,:)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3comp_3dmat_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m1,hprime_xxT,m1, &
+ call mxm_3comp_3dmat_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m1,hprime_xxT,m1, &
tempx2_att,tempy2_att,tempz2_att,NGLLX)
tempx2_att(:,:,:) = tempx2_att(:,:,:) + tempx2(:,:,:)
@@ -358,7 +358,7 @@
tempz2_att(:,:,:) = tempz2_att(:,:,:) + tempz2(:,:,:)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_3comp_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m2,hprime_xxT, &
+ call mxm_3comp_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m2,hprime_xxT, &
tempx3_att,tempy3_att,tempz3_att,m1)
tempx3_att(:,:,:) = tempx3_att(:,:,:) + tempx3(:,:,:)
@@ -370,13 +370,13 @@
! because array is_CPML() is unallocated when PML_CONDITIONS is false
if (is_CPML(ispec)) then
! computes 1. matrix multiplication for tempx1,..
- call mxm5_3comp_singleA(hprime_xx,m1,dummyx_loc_att,dummyy_loc_att,dummyz_loc_att, &
+ call mxm_3comp_singleA(hprime_xx,m1,dummyx_loc_att,dummyy_loc_att,dummyz_loc_att, &
tempx1_att,tempy1_att,tempz1_att,m2)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3comp_3dmat_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m1,hprime_xxT,m1, &
+ call mxm_3comp_3dmat_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m1,hprime_xxT,m1, &
tempx2_att,tempy2_att,tempz2_att,NGLLX)
! computes 3. matrix multiplication for tempx1,..
- call mxm5_3comp_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m2,hprime_xxT, &
+ call mxm_3comp_singleB(dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,m2,hprime_xxT, &
tempx3_att,tempy3_att,tempz3_att,m1)
endif
endif
@@ -826,7 +826,7 @@
! pages 386 and 389 and Figure 8.3.1
! computes 1. matrix multiplication for newtempx1,..
- call mxm5_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2)
+ call mxm_3comp_singleA(hprimewgll_xxT,m1,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1,m2)
! counts:
! + m1 * m2 * 3 * 9 = 3375 FLOP
@@ -834,7 +834,7 @@
! + m1 * 5 float = 100 BYTE (hprimewgll_xxT once, assumes E3*, C1* in cache)
! computes 2. matrix multiplication for tempx2,..
- call mxm5_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX)
+ call mxm_3comp_3dmat_singleB(tempx2,tempy2,tempz2,m1,hprimewgll_xx,m1,newtempx2,newtempy2,newtempz2,NGLLX)
! counts:
! + m1 * m1 * NGLLX * 3 * 9 = 3375 FLOP
@@ -842,7 +842,7 @@
! + m1 * 5 float = 100 BYTE (hprimewgll_xx once, assumes E3*, C1* in cache)
! computes 3. matrix multiplication for newtempx3,..
- call mxm5_3comp_singleB(tempx3,tempy3,tempz3,m2,hprimewgll_xx,newtempx3,newtempy3,newtempz3,m1)
+ call mxm_3comp_singleB(tempx3,tempy3,tempz3,m2,hprimewgll_xx,newtempx3,newtempy3,newtempz3,m1)
! counts:
! + m1 * m2 * 3 * 9 = 3375 FLOP
@@ -978,10 +978,36 @@
!
!--------------------------------------------------------------------------------------------
!
-! note: the matrix-matrix multiplications are used for very small matrices ( 5 x 5 x 5 elements);
+! note: the matrix-matrix multiplications are used for very small matrices ( default 5 x 5 x 5 elements);
! thus, calling external optimized libraries for these multiplications are in general slower
!
! please leave the routines here to help compilers inlining the code
+ subroutine mxm_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays e.g. (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,NGLLX),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(NGLLX,n3),intent(in) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! matrix-matrix multiplication wrapper
+
+ if (NGLLX == 5) then
+ call mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+ else if (NGLLX == 6) then
+ call mxm6_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+ else if (NGLLX == 7) then
+ call mxm7_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+ endif
+
+ end subroutine mxm_3comp_singleA
+
+ !-------------
subroutine mxm5_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
@@ -1024,9 +1050,130 @@
end subroutine mxm5_3comp_singleA
+ !-------------
+
+ subroutine mxm6_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (36,6)/(6,36), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(6,n3),intent(in) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j) &
+ + A(i,6) * B1(6,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j) &
+ + A(i,6) * B2(6,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j) &
+ + A(i,6) * B3(6,j)
+ enddo
+ enddo
+
+ end subroutine mxm6_3comp_singleA
+
+ !-------------
+
+ subroutine mxm7_3comp_singleA(A,n1,B1,B2,B3,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (49,7)/(7,49), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7),intent(in) :: A
+ real(kind=CUSTOM_REAL),dimension(7,n3),intent(in) :: B1,B2,B3
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C1(i,j) = A(i,1) * B1(1,j) &
+ + A(i,2) * B1(2,j) &
+ + A(i,3) * B1(3,j) &
+ + A(i,4) * B1(4,j) &
+ + A(i,5) * B1(5,j) &
+ + A(i,6) * B1(6,j) &
+ + A(i,7) * B1(7,j)
+
+ C2(i,j) = A(i,1) * B2(1,j) &
+ + A(i,2) * B2(2,j) &
+ + A(i,3) * B2(3,j) &
+ + A(i,4) * B2(4,j) &
+ + A(i,5) * B2(5,j) &
+ + A(i,6) * B2(6,j) &
+ + A(i,7) * B2(7,j)
+
+ C3(i,j) = A(i,1) * B3(1,j) &
+ + A(i,2) * B3(2,j) &
+ + A(i,3) * B3(3,j) &
+ + A(i,4) * B3(4,j) &
+ + A(i,5) * B3(5,j) &
+ + A(i,6) * B3(6,j) &
+ + A(i,7) * B3(7,j)
+ enddo
+ enddo
+
+ end subroutine mxm7_3comp_singleA
+
!--------------------------------------------------------------------------------------------
+ subroutine mxm_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays e.g. (25,5)/(5,25), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,NGLLX),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(NGLLX,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! matrix-matrix multiplication wrapper
+ if (NGLLX == 5) then
+ call mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+ else if (NGLLX == 6) then
+ call mxm6_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+ else if (NGLLX == 7) then
+ call mxm7_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+ endif
+
+ end subroutine mxm_3comp_singleB
+
+ !-------------
+
subroutine mxm5_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
! 3 different arrays for x/y/z-components, 2-dimensional arrays (25,5)/(5,25), same B matrix for all 3 component arrays
@@ -1068,9 +1215,130 @@
end subroutine mxm5_3comp_singleB
+ !-------------
+
+ subroutine mxm6_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (36,6)/(6,36), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(6,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j) &
+ + A1(i,6) * B(6,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j) &
+ + A2(i,6) * B(6,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j) &
+ + A3(i,6) * B(6,j)
+ enddo
+ enddo
+
+ end subroutine mxm6_3comp_singleB
+
+ !-------------
+
+ subroutine mxm7_3comp_singleB(A1,A2,A3,n1,B,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 2-dimensional arrays (49,7)/(7,49), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(7,n3),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j
+
+ ! matrix-matrix multiplication
+ do j = 1,n3
+ do i = 1,n1
+ C1(i,j) = A1(i,1) * B(1,j) &
+ + A1(i,2) * B(2,j) &
+ + A1(i,3) * B(3,j) &
+ + A1(i,4) * B(4,j) &
+ + A1(i,5) * B(5,j) &
+ + A1(i,6) * B(6,j) &
+ + A1(i,7) * B(7,j)
+
+ C2(i,j) = A2(i,1) * B(1,j) &
+ + A2(i,2) * B(2,j) &
+ + A2(i,3) * B(3,j) &
+ + A2(i,4) * B(4,j) &
+ + A2(i,5) * B(5,j) &
+ + A2(i,6) * B(6,j) &
+ + A2(i,7) * B(7,j)
+
+ C3(i,j) = A3(i,1) * B(1,j) &
+ + A3(i,2) * B(2,j) &
+ + A3(i,3) * B(3,j) &
+ + A3(i,4) * B(4,j) &
+ + A3(i,5) * B(5,j) &
+ + A3(i,6) * B(6,j) &
+ + A3(i,7) * B(7,j)
+ enddo
+ enddo
+
+ end subroutine mxm7_3comp_singleB
+
!--------------------------------------------------------------------------------------------
+ subroutine mxm_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays e.g. (5,5,5), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL,NGLLX
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,NGLLX,n3),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(NGLLX,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C1,C2,C3
+
+ ! matrix-matrix multiplication wrapper
+ if (NGLLX == 5) then
+ call mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+ else if (NGLLX == 6) then
+ call mxm6_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+ else if (NGLLX == 7) then
+ call mxm7_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+ endif
+
+ end subroutine mxm_3comp_3dmat_singleB
+
+ !-------------
+
subroutine mxm5_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
! 3 different arrays for x/y/z-components, 3-dimensional arrays (5,5,5), same B matrix for all 3 component arrays
@@ -1114,5 +1382,104 @@
end subroutine mxm5_3comp_3dmat_singleB
- end subroutine compute_forces_viscoelastic_Dev_5p
+ !-------------
+
+ subroutine mxm6_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (6,6,6), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,6,n3),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(6,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do k = 1,n3
+ do j = 1,n2
+ do i = 1,n1
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j) &
+ + A1(i,6,k) * B(6,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j) &
+ + A2(i,6,k) * B(6,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j) &
+ + A3(i,6,k) * B(6,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm6_3comp_3dmat_singleB
+
+ !-------------
+
+ subroutine mxm7_3comp_3dmat_singleB(A1,A2,A3,n1,B,n2,C1,C2,C3,n3)
+
+! 3 different arrays for x/y/z-components, 3-dimensional arrays (7,7,7), same B matrix for all 3 component arrays
+
+ use constants,only: CUSTOM_REAL
+
+ implicit none
+
+ integer,intent(in) :: n1,n2,n3
+ real(kind=CUSTOM_REAL),dimension(n1,7,n3),intent(in) :: A1,A2,A3
+ real(kind=CUSTOM_REAL),dimension(7,n2),intent(in) :: B
+ real(kind=CUSTOM_REAL),dimension(n1,n2,n3),intent(out) :: C1,C2,C3
+
+ ! local parameters
+ integer :: i,j,k
+
+ ! matrix-matrix multiplication
+ do k = 1,n3
+ do j = 1,n2
+ do i = 1,n1
+ C1(i,j,k) = A1(i,1,k) * B(1,j) &
+ + A1(i,2,k) * B(2,j) &
+ + A1(i,3,k) * B(3,j) &
+ + A1(i,4,k) * B(4,j) &
+ + A1(i,5,k) * B(5,j) &
+ + A1(i,6,k) * B(6,j) &
+ + A1(i,7,k) * B(7,j)
+
+ C2(i,j,k) = A2(i,1,k) * B(1,j) &
+ + A2(i,2,k) * B(2,j) &
+ + A2(i,3,k) * B(3,j) &
+ + A2(i,4,k) * B(4,j) &
+ + A2(i,5,k) * B(5,j) &
+ + A2(i,6,k) * B(6,j) &
+ + A2(i,7,k) * B(7,j)
+
+ C3(i,j,k) = A3(i,1,k) * B(1,j) &
+ + A3(i,2,k) * B(2,j) &
+ + A3(i,3,k) * B(3,j) &
+ + A3(i,4,k) * B(4,j) &
+ + A3(i,5,k) * B(5,j) &
+ + A3(i,6,k) * B(6,j) &
+ + A3(i,7,k) * B(7,j)
+ enddo
+ enddo
+ enddo
+
+ end subroutine mxm7_3comp_3dmat_singleB
+
+ end subroutine compute_forces_viscoelastic_Dev
diff --git a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90 b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
index dc77cb6..d70552f 100644
--- a/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
+++ b/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
@@ -496,7 +496,7 @@ subroutine compute_forces_viscoelastic_Dev_sim1(iphase)
select case (NGLLX)
- case (5)
+ case (5,6,7)
!----------------------------------------------------------------------------------------------
@@ -536,7 +536,7 @@ subroutine compute_forces_viscoelastic_Dev_sim1(iphase)
! phase_ispec_inner_elastic,&
! num_colors_outer_elastic,num_colors_inner_elastic)
#else
- call compute_forces_viscoelastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
+ call compute_forces_viscoelastic_Dev(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D, &
@@ -590,8 +590,8 @@ subroutine compute_forces_viscoelastic_Dev_sim3(iphase)
select case (NGLLX)
- case (5)
- call compute_forces_viscoelastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+ case (5,6,7)
+ call compute_forces_viscoelastic_Dev(iphase, NSPEC_AB,NGLOB_AB, &
b_displ,b_veloc,b_accel, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
diff --git a/src/specfem3D/initialize_simulation.f90 b/src/specfem3D/initialize_simulation.f90
index 105de19..1ef201e 100644
--- a/src/specfem3D/initialize_simulation.f90
+++ b/src/specfem3D/initialize_simulation.f90
@@ -263,8 +263,10 @@
! check that optimized routines from Deville et al. (2002) can be used
if (USE_DEVILLE_PRODUCTS) then
- if (NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
- stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5'
+ if (NGLLX /= NGLLY .and. NGLLX /= NGLLZ) &
+ stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ'
+ if (NGLLX /= 5 .and. NGLLX /= 6 .and. NGLLX /= 7) &
+ stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5, 6 or 7'
endif
! gravity only on GPU supported
More information about the CIG-COMMITS
mailing list