[cig-commits] [commit] devel: adds flop count to compute_forces_viscoelastic_Dev_5p() routine (d364e44)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Sep 11 10:23:39 PDT 2014
Repository : https://github.com/geodynamics/specfem3d
On branch : devel
Link : https://github.com/geodynamics/specfem3d/compare/3704c888212d30d103ff64ed797e3904be39dd35...e84e7ba6f8959cb80449833335af69c684b5a5ee
>---------------------------------------------------------------
commit d364e4499b2e6dea9aaf5d01e1e2148ae6fc2ad7
Author: daniel peter <peterda at ethz.ch>
Date: Tue Aug 26 10:34:43 2014 +0200
adds flop count to compute_forces_viscoelastic_Dev_5p() routine
>---------------------------------------------------------------
d364e4499b2e6dea9aaf5d01e1e2148ae6fc2ad7
src/specfem3D/compute_forces_viscoelastic_Dev.F90 | 110 +++++++++++++++++++++-
1 file changed, 109 insertions(+), 1 deletion(-)
diff --git a/src/specfem3D/compute_forces_viscoelastic_Dev.F90 b/src/specfem3D/compute_forces_viscoelastic_Dev.F90
index 61d716b..b0f8420 100644
--- a/src/specfem3D/compute_forces_viscoelastic_Dev.F90
+++ b/src/specfem3D/compute_forces_viscoelastic_Dev.F90
@@ -259,6 +259,18 @@
do ispec_p = 1,num_elements
+! arithmetic intensity: ratio of number-of-arithmetic-operations / number-of-bytes-accessed-on-DRAM
+!
+! hand-counts on floating-point operations: counts addition/subtraction/multiplication/division
+! no counts for operations on indices in do-loops (?)
+!
+! counts accesses to global memory (DRAM), but not shared/cache memory accesses or register loads/stores
+! float/real has 4 bytes
+
+! hand-counts: floating-point operations FLOP, DRAM accesses in BYTES
+! for "simplest kernel" (isotropic without attenuation, dynamic fault, etc.)
+! and for single element, assuming NGLLX == NGLLY == NGLLZ == 5
+
! returns element id from stored element list
ispec = phase_ispec_inner_elastic(ispec_p,iphase)
@@ -271,6 +283,11 @@
endif
endif ! adjoint
+! counts:
+! 0 FLOP
+!
+! 1 float = 4 BYTE
+
! Kelvin Voigt damping: artificial viscosity around dynamic faults
! stores displacement values in local array
@@ -300,6 +317,12 @@
enddo
endif
+! counts:
+! + 0 FLOP
+!
+! + NGLLX * NGLLY * NGLLZ * ( 1 + 3 ) float = 2000 BYTE
+
+
! use first order Taylor expansion of displacement for local storage of stresses
! at this current time step, to fix attenuation in a consistent way
if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
@@ -354,6 +377,11 @@
enddo
enddo
+! counts:
+! + m1 * m2 * 3 * 9 = 5 * 25 * 3 * 9 = 3375 FLOP
+!
+! + m1 * 5 float = 100 BYTE (hprime_xx once, assuming B3_** in cache)
+
if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
! temporary variables used for fixing attenuation in a consistent way
do j=1,m2
@@ -436,6 +464,12 @@
enddo
enddo
+! counts:
+! + m1 * m1 * NGLLX * 3 * 9 = 5 * 5 * 5 * 3 * 9 = 3375 FLOP
+!
+! + m1 * 5 float = 100 BYTE (hprime_xxT once, assuming dummy*_** in cache)
+
+
if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
! temporary variables used for fixing attenuation in a consistent way
do j=1,m1
@@ -521,6 +555,12 @@
enddo
enddo
+! counts:
+! + m1 * m2 * 3 * 9 = 5 * 25 * 3 * 9 = 3375 FLOP
+!
+! + 0 BYTE (assuming A3_**, hprime_xxT in cache)
+
+
if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
! temporary variables used for fixing attenuation in a consistent way
do j=1,m1
@@ -594,6 +634,11 @@
gammazl = gammaz(i,j,k,ispec)
jacobianl = jacobian(i,j,k,ispec)
+! counts:
+! + 0 FLOP
+!
+! + NGLLX * NGLLY * NGLLZ * 10 float = 5000 BYTE (loads of xix..gammaz and jacobian; assuming A3_**, hprime_xxT in cache)
+
duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
@@ -606,6 +651,11 @@
duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
+! counts:
+! + NGLLX * NGLLY * NGLLZ * 9 * 5 = 5625 FLOP
+!
+! + 0 BYTE (assuming temp*_** in cache)
+
! save strain on the Moho boundary
if (SAVE_MOHO_MESH ) then
if (is_moho_top(ispec)) then
@@ -639,6 +689,11 @@
duzdxl_plus_duxdzl = duzdxl + duxdzl
duzdyl_plus_duydzl = duzdyl + duydzl
+! counts:
+! + NGLLX * NGLLY * NGLLZ * 6 * 1 = 750 FLOP
+!
+! + 0 BYTE (assuming registers)
+
if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
! temporary variables used for fixing attenuation in a consistent way
duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
@@ -721,6 +776,11 @@
kappal = kappastore(i,j,k,ispec)
mul = mustore(i,j,k,ispec)
+! counts:
+! + 0 FLOP
+!
+! + NGLLX * NGLLY * NGLLZ * 2 float = 1000 BYTE
+
! attenuation
if(ATTENUATION) then
! use unrelaxed parameters if attenuation
@@ -782,6 +842,11 @@
endif ! ANISOTROPY
+! counts:
+! + NGLLX * NGLLY * NGLLZ * 16 = 2000 FLOP
+!
+! + 0 BYTE
+
! subtract memory variables if attenuation
if(ATTENUATION) then
! way 1
@@ -863,7 +928,7 @@
enddo
endif
- endif
+ endif ! ATTENUATION
! define symmetric components of sigma
sigma_yx = sigma_xy
@@ -883,6 +948,12 @@
tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+! counts:
+! + NGLLX * NGLLY * NGLLZ * 9 * 6 = 6750 FLOP
+!
+! + NGLLX * NGLLY * NGLLZ * 9 float = 4500 BYTE (temp* stores)
+
enddo
enddo
enddo
@@ -931,6 +1002,11 @@
enddo
enddo
+! counts:
+! + m1 * m2 * 3 * 9 = 3375 FLOP
+!
+! + m1 * 5 float = 100 BYTE (hprimewgll_xxT once, assumes E3*, C1* in cache)
+
! call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
! hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
do i=1,m1
@@ -956,6 +1032,11 @@
enddo
enddo
+! counts:
+! + m1 * m1 * NGLLX * 3 * 9 = 3375 FLOP
+!
+! + m1 * 5 float = 100 BYTE (hprimewgll_xx once, assumes E3*, C1* in cache)
+
! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
do j=1,m1
do i=1,m2
@@ -977,6 +1058,11 @@
enddo
enddo
+! counts:
+! + m1 * m2 * 3 * 9 = 3375 FLOP
+!
+! + 0 BYTE (assumes E1*, C1*, hprime* in cache)
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -994,6 +1080,13 @@
accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
+
+! counts:
+! + NGLLX * NGLLY * NGLLZ * 3 * 6 = 2250 FLOP
+!
+! + NGLLX * NGLLY * 3 float = 300 BYTE (wgllwgll once)
+! + NGLLX * NGLLY * NGLLZ * (1 + 3 ) float = 2000 BYTE (ibool & accel, assumes newtemp* in cache)
+
! update memory variables based upon the Runge-Kutta scheme
if(ATTENUATION) then
@@ -1079,6 +1172,21 @@
epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
endif
+! counts:
+! + 0 FLOP
+!
+! + 0 BYTE
+
+! counts:
+! -----------------
+! total of: 37625 FLOP per element
+!
+! 15204 BYTE DRAM accesses per element
+!
+! arithmetic intensity: 37625 FLOP / 15204 BYTES ~ 2.5 FLOP/BYTE
+! -----------------
+
+
enddo ! spectral element loop
end subroutine compute_forces_viscoelastic_Dev_5p
More information about the CIG-COMMITS
mailing list