BROWSER/doc_surf81/compensated__summation__mod_8F90_source.html

 MODULE compensated_summation_mod
 USE parkind1, ONLY : jprb
 USE yomhook , ONLY : lhook, dr_hook

 !**** COMPENSATED_SUMMATION_MOD
 !     Purpose.
 !     --------
 !     Functions to perform compensated (i.e. accurate) summation.
 !     These functions are used by ORDER_INDEPENDENT_SUMMATION_MOD

 !**   Interface.
 !     ----------

 !      CALL COMPENSATED_SUM     (P,KN,PCORR,PERR)
 !      CALL COMPENSATED_SUM_OMP (P,KN,PCORR,PERR)
 !
 !        These routines transform the elements of the array P, such that:
 !
 !        1)  p(kn)         contains sum(p)
 !
 !        2)  p(1)...p(kn-1) contain the rounding errors that were made
 !                           in calculating sum(p).
 !        3)  The exact sum of the elements of p is unmodified.
 !
 !        On return, pcorr contains the sum of the rounding errors, perr
 !        contains the sum of their absolute values.
 !
 !        After calling this routine, an accurate sum of the elements of p
 !        can be calculated as res=p(kn)+pcorr.
 !
 !        CALL COMPENSATED_SUM_OMP is an OpenMP-parallelized version of
 !        CALL COMPENSATED_SUM.


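 !      CALL COMPENSATED_DOT_PRODUCT     (P1,P2,   POUT=POUT,KN=KN,PCORR=PCORR,PERR=PERR)
 !      CALL COMPENSATED_DOT_PRODUCT     (P1,P2,PW,POUT,KN,PCORR,PERR)
 !      CALL COMPENSATED_DOT_PRODUCT_OMP (P1,P2,   POUT=POUT,KN=KN,PCORR=PCORR,PERR=PERR)
 !      CALL COMPENSATED_DOT_PRODUCT_OMP (P1,P2,PW,POUT,KN,PCORR,PERR)
 !
 !        PW is an optional argument that precedes POUT in the argument
 !        list, so POUT, KN, PCORR and PERR must be passed by keyword
 !        when PW is omitted.
 !
 !        These routines are variations on COMPENSATED_SUM that are
 !        provided simply to reduce memory-access time.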

 !      CALL COMPENSATED_DOT_PRODUCT (P1,P2,POUT=POUT,KN=KN,PCORR=PCORR,PERR=PERR)
 !
 !      is functionally equivalent to the following:
 !
 !          POUT(:) = P1(:)*P2(:)
 !          CALL COMPENSATED_SUM (POUT,KN,PCORR,PERR)

 !      CALL COMPENSATED_DOT_PRODUCT_OMP (P1,P2,POUT=POUT,KN=KN,PCORR=PCORR,PERR=PERR)
 !
 !      is functionally equivalent to the following:
 !
 !          POUT(:) = P1(:)*P2(:)
 !          CALL COMPENSATED_SUM_OMP (POUT,KN,PCORR,PERR)

 !      CALL COMPENSATED_DOT_PRODUCT (P1,P2,PW,POUT,KN,PCORR,PERR)
 !
 !      is functionally equivalent to the following:
 !
 !          POUT(:) = P1(:)*P2(:)*PW(:)
 !          CALL COMPENSATED_SUM (POUT,KN,PCORR,PERR)

 !      CALL COMPENSATED_DOT_PRODUCT_OMP (P1,P2,PW,POUT,KN,PCORR,PERR)
 !
 !      is functionally equivalent to the following:
 !
 !          POUT(:) = P1(:)*P2(:)*PW(:)
 !          CALL COMPENSATED_SUM_OMP (POUT,KN,PCORR,PERR)

 !**   Algorithm
 !     ---------

 !  The algorithm is based on Ogita et al. (2005) SIAM J. Sci. Computing,
 !  Vol.26, No.6, pp1955-1988. This is based in turn on an algorithm
 !  by Knuth (1969, seminumerical algorithms).
 !
 !  The basic idea is that we can transform a pair of floating-point
 !  numbers "a" and "b" into a new pair "x" and "y", such that:
 !
 !      x = add(a,b)  and x+y = a+b
 !
 !  where "add" denotes floating-point addition, and "+" denotes
 !  exact, mathematical (i.e. infinite-precision) addition.
 !
 !  Applying this to an array, p, we can transform the array such that:
 !
 !      p(kn) := add(p(1),p(2),...,p(kn))
 !  and p(1) + p(2) + ... + p(kn) is unchanged.


 !     Author.
 !     -------
 !        Mike Fisher  ECMWF

 !     Modifications.
 !     --------------
 !        Original: 2006-20-22

 !     ------------------------------------------------------------------

 USE parkind1  ,ONLY : jpim     ,jprb

 !--- No implicit typing anywhere in the module specification part.
 IMPLICIT NONE

 !--- SAVE is kept from the original; this module declares no variables.
 SAVE

 !--- Everything is private except the four entry points listed below.
 PRIVATE
 PUBLIC compensated_sum, &
      & compensated_sum_omp, &
      & compensated_dot_product, &
      & compensated_dot_product_omp

 !--- Each generic interface has a single specific procedure carrying the
 !--- same name as the generic itself.
 INTERFACE compensated_sum
   MODULE PROCEDURE compensated_sum
 END INTERFACE compensated_sum

 INTERFACE compensated_sum_omp
   MODULE PROCEDURE compensated_sum_omp
 END INTERFACE compensated_sum_omp

 INTERFACE compensated_dot_product
   MODULE PROCEDURE compensated_dot_product
 END INTERFACE compensated_dot_product

 INTERFACE compensated_dot_product_omp
   MODULE PROCEDURE compensated_dot_product_omp
 END INTERFACE compensated_dot_product_omp

 CONTAINS

 SUBROUTINE compensated_sum (P,KN,PCORR,PERR)
 !--- In-place compensated summation of P(1:KN).
 !---
 !--- Arguments:
 !---   P     (inout) On entry, the values to be summed. On return, P(KN)
 !---                 holds the floating-point running sum and P(1:KN-1)
 !---                 hold the error term produced by each addition step.
 !---   KN    (in)    Number of elements of P. If KN < 1 there is nothing
 !---                 to sum: P is not referenced and PCORR = PERR = 0.
 !---   PCORR (out)   Sum of the error terms stored in P(1:KN-1).
 !---   PERR  (out)   Sum of the absolute values of those error terms.
   IMPLICIT NONE

   INTEGER(KIND=JPIM), INTENT(IN)    :: KN
   REAL(KIND=JPRB),    INTENT(INOUT) :: P(kn)
   REAL(KIND=JPRB),    INTENT(OUT)   :: PCORR, PERR

   REAL(KIND=JPRB) :: ZX,ZZ,ZPSUM
   INTEGER(KIND=JPIM) :: J

   REAL(KIND=JPRB) :: ZHOOK_HANDLE
   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_SUM',0,zhook_handle)
   pcorr = 0.0_jprb
   perr  = 0.0_jprb

 !--- Guard against an empty array: without it p(1) would be read and
 !--- p(kn) written outside the bounds of P.
   IF (kn >= 1) THEN
     zpsum = p(1)
     DO j=2,kn
 !--- It is vital that these 4 lines are not optimized in any way that
 !--- changes the results.
       zx     = p(j) + zpsum
       zz     = zx   - p(j)
       p(j-1) = (p(j)-(zx-zz)) + (zpsum-zz)
       zpsum  = zx
 !--- accumulate the correction and the error
       pcorr = pcorr + p(j-1)
       perr  = perr  + abs(p(j-1))
     ENDDO
 !--- the running sum ends up in the last element
     p(kn) = zpsum
   ENDIF

 !-----------------------------------------------------------------
 !  Vectorization
 !  -------------
 !
 !  NB: As coded, the above loop may not run very well on a vector
 !      computer. However, any loop ordering that preserves the exact
 !      sum, and ends up with the floating-point sum in the last
 !      element and rounding errors in the rest of the array,
 !      should be OK. For example:
 !
 !      ILEN=KN
 !      DO
 !        IHALF=ILEN/2
 !      !--- no vector dependency
 !        DO J=1,IHALF
 !--- It is vital that these 4 lines are not optimized in any way that
 !--- changes the results.
 !          ZX     = P(KN-IHALF+J) + P(KN-ILEN+J)
 !          ZZ     = ZX   - P(KN-IHALF+J)
 !          P(KN-ILEN+J)  = (P(KN-IHALF+J)-(ZX-ZZ)) + (P(KN-ILEN+J)-ZZ)
 !          P(KN-IHALF+J) = ZX
 !--- accumulate the correction and the error
 !          PCORR = PCORR + P(KN-ILEN+J)
 !          PERR  = PERR  + ABS(P(KN-ILEN+J))
 !        ENDDO
 !        ILEN=ILEN-IHALF
 !        IF (ILEN<=1) EXIT
 !      ENDDO
 !-----------------------------------------------------------------

   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_SUM',1,zhook_handle)
 END SUBROUTINE compensated_sum

 SUBROUTINE compensated_sum_omp (P,KN,PCORR,PERR)
 !--- OpenMP-parallel variant of compensated_sum.
 !---
 !--- P(1:KN) is split into contiguous chunks; compensated_sum is applied
 !--- to each chunk, and the per-chunk partial sums (stored in the last
 !--- element of each chunk) are then combined serially with the same
 !--- error-capturing addition.
 !---
 !--- Arguments:
 !---   P     (inout) On entry, the values to be summed. On return, P(KN)
 !---                 holds the floating-point sum and the other elements
 !---                 hold the error terms of the individual additions.
 !---   KN    (in)    Number of elements of P. If KN < 1 there is nothing
 !---                 to sum: P is not referenced and PCORR = PERR = 0.
 !---   PCORR (out)   Sum of all error terms.
 !---   PERR  (out)   Sum of the absolute values of all error terms.
   USE oml_mod, ONLY : oml_max_threads

   IMPLICIT NONE

   INTEGER(KIND=JPIM), INTENT(IN)    :: KN
   REAL(KIND=JPRB),    INTENT(INOUT) :: P(kn)
   REAL(KIND=JPRB),    INTENT(OUT)   :: PCORR, PERR

 !--- IMINLEN is a tunable parameter. It represents the vector length
 !--- below which there is too little work to make it worth spawning threads
   INTEGER(KIND=JPIM), PARAMETER :: IMINLEN = 1000_JPIM  !-- this value is pure guesswork (Mike Fisher)

   REAL(KIND=JPRB), ALLOCATABLE :: ZERRS(:),ZCORS(:)
   REAL(KIND=JPRB) :: ZX,ZZ
   INTEGER(KIND=JPIM) :: JCHUNK,ILEN,INCHUNKS,ILENCHUNK, &
                     & INTHREADS,I,ISTART,IEND

   REAL(KIND=JPRB) :: ZHOOK_HANDLE
   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_SUM_OMP',0,zhook_handle)

   pcorr = 0.0_jprb
   perr  = 0.0_jprb

 !--- Guard against an empty array: for KN < 1 the chunk count below would
 !--- still evaluate to 1 and an empty section would be handed to
 !--- compensated_sum.
   IF (kn >= 1) THEN

     inthreads = oml_max_threads()

 !--- chunk length: at least IMINLEN, otherwise KN spread over the threads
     ilenchunk = max(iminlen,(kn+inthreads-1)/inthreads)
     inchunks  = 1+(kn-1)/ilenchunk

     ALLOCATE(zerrs(inchunks))
     ALLOCATE(zcors(inchunks))

 !--- First, we split the array into chunks, and apply compensated_sum
 !--- to each chunk independently.

 !$OMP PARALLEL DO PRIVATE(ISTART,IEND), SCHEDULE(STATIC), IF(INCHUNKS>1)
     DO jchunk=1,inchunks
       istart = 1+(jchunk-1)*ilenchunk
       iend   = min(jchunk*ilenchunk,kn)
       CALL compensated_sum (p(istart:iend),1+iend-istart, &
                         & zcors(jchunk), zerrs(jchunk))
     ENDDO
 !$OMP END PARALLEL DO

     pcorr = sum(zcors)
     perr  = sum(zerrs)

 !--- The final element of each chunk contains a partial sum. We apply
 !--- compensated summation to the vector of the final elements.

     DO jchunk=2,inchunks
 !--- i is the last element of this chunk, i-ilen the last element of
 !--- the previous chunk
       i = min(jchunk*ilenchunk,kn)
       ilen = i - (jchunk-1)*ilenchunk
 !--- It is vital that these 4 lines are not optimized
       zx        = p(i) + p(i-ilen)
       zz        = zx   - p(i)
       p(i-ilen) = (p(i)-(zx-zz)) + (p(i-ilen)-zz)
       p(i)      = zx
 !--- accumulate the result and its error bound
       pcorr     = pcorr + p(i-ilen)
       perr      = perr  + abs(p(i-ilen))
     ENDDO

     DEALLOCATE(zerrs)
     DEALLOCATE(zcors)

   ENDIF

   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_SUM_OMP',1,zhook_handle)
 END SUBROUTINE compensated_sum_omp

 SUBROUTINE compensated_dot_product (P1,P2,PW,POUT,KN,PCORR,PERR)
 !--- Compensated summation of the element-wise products P1*P2 (or
 !--- P1*P2*PW when PW is present), without first storing the products.
 !---
 !--- The products themselves are formed in ordinary floating-point
 !--- arithmetic; only their summation is compensated.
 !---
 !--- Arguments:
 !---   P1,P2 (in)    Input vectors of length KN.
 !---   PW    (in)    Optional weights of length KN. Because PW precedes
 !---                 POUT in the argument list, callers that omit it must
 !---                 pass POUT, KN, PCORR and PERR by keyword.
 !---   POUT  (out)   On return, POUT(KN) holds the floating-point sum of
 !---                 the products and POUT(1:KN-1) hold the error term
 !---                 produced by each addition step.
 !---   KN    (in)    Vector length. If KN < 1 there is nothing to sum:
 !---                 no array is referenced and PCORR = PERR = 0.
 !---   PCORR (out)   Sum of the error terms stored in POUT(1:KN-1).
 !---   PERR  (out)   Sum of the absolute values of those error terms.
   IMPLICIT NONE

   INTEGER(KIND=JPIM), INTENT(IN)           :: KN
   REAL(KIND=JPRB),    INTENT(IN)           :: P1(kn), P2(kn)
   REAL(KIND=JPRB),    INTENT(IN), OPTIONAL :: PW(kn)
   REAL(KIND=JPRB),    INTENT(OUT)          :: POUT(kn)
   REAL(KIND=JPRB),    INTENT(OUT)          :: PCORR, PERR

   REAL(KIND=JPRB) :: ZX,ZZ,ZPJ,ZPSUM
   INTEGER(KIND=JPIM) :: J
 !==================================================================
 !INTEGER(KIND=JPIM) :: ILEN, IHALF
 !==================================================================

   REAL(KIND=JPRB) :: ZHOOK_HANDLE
   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_DOT_PRODUCT',0,zhook_handle)
   pcorr = 0.0_jprb
   perr  = 0.0_jprb

 !--- Guard against empty vectors: without it element 1 of the inputs
 !--- would be read and pout(kn) written outside the array bounds.
   IF (kn >= 1) THEN

     IF (PRESENT(pw)) THEN
       zpsum = p1(1)*p2(1)*pw(1)
     ELSE
       zpsum = p1(1)*p2(1)
     ENDIF

     DO j=2,kn
       IF (PRESENT(pw)) THEN
         zpj = p1(j)*p2(j)*pw(j)
       ELSE
         zpj = p1(j)*p2(j)
       ENDIF
 !--- It is vital that these 4 lines are not optimized in any way that
 !--- changes the results.
       zx     = zpj + zpsum
       zz     = zx   - zpj
       pout(j-1) = (zpj-(zx-zz)) + (zpsum-zz)
       zpsum     = zx
 !--- accumulate the correction and the error
       pcorr = pcorr + pout(j-1)
       perr  = perr  + abs(pout(j-1))
     ENDDO
 !--- the running sum ends up in the last element
     pout(kn) = zpsum

   ENDIF

 !==================================================================
 !== vectorized version
 !      DO J=1,KN
 !        IF (PRESENT(PW)) THEN
 !          POUT(J) = P1(J)*P2(J)*PW(J)
 !        ELSE
 !          POUT(J) = P1(J)*P2(J)
 !        ENDIF
 !      ENDDO
 !
 !      ILEN=KN
 !      DO
 !        IHALF=ILEN/2
 !      !--- no vector dependency
 !        DO J=1,IHALF
 !!--- It is vital that these 4 lines are not optimized in any way that
 !!--- changes the results.
 !          ZX     = POUT(KN-IHALF+J) + POUT(KN-ILEN+J)
 !          ZZ     = ZX   - POUT(KN-IHALF+J)
 !          POUT(KN-ILEN+J)  = (POUT(KN-IHALF+J)-(ZX-ZZ)) + (POUT(KN-ILEN+J)-ZZ)
 !          POUT(KN-IHALF+J) = ZX
 !!--- accumulate the correction and the error
 !          PCORR = PCORR + POUT(KN-ILEN+J)
 !          PERR  = PERR  + ABS(POUT(KN-ILEN+J))
 !        ENDDO
 !        ILEN=ILEN-IHALF
 !        IF (ILEN<=1) EXIT
 !      ENDDO
 !==================================================================
   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_DOT_PRODUCT',1,zhook_handle)
 END SUBROUTINE compensated_dot_product

 SUBROUTINE compensated_dot_product_omp (P1,P2,PW,POUT,KN,PCORR,PERR)
 !--- OpenMP-parallel variant of compensated_dot_product.
 !---
 !--- The vectors are split into contiguous chunks;
 !--- compensated_dot_product is applied to each chunk, and the per-chunk
 !--- partial sums (stored in the last element of each chunk of POUT) are
 !--- then combined serially with the same error-capturing addition.
 !---
 !--- Arguments:
 !---   P1,P2 (in)    Input vectors of length KN.
 !---   PW    (in)    Optional weights of length KN. Because PW precedes
 !---                 POUT in the argument list, callers that omit it must
 !---                 pass POUT, KN, PCORR and PERR by keyword.
 !---   POUT  (out)   On return, POUT(KN) holds the floating-point sum of
 !---                 the products and the other elements hold the error
 !---                 terms of the individual additions.
 !---   KN    (in)    Vector length. If KN < 1 there is nothing to sum:
 !---                 no array is referenced and PCORR = PERR = 0.
 !---   PCORR (out)   Sum of all error terms.
 !---   PERR  (out)   Sum of the absolute values of all error terms.
   USE oml_mod, ONLY : oml_max_threads

   IMPLICIT NONE

   INTEGER(KIND=JPIM), INTENT(IN)           :: KN
   REAL(KIND=JPRB),    INTENT(IN)           :: P1(kn), P2(kn)
   REAL(KIND=JPRB),    INTENT(IN), OPTIONAL :: PW(kn)
   REAL(KIND=JPRB),    INTENT(OUT)          :: POUT(kn)
   REAL(KIND=JPRB),    INTENT(OUT)          :: PCORR, PERR

 !--- IMINLEN is a tunable parameter. It represents the vector length
 !--- below which there is too little work to make it worth spawning threads
   INTEGER(KIND=JPIM), PARAMETER :: IMINLEN = 1000_JPIM  !-- this value is pure guesswork (Mike Fisher)

   REAL(KIND=JPRB), ALLOCATABLE :: ZERRS(:),ZCORS(:)
   REAL(KIND=JPRB) :: ZX,ZZ
   INTEGER(KIND=JPIM) :: JCHUNK,ILEN,INCHUNKS,ILENCHUNK, &
                     & INTHREADS,I,ISTART,IEND

   REAL(KIND=JPRB) :: ZHOOK_HANDLE
   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_DOT_PRODUCT_OMP',0,zhook_handle)

   pcorr = 0.0_jprb
   perr  = 0.0_jprb

 !--- Guard against empty vectors: for KN < 1 the chunk count below would
 !--- still evaluate to 1 and empty sections would be handed to
 !--- compensated_dot_product.
   IF (kn >= 1) THEN

     inthreads = oml_max_threads()

 !--- chunk length: at least IMINLEN, otherwise KN spread over the threads
     ilenchunk = max(iminlen,(kn+inthreads-1)/inthreads)
     inchunks  = 1+(kn-1)/ilenchunk

     ALLOCATE(zerrs(inchunks))
     ALLOCATE(zcors(inchunks))

 !--- First, we split the array into chunks, and apply
 !--- compensated_dot_product to each chunk independently.
 !--- Two separate loops are needed because a section of PW may only be
 !--- formed when PW is present.

     IF (PRESENT(pw)) THEN
 !$OMP PARALLEL DO PRIVATE(ISTART,IEND), SCHEDULE(STATIC), IF(INCHUNKS>1)
       DO jchunk=1,inchunks
         istart = 1+(jchunk-1)*ilenchunk
         iend   = min(jchunk*ilenchunk,kn)
         CALL compensated_dot_product (p1(istart:iend),p2(istart:iend),   &
                           &           pw(istart:iend),pout(istart:iend), &
                           &           1+iend-istart, &
                           &           zcors(jchunk), zerrs(jchunk))
       ENDDO
 !$OMP END PARALLEL DO
     ELSE
 !$OMP PARALLEL DO PRIVATE(ISTART,IEND), SCHEDULE(STATIC), IF(INCHUNKS>1)
       DO jchunk=1,inchunks
         istart = 1+(jchunk-1)*ilenchunk
         iend   = min(jchunk*ilenchunk,kn)
 !--- PW is omitted, so the remaining arguments are passed by keyword
         CALL compensated_dot_product (p1=p1(istart:iend),p2=p2(istart:iend),  &
                           &           pout=pout(istart:iend),           &
                           &           kn=1+iend-istart, &
                           &           pcorr=zcors(jchunk), perr=zerrs(jchunk))
       ENDDO
 !$OMP END PARALLEL DO
     ENDIF

     pcorr = sum(zcors)
     perr  = sum(zerrs)

 !--- The final element of each chunk contains a partial sum. We apply
 !--- compensated summation to the vector of the final elements.

     DO jchunk=2,inchunks
 !--- i is the last element of this chunk, i-ilen the last element of
 !--- the previous chunk
       i = min(jchunk*ilenchunk,kn)
       ilen = i - (jchunk-1)*ilenchunk
 !--- It is vital that these 4 lines are not optimized
       zx           = pout(i) + pout(i-ilen)
       zz           = zx   - pout(i)
       pout(i-ilen) = (pout(i)-(zx-zz)) + (pout(i-ilen)-zz)
       pout(i)      = zx
 !--- accumulate the result and its error bound
       pcorr     = pcorr + pout(i-ilen)
       perr      = perr  + abs(pout(i-ilen))
     ENDDO

     DEALLOCATE(zerrs)
     DEALLOCATE(zcors)

   ENDIF

   IF (lhook) CALL dr_hook('COMPENSATED_SUMMATION_MOD:COMPENSATED_DOT_PRODUCT_OMP',1,zhook_handle)
 END SUBROUTINE compensated_dot_product_omp

 END MODULE compensated_summation_mod
compensated_summation_mod::compensated_sum
Definition: compensated_summation_mod.F90:111

parkind1::jpim
integer, parameter jpim
Definition: parkind1.F90:13

yomhook::dr_hook
Definition: yomhook.F90:20

compensated_summation_mod
Definition: compensated_summation_mod.F90:1

compensated_summation_mod::compensated_dot_product
Definition: compensated_summation_mod.F90:119

parkind1::jprb
integer, parameter jprb
Definition: parkind1.F90:32

compensated_summation_mod::compensated_sum_omp
Definition: compensated_summation_mod.F90:115

sum
intent(out) overrides sub arrays one Sort by the least significant key first sum(iindex(1:n))

yomhook::lhook
logical lhook
Definition: yomhook.F90:15

parkind1
Definition: parkind1.F90:1

compensated_summation_mod::compensated_dot_product_omp
Definition: compensated_summation_mod.F90:123

oml_mod::oml_max_threads
integer(kind=jpim) function, public oml_max_threads()
Definition: oml_mod.F90:256

oml_mod
Definition: oml_mod.F90:1

yomhook
Definition: yomhook.F90:1