1 SUBROUTINE gstats(KNUM,KSWITCH)
67 INTEGER(KIND=JPIM),
INTENT(IN) :: KNUM
68 INTEGER(KIND=JPIM),
INTENT(IN) :: KSWITCH
70 INTEGER(KIND=JPIM) :: IMOD,ICALL
71 INTEGER(KIND=JPIM) :: IIMEM, IIPAG, IIMEMC
72 INTEGER(KIND=JPIB) :: IMEM, IMEMH, IMEMS, IMEMC, IPAG, INUM
73 INTEGER(KIND=JPIB) :: GETMAXRSS, GETHWM, GETSTK, GETCURHEAP, GETPAG
74 EXTERNAL getmaxrss, gethwm, getstk, getcurheap, getpag
75 REAL(KIND=JPRD) :: ZTIMED,ZCLOCK,ZCLOCK1,ZTIME,ZTCPU,ZVCPU
76 LOGICAL :: LLFIRST=.true.
77 LOGICAL :: LLMFIRST=.true.
78 CHARACTER(LEN=32),
SAVE :: CCDESC_DRHOOK(
jpmaxstat)
79 CHARACTER(LEN=32),
SAVE :: CCDESC_BARR(
jpmaxstat)
80 SAVE iimem, iipag, iimemc
82 INTEGER(KIND=JPIM),
SAVE :: NUM_THREADS
83 REAL(KIND=JPRB),
ALLOCATABLE,
SAVE :: ZHOOK_HANDLE(:)
84 REAL(KIND=JPRB),
SAVE :: ZHOOK_HANDLE_COMMS, ZHOOK_HANDLE_COMMS1
85 REAL(KIND=JPRB),
SAVE :: ZHOOK_HANDLE_TRANS
86 REAL(KIND=JPRB),
SAVE :: ZHOOK_HANDLE_BARR
89 CHARACTER (LEN = 10) :: CLDATEOD,CLZONEOD
90 INTEGER(KIND=JPIM) :: IVALUES(8)
92 INTEGER(KIND=JPIM) :: NMAX_STATS, KULNAM
96 LOGICAL :: LLFINDSUMB=.false.
97 INTEGER(KIND=JPIM),
SAVE :: ISUMBSTACK(10)
98 INTEGER(KIND=JPIM) :: J
99 REAL(KIND=JPRB) :: ZSUM,ZSUMB,ZTOT
100 REAL(KIND=JPRD) :: SBYTES,RBYTES
101 INTEGER(KIND=JPIM) :: NSEND,NRECV
103 #include "user_clock.h" 113 IF(.NOT.
ALLOCATED(zhook_handle))
THEN 115 ALLOCATE(zhook_handle(num_threads))
121 ccdesc_barr(inum)=
'>BAR-'//
ccdesc(inum)(1:21)//
'('//cc//
')' 127 & .OR.
cctype(inum).EQ.
'BAR'.OR.
cctype(inum).EQ.
'OMP')
THEN 128 ccdesc_drhook(inum)=
'>'//
cctype(inum)//
'-'//
ccdesc(inum)(1:21)//
'('//cc//
')' 147 CALL user_clock(ptotal_cp=ztcpu,pvector_cp=zvcpu)
158 IF (
lhook .AND. (kswitch == 0 .OR. kswitch == 1))
THEN 162 IF(
cctype(knum).EQ.
"TRS")
THEN 163 CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_trans)
164 ELSEIF(
cctype(knum).EQ.
'MP-')
THEN 165 CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_comms)
166 ELSEIF(
cctype(knum).EQ.
'MPL'.AND.knum.NE.682)
THEN 167 CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_comms1)
168 ELSEIF(
cctype(knum).EQ.
'OMP')
THEN 169 CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle)
170 ELSEIF(
cctype(knum).EQ.
'BAR')
THEN 171 CALL dr_hook(ccdesc_drhook(knum),kswitch,zhook_handle_barr)
197 ntmem(:,5) = 99999999
206 IF(kswitch == 0.OR. kswitch == 1)
THEN 209 imod = mod(
ncalls(knum),2)
210 IF(.NOT.((kswitch == 0.AND. imod == 1) .OR.&
211 &(kswitch == 2.AND. imod == 1) .OR.&
212 &(kswitch == 3.AND. imod == 1) .OR.&
213 &(kswitch == 1.AND. imod == 0)))
THEN 214 WRITE(
jperr,*)
'KNUM,KSWITCH,IMOD,NCALLS(KNUM)',&
215 &knum,kswitch,imod,
ncalls(knum)
223 IF( kswitch == 0 )
THEN 235 isumbstack(j+1)=isumbstack(j)
238 IF( ztimed > 0.1_jprd .AND. (
timesumb(knum) > 1.0_jprd) )
THEN 239 WRITE(0,
'("GSTATS(SUMB): KNUM=",I4," ZTIMED=",F10.6," TIMESUMB=",F10.6)')&
242 IF( isumbstack(j) > 0 )
THEN 243 WRITE(0,
'("GSTATS(SUMB): ",I4,2X,I8,2X,A40)')isumbstack(j),&
254 WRITE(0,
'("GSTATS(SUMB): OVERLAPPING COUNTERS ",I4,2X,I4)')knum,j
268 imem = getmaxrss()/1024
270 imemh = gethwm()/1024
271 imems = getstk()/1024
274 IF(imem > iimem.OR.ipag > iipag.OR.(
lstats_alloc.AND.(imemc.NE.iimemc)))
THEN 276 WRITE(0,*)
".---------------------------------------------------------" 277 WRITE(0,*)
"| Memory trace details" 278 WRITE(0,*)
"| --------------------" 279 WRITE(0,*)
"| Memory examined at each GSTATS call if NSTATS_MEM>0." 280 WRITE(0,*)
"| Header for each trace line is:" 282 WRITE(0,*)
"| RSS_INC: Increase in RSS_MAX (KB)" 283 WRITE(0,*)
"| RSS_MAX: Maximum real working set so far (KB)" 284 WRITE(0,*)
"| HEAP_MX: High Water Mark for heap so far (KB)" 285 WRITE(0,*)
"| STK: Current Stack usage (KB)" 286 WRITE(0,*)
"| PGS: Page faults w I/O since last trace line" 287 WRITE(0,*)
"| CALL: Number of gstats call" 288 WRITE(0,*)
"| HEAP: Current malloc'd total (KB)" 290 WRITE(0,*)
"| Trace line written for NSTATS_MEM MPI tasks if RSS_MAX" 291 WRITE(0,*)
"| RSS_MAX increases, PGS>0, or HEAP changed" 292 WRITE(0,*)
"| (if LTATS_ALLOC=.TRUE.)" 293 WRITE(0,*)
"`---------------------------------------------------------" 295 WRITE(0,
'(A10,A5,21X,A7,2A8,A7,A5,A5,A8)') &
296 &
"MEMORY ",
" KNUM",
"RSS_INC",
" RSS_MAX",
" HEAP_MX",
" STK", &
297 &
" PGS",
" CALL",
" HEAP" 300 WRITE(0,
'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
301 &
"MEMORY bfr",knum,
ccdesc(knum),imem-iimem,imem,imemh,imems, &
302 & ipag-iipag,(
ncalls(knum)+1)/2,imemc
318 ELSEIF( kswitch == 1 )
THEN 336 imem = getmaxrss()/1024
338 imemh = gethwm()/1024
339 imems = getstk()/1024
342 IF(imem > iimem.OR.ipag > iipag.OR.(
lstats_alloc.AND.(imemc.NE.iimemc)))
THEN 343 WRITE(0,
'(A10,I5,1X,A20,1X,I6,2(1X,I7),1X,I6,1X,I4,1X,I4,1X,I7)') &
344 &
"MEMORY aft ",knum,
ccdesc(knum),imem-iimem,imem,imemh,imems, &
345 & ipag-iipag,
ncalls(knum)/2,imemc
350 imem=imem-
ntmem(knum,2)
352 IF(imem >
ntmem(knum,1))
THEN 359 IF( knum >= 500 .AND.
ncalls(knum)/2 > 10 )
THEN 360 IF( ztime >
timesum(knum)/float(
ncalls(knum)/2) + 0.2_jprd )
THEN 364 IF( knum /= 1007 .AND. knum /= 1013 .AND. knum /= 635 .AND. knum /= 636 )
THEN 374 ELSEIF( kswitch == 2 )
THEN 389 ELSEIF( kswitch == 3 )
THEN real(kind=jprb), dimension(:), allocatable unknown_recvbytes
integer(kind=jpim), dimension(0:jpmaxstat, 5) ntmem
integer(kind=jpim) myproc_stats
real(kind=jprd), dimension(:), allocatable time_trace
real(kind=jprd), dimension(1:jpmaxdelays) tdelay_value
subroutine gstats(KNUM, KSWITCH)
character *10, dimension(1:jpmaxdelays) cdelay_time
real(kind=jprd), dimension(0:jpmaxstat) tvcpusum
integer(kind=jpim), dimension(:), allocatable numrecv
real(kind=jprb), dimension(:), allocatable sendbytes
real(kind=jprd), dimension(0:jpmaxstat) timelcall
integer(kind=jpim) nstats_mem
integer(kind=jpim), dimension(1:jpmaxdelays) ndelay_counter
integer(kind=jpim), dimension(:), allocatable unknown_numrecv
real(kind=jprd), dimension(0:jpmaxstat) ttcpulcall
real(kind=jprd), dimension(0:jpmaxstat) thistime
subroutine, public mpl_statson(SENDNUM, SENDBYTES, RECVNUM, RECVBYTES)
integer(kind=jpim), parameter jpmaxstat
real(kind=jprd), dimension(0:jpmaxstat) thisvcpu
integer(kind=jpim) ntrace_stats
integer(kind=jpim), dimension(:), allocatable ncall_trace
subroutine user_clock(PELAPSED_TIME, PELAPSED_TIME_SINCE, PVECTOR_CP, PTOTAL_CP)
real(kind=jprd) time_last_call
integer(kind=jpim) last_knum
real(kind=jprb), dimension(:), allocatable unknown_sendbytes
character *3, dimension(0:jpmaxstat) cctype
character *50, dimension(0:jpmaxstat) ccdesc
integer(kind=jpim) function, public oml_my_thread()
integer(kind=jpim) ndelay_index
integer(kind=jpim), parameter jperr
real(kind=jprd), dimension(0:jpmaxstat) timesumb
real(kind=jprd), dimension(0:jpmaxstat) tvcpulcall
real(kind=jprd), dimension(0:jpmaxstat) ttcpusum
integer(kind=jpim), dimension(:), allocatable unknown_numsend
integer(kind=jpim) ncalls_total
real(kind=jprb), dimension(:), allocatable recvbytes
real(kind=jprd), dimension(0:jpmaxstat) timemax
integer(kind=jpim) last_kswitch
integer(kind=jpim) function, public oml_max_threads()
integer(kind=jpim), dimension(0:jpmaxstat) nswitchval
integer(kind=jpim), dimension(0:jpmaxstat) ncalls
real(kind=jprd), dimension(0:jpmaxstat) timesum
integer(kind=jpim), parameter jpmaxdelays
real(kind=jprd), dimension(0:jpmaxstat) timesqsum
subroutine, public mpl_statsread(SENDNUM, SENDBYTES, RECVNUM, RECVBYTES)
integer(kind=jpim), dimension(:), allocatable numsend
real(kind=jprd), dimension(0:jpmaxstat) thistcpu