SURFEX v8.1
General documentation of Surfex
gstats_print.F90
Go to the documentation of this file.
1 SUBROUTINE gstats_print(KULOUT,PAVEAVE,KLEN)
2 
3 !**** *GSTATS_PRINT* - print timing statistics
4 
5 ! PURPOSE.
6 ! --------
7 ! To print out timings gathered by GSTATS
8 
9 
10 !** INTERFACE.
11 ! ----------
12 ! *CALL* *GSTATS_PRINT*
13 
14 ! EXPLICIT ARGUMENTS None
15 ! --------------------
16 
17 
18 ! IMPLICIT ARGUMENTS
19 ! --------------------
20 ! Module YOMSTATS
21 
22 ! METHOD.
23 ! -------
24 
25 
26 ! EXTERNALS.
27 ! ----------
28 
29 ! REFERENCE.
30 ! ----------
31 ! ECMWF Research Department documentation of the IFS
32 
33 ! AUTHOR.
34 ! -------
35 ! Mats Hamrud ECMWF
36 
37 ! MODIFICATIONS.
38 ! --------------
39 ! ORIGINAL : 98-11-15
40 ! D.Salmond: 99-09-21 : Timer for SLCOMM2
41 ! G.Mozdzynski 05-09-25 : fix master ncalls overwrite for nproc>1
42 ! C.Larsson 8-May-2006 : Added xml file output
43 ! G.Mozdzynski 16-Oct-2007 : xml file output under switch LXML_STATS
44 ! P.Towers 11-May-2011 : mpl comms statistics output
45 ! F. Vana 05-Mar-2015 Support for single precision
46 ! G. Mozdzynski 18-Aug-2015 Avoid confusion, procs are tasks
47 ! ------------------------------------------------------------------
48 USE parkind1 ,ONLY : jprd, jpim
49 
50 USE yomgstats
51 USE yommpi , ONLY : mrealt
52 #ifdef SFX_MPI
53 USE mpl_module
54 USE mpl_stats_mod
55 #endif
56 
57 IMPLICIT NONE
58 
59 INTEGER(KIND=JPIM) :: KULOUT,KLEN
60 REAL(KIND=JPRD) :: PAVEAVE(0:klen)
61 CHARACTER*7 CLACTION(0:3)
62 CHARACTER(LEN=JPMAXDELAYS*10) CLTEMP
63 INTEGER(KIND=JPIM),PARAMETER :: JPARRAYS=8
64 REAL(KIND=JPRD) :: ZREABUF(jparrays*(jpmaxstat+1))
65 REAL(KIND=JPRD) :: ZAVEAVE(0:jpmaxstat),ZAVEMAX(0:jpmaxstat),ZTIMELCALL(0:jpmaxstat),&
66  &ZTHISTIME(0:JPMAXSTAT),ZFRACMAX(0:JPMAXSTAT),&
67  &ZSUMMAX(0:JPMAXSTAT),ZSUMTOT(0:JPMAXSTAT)
68 REAL(KIND=JPRD) :: ZT_SUM,ZT_SUM2,ZT_SUM3,ZT_SUMIO,ZT_SUM4,ZT_SUM5,ZT_SUMB
69 REAL(KIND=JPRD) :: ZDELAY,ZDELAY_MAX
70 
71 REAL(KIND=JPRD) :: ZMPL(nproc_stats)
72 REAL(KIND=JPRD) :: ZBAR(nproc_stats)
73 REAL(KIND=JPRD) :: ZGBR(nproc_stats)
74 REAL(KIND=JPRD) :: ZGB2(nproc_stats)
75 REAL(KIND=JPRD) :: ZOMP(nproc_stats)
76 REAL(KIND=JPRD) :: ZIO (nproc_stats)
77 REAL(KIND=JPRD) :: ZSER(nproc_stats)
78 REAL(KIND=JPRD) :: ZMXD(nproc_stats)
79 
80 INTEGER(KIND=JPIM) :: ICALLSX(0:jpmaxstat)
81 
82 ! LOCAL INTEGER SCALARS
83 INTEGER(KIND=JPIM) :: ICALLS, ILBUF, ILSEND, ILRECV, &
84  &ISEND, ITAG, JJ, JNUM, JROC, JCALL, ICALLER,IACTION
85 INTEGER(KIND=JPIM) :: IMEM, INUM, JMEM
86 INTEGER(KIND=JPIM) :: JDELAY, IDELAY
87 INTEGER(KIND=JPIM) :: NSEND,NRECV
88 
89 ! LOCAL REAL SCALARS
90 REAL(KIND=JPRD) :: ZAVE, ZAVETCPU, ZAVEVCPU, ZCOMTIM, ZDETAIL,&
91  &ZFRAC, ZMAX, ZMEAN, ZSTDDEV, ZSUM, ZSUMB, &
92  &ZTOTAL, ZTOTCPU, ZTOTUNBAL, ZTOTVCPU, &
93  &ZUNBAL, ZMEANT, ZMAXT
94 
95 REAL(KIND=JPRD) :: SBYTES,RBYTES,SENDRATE,RECVRATE
96 REAL(KIND=JPRD) :: AVGSENDLEN,AVGRECVLEN
97 REAL(KIND=JPRD) :: MAXCOMMTIME(501:1000)
98 REAL(KIND=JPRD) :: TOTSENDBYTES(501:1000)
99 REAL(KIND=JPRD) :: TOTRECVBYTES(501:1000)
100 
101 INTEGER(KIND=JPIM) :: IXMLLUN
102 
103 ! ------------------------------------------------------------------
104 
105 ! write(0,*) "JPMAXSTAT,NPRNT_STATS =",JPMAXSTAT,NPRNT_STATS
106 
107 ilbuf = jparrays*(jpmaxstat+1)
108 ilrecv = 500*4
109 zaveave(:) = 0.0_jprd
110 zavemax(:) = 0.0_jprd
111 zfracmax(:)= 0.0_jprd
112 zsummax(:)= 0.0_jprd
113 zsumtot(:)= 0.0_jprd
114 
115 ! OPEN GSTATS.XML for xml statistics
116 IF(lxml_stats)THEN
117  ixmllun=40
118  OPEN (unit=ixmllun, file='gstats.xml',action='write')
119  WRITE(ixmllun,'(A)')'<?xml version="1.0" encoding="UTF-8"?>'
120  WRITE(ixmllun,'(A)')'<gstats '
121  WRITE(ixmllun,'(A)')' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"&
122  & xsi:schemaLocation="http://www.ecmwf.int/services/prepifs/gstats ./gstats.xsd">'
123 ENDIF
124 
125 WRITE(kulout,'(A)')'===-=== START OF TIMING STATISTICS ===-==='
126 IF(lsyncstats.AND.nproc_stats>1) THEN
127  WRITE(kulout,'(A)')'START OF TIMINGS SYNCRONIZED'
128 ENDIF
129 IF(ldetailed_stats) THEN
130  lstats_comms=.true.
131  lstats_omp=.true.
132 ENDIF
133 IF(lbarrier_stats2) THEN
134  DO jnum=1,jbmaxbase
135  IF(nbar_ptr(jnum) > 0) THEN
136  inum=nbar_ptr(jnum)
137  ccdesc(inum)=ccdesc(jnum)
138  cctype(inum)='GB2'
139  ENDIF
140  ENDDO
141 ENDIF
142 IF (lstats .AND. myproc_stats /= 1) THEN
143  jj = 1
144  DO jnum=0,jpmaxstat
145  zreabuf(jj ) = timesum(jnum)
146  zreabuf(jj+1) = timesqsum(jnum)
147  zreabuf(jj+2) = timemax(jnum)
148  zreabuf(jj+3) = timesumb(jnum)
149  zreabuf(jj+4) = timelcall(jnum)
150  zreabuf(jj+5) = ncalls(jnum)
151  zreabuf(jj+6) = ttcpusum(jnum)
152  zreabuf(jj+7) = tvcpusum(jnum)
153  jj = jj+jparrays
154  ENDDO
155  ilsend = ilbuf
156  isend =1
157  itag = jptagstat
158 #ifdef SFX_MPI
159  CALL mpl_send(zreabuf(1:ilsend),kdest=nprcids_stats(isend), &
160  & ktag=itag,cdstring='GSTATS_PRINT:')
161 
162  IF(lstats_mpl) THEN
163  jj=1
164  DO jnum=501,1000
165  zreabuf(jj ) = numsend(jnum)
166  zreabuf(jj+1) = sendbytes(jnum)
167  zreabuf(jj+2) = numrecv(jnum)
168  zreabuf(jj+3) = recvbytes(jnum)
169  jj=jj+4
170  ENDDO
171 
172  ilsend = jj-1
173  itag = jptagstat + 1
174 
175  CALL mpl_send(zreabuf(1:ilsend),kdest=nprcids_stats(isend), &
176  & ktag=itag,cdstring='GSTATS_PRINT:')
177 
178  ENDIF
179 #endif
180 ELSEIF(lstats) THEN
181 #ifdef SFX_MPI
182  IF(lstats_mpl) THEN
183  DO jnum=501,1000
184  maxcommtime(jnum)=0.0_jprd
185  totsendbytes(jnum)=0.0_jprd
186  totrecvbytes(jnum)=0.0_jprd
187  ENDDO
188  ENDIF
189  DO jroc=1,nproc_stats
190  IF (jroc /= 1) THEN
191  itag = jptagstat
192  CALL mpl_recv(zreabuf(1:ilbuf),ksource=nprcids_stats(jroc), &
193  & ktag=itag,cdstring='GSTATS_PRINT:')
194  jj = 1
195  DO jnum=0,jpmaxstat
196  timesum(jnum) = zreabuf(jj )
197  timesqsum(jnum) = zreabuf(jj+1)
198  timemax(jnum) = zreabuf(jj+2)
199  timesumb(jnum) = zreabuf(jj+3)
200  timelcall(jnum) = zreabuf(jj+4)
201  icallsx(jnum) = nint(zreabuf(jj+5))
202  ttcpusum(jnum) = zreabuf(jj+6)
203  tvcpusum(jnum) = zreabuf(jj+7)
204  jj = jj+jparrays
205  ENDDO
206 
207  IF(lstats_mpl) THEN
208  itag = jptagstat+1
209  CALL mpl_recv(zreabuf(1:ilrecv),ksource=nprcids_stats(jroc), &
210  & ktag=itag,cdstring='GSTATS_PRINT:')
211  jj = 1
212  DO jnum=501,1000
213  numsend(jnum) = nint(zreabuf(jj))
214  sendbytes(jnum) = zreabuf(jj+1)
215  numrecv(jnum) = nint(zreabuf(jj+2))
216  recvbytes(jnum) = zreabuf(jj+3)
217  jj=jj+4
218  ENDDO
219  ENDIF
220 
221  ELSE
222  icallsx(:)=ncalls(:)
223 
224  ENDIF
225  IF (jroc == 1) THEN
226  ztotal=timesum(0)
227  ztotcpu = ttcpusum(0)
228  ztotvcpu = tvcpusum(0)
229  ENDIF
230  IF(.NOT. lstatscpu) THEN
231  ttcpusum(1:jpmaxstat) = -0.0_jprd
232  tvcpusum(1:jpmaxstat) = -0.0_jprd
233  ENDIF
234  zt_sum=0.0_jprd
235  zt_sum2=0.0_jprd
236  zt_sum3=0.0_jprd
237  zt_sum4=0.0_jprd
238  zt_sum5=0.0_jprd
239  zt_sumio=0.0_jprd
240  zt_sumb=0.0_jprd
241  IF( ldetailed_stats .AND. jroc <= nprnt_stats ) THEN
242  WRITE(kulout,'(A,I4)') 'TIMING STATISTICS:TASK=',jroc
243  IF(lxml_stats)THEN
244  WRITE(ixmllun,'(A,I4,A)')'<timing task="',jroc,'">'
245  ENDIF
246  IF(nproc_stats > 1) THEN
247  WRITE(kulout,'(A,F6.1,A)')'STARTUP COST ',&
248  &time_start(jroc),' SECONDS'
249  ENDIF
250  WRITE(kulout,'(A)')&
251  &' NUM ROUTINE '//&
252  &'CALLS SUM(s) AVE(ms) CPUAVE(ms) VAVE(ms) '//&
253  &'STDDEV(ms) MAX(ms) '//&
254  &'SUMB(s) FRAC(%)'
255  ENDIF
256  DO jnum=0,jpmaxstat
257  IF(icallsx(jnum) > 1) THEN
258  icalls = icallsx(jnum)/2
259  zsum = timesum(jnum)
260  zave = timesum(jnum)/icalls*1000._jprd
261  zmax = timemax(jnum)*1000._jprd
262  zsumb = timesumb(jnum)
263  zfrac = timesum(jnum)/ztotal*100.0_jprd
264  zfracmax(jnum)=max(zfracmax(jnum),zfrac)
265  zsummax(jnum)=max(zsummax(jnum),timesum(jnum))
266  zsumtot(jnum)=zsumtot(jnum)+zsum
267  zaveave(jnum)=zaveave(jnum)+zave
268  zavemax(jnum)=max(zavemax(jnum),zave)
269  zavetcpu = ttcpusum(jnum)/icalls*1000._jprd
270  zavevcpu = tvcpusum(jnum)/icalls*1000._jprd
271  IF(icalls > 1 ) THEN
272  zstddev = 1000._jprd*&
273  &sqrt(max((timesqsum(jnum)-timesum(jnum)**2/icalls)&
274  &/(icalls-1),0.0_jprd))
275  ELSE
276  zstddev = 0.0_jprd
277  ENDIF
278  IF(cctype(jnum).EQ.'MPL') THEN
279  zt_sum=zt_sum+zsum
280  zt_sumb=zt_sumb+zsumb
281  ENDIF
282  IF(cctype(jnum).EQ.'BAR' .OR. cctype(jnum).EQ.'GBR' .OR. cctype(jnum).EQ.'GB2' ) THEN
283  zt_sum4=zt_sum4+zsum
284  zt_sumb=zt_sumb+zsumb
285  ENDIF
286  IF(cctype(jnum).EQ.'OMP') THEN
287  zt_sum2=zt_sum2+zsum
288  zt_sumb=zt_sumb+zsumb
289  ENDIF
290  IF(cctype(jnum).EQ.'IO-') THEN
291  zt_sumio=zt_sumio+zsum
292  zt_sumb=zt_sumb+zsumb
293  ENDIF
294  IF(cctype(jnum).EQ.'SER') THEN
295  zt_sum3=zt_sum3+zsum
296  zt_sumb=zt_sumb+zsumb
297  ENDIF
298  IF(cctype(jnum).EQ.'MXD') THEN
299  zt_sum5=zt_sum5+zsum
300  zt_sumb=zt_sumb+zsumb
301  ENDIF
302  IF( ldetailed_stats .AND. jroc <= nprnt_stats ) THEN
303 ! IF(JNUM < 501 .OR. LSTATS_COMMS .OR. LSTATS_OMP) THEN
304  WRITE(kulout,'(I4,1X,A3,1X,A40,1X,I5,6(1X,F9.1),1X,F5.1,1X,F8.2)')&
305  &jnum,cctype(jnum),ccdesc(jnum),icalls,zsum,zave,zavetcpu,zavevcpu,&
306  &zstddev,zmax,zsumb,zfrac
307  IF(lxml_stats)THEN
308  WRITE(ixmllun,&
309  & '(A,I4,A,//,A,A40,A,//,A,I5,A,//,6(A,F9.1,A,//),A,F5.1,A,//,A,F8.2,A,//,A)')&
310  & '<num id="',jnum,'">',&
311  & '<description>',ccdesc(jnum),'</description>',&
312  & '<sum unit="seconds">',icalls,'</sum>',&
313  & '<sumcall unit="ms">',zsum,'</sumcall>',&
314  & '<average unit="ms">',zave,'</average>',&
315  & '<cpuaverage unit="ms">',zavetcpu,'</cpuaverage>',&
316  & '<vave unit="ms">',zavevcpu,'</vave>',&
317  & '<stddev unit="ms">',zstddev,'</stddev>',&
318  & '<max unit="ms">',zmax,'</max>',&
319  & '<sumb unit="seconds">',zsumb,'</sumb>',&
320  & '<frac unit="percent">',zfrac,'</frac>',&
321  & '</num>'
322  ENDIF
323 ! ENDIF
324  ENDIF
325  ENDIF
326  ENDDO
327 ! ZCOMTIM = SUM(TIMESUM(51:99))+SUM(TIMESUM(151:199))
328 ! ZDETAIL = SUM(TIMESUM(:))-TIMESUM(0)-TIMESUM(1)-SUM(TIMESUM(20:23))
329  IF( ldetailed_stats .AND. jroc <= nprnt_stats ) THEN
330  WRITE(kulout,*) ''
331  WRITE(kulout,'((A,2F8.1))')&
332  &'CPU-TIME AND VECTOR CPU-TIME AS PERCENT OF TOTAL ',&
333  &ttcpusum(0)/timesum(0)*100.0_jprd,tvcpusum(0)/timesum(0)*100.0_jprd
334  IF(lxml_stats)THEN
335  WRITE(ixmllun,'((A,F8.1,A,//,A,F8.1,A))')&
336  &'<cpufraction>',&
337  &ttcpusum(0)/timesum(0)*100.0_jprd,&
338  &'</cpufraction>',&
339  &'<cpuvectorfraction>',&
340  &tvcpusum(0)/timesum(0)*100.0_jprd,&
341  &'</cpuvectorfraction>'
342  ENDIF
343 
344 
345  IF(zt_sum > 0.0_jprd) THEN
346  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN COMMUNICATIONS = '&
347  & ,zt_sum, ' SECONDS ',zt_sum/timesum(0)*100.0_jprd,&
348  &' PERCENT OF TOTAL'
349  IF(lxml_stats)THEN
350  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)')'<zcom unit="seconds">',&
351  &zt_sum,'</zcom>',&
352  &'<fraczcom unit="percent">',zt_sum/timesum(0)*100.0_jprd,&
353  &'</fraczcom>'
354  ENDIF
355  ENDIF
356  IF(zt_sum2 > 0.0_jprd) THEN
357  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN PARALLEL REGIONS = '&
358  & ,zt_sum2, ' SECONDS ',zt_sum2/timesum(0)*100.0_jprd,&
359  &' PERCENT OF TOTAL'
360  IF(lxml_stats)THEN
361  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)') &
362  &'<parallelztime unit="seconds">',&
363  &zt_sum2, '</parallelztime>',&
364  &'<fracparallelztime unit="percent">',zt_sum2/timesum(0)*100.0_jprd,&
365  &'</fracparallelztime>'
366  ENDIF
367  ENDIF
368  IF(zt_sumio > 0.0_jprd) THEN
369  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN I/O SECTIONS = '&
370  & ,zt_sumio, ' SECONDS ',zt_sumio/timesum(0)*100.0_jprd,&
371  &' PERCENT OF TOTAL'
372  IF(lxml_stats)THEN
373  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)')'<ioztime unit="seconds">',&
374  &zt_sumio, '</ioztime>',&
375  &'<fracioztime unit="percent">',zt_sumio/timesum(0)*100.0_jprd,&
376  &'</fracioztime>'
377  ENDIF
378  ENDIF
379  IF(zt_sum3 > 0.0_jprd) THEN
380  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN SERIAL SECTIONS = '&
381  & ,zt_sum3, ' SECONDS ',zt_sum3/timesum(0)*100.0_jprd,&
382  &' PERCENT OF TOTAL'
383  IF(lxml_stats)THEN
384  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)')'<serialztime unit="seconds">',&
385  & zt_sum3,'</serialztime>',&
386  &'<fracserialztime unit="percent">',&
387  &zt_sum3/timesum(0)*100.0_jprd,&
388  &'</fracserialztime>'
389  ENDIF
390  ENDIF
391  IF(zt_sum4 > 0.0_jprd) THEN
392  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN BARRIERS = '&
393  & ,zt_sum4, ' SECONDS ',zt_sum4/timesum(0)*100.0_jprd,&
394  &' PERCENT OF TOTAL'
395  IF(lxml_stats)THEN
396  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)')&
397  &'<barrierztime unit="seconds">',&
398  &zt_sum4,'</barrierztime>',&
399  & '<fracbarrierztime unit="percent">',&
400  & zt_sum4/timesum(0)*100.0_jprd,'</fracbarrierztime>'
401  ENDIF
402  ENDIF
403  IF(zt_sum5 > 0.0_jprd) THEN
404  WRITE(kulout,'(A,F10.1,A,F6.2,A)')'SUMMED TIME IN MIXED SECTIONS = '&
405  & ,zt_sum5, ' SECONDS ',zt_sum5/timesum(0)*100.0_jprd,&
406  &' PERCENT OF TOTAL'
407  WRITE(ixmllun,'(A,F10.1,A,//,A,F6.2,A)')&
408  &'<mixedztime unit="seconds">',&
409  &zt_sum5,'</mixedztime>',&
410  & '<fracmixedztime unit="percent">',&
411  & zt_sum5/timesum(0)*100.0_jprd,'</fracmixedztime>'
412  ENDIF
413  IF(lstats_comms.AND.lstats_omp)THEN
414  WRITE(kulout,'(A,F8.2)')'FRACTION OF TOTAL TIME ACCOUNTED FOR ',&
415  & (zt_sum+zt_sum2+zt_sumio+zt_sum3+zt_sum4+zt_sum5)/timesum(0)*100.0_jprd
416  WRITE(kulout,'(A,F8.2)')'FRACTION OF TOTAL TIME ACCOUNTED FOR INCLUDING SUMB ',&
417  & (zt_sum+zt_sum2+zt_sumio+zt_sum3+zt_sum4+zt_sum5+zt_sumb)/timesum(0)*100.0_jprd
418  WRITE(kulout,'(" ")')
419  IF(lxml_stats)THEN
420  WRITE(ixmllun,'(A,F8.2,A)')'<fractotal unit="percent">',&
421  &(zt_sum+zt_sum2+zt_sumio+zt_sum3+zt_sum4+zt_sum5)/timesum(0)*100.0_jprd,&
422  &'</fractotal>'
423  ENDIF
424  ENDIF
425  ENDIF
426  IF( ldetailed_stats .AND. jroc < 3 ) THEN
427  IF(lxml_stats)THEN
428  WRITE(ixmllun,'(A)')'</timing>'
429  ENDIF
430  ENDIF
431  IF( lstats_mpl .AND. jroc <= nprnt_stats ) THEN
432  WRITE(kulout,'(/,A,I4,/)') 'COMMUNICATIONS STATISTICS:TASK=',jroc
433  WRITE(kulout,'(A)') &
434  &' NUM ROUTINE '//&
435  &' SUM(s) SENDS AVG(kb) TOTAL(MB) MB/s '//&
436  &' RECVS AVG(kb) TOTAL(MB) MB/s '
437 
438  DO jnum=501,1000
439  IF(numsend(jnum) /= 0 .OR. numrecv(jnum) /= 0 ) THEN
440  sendrate=sendbytes(jnum)*1.e-6/timesum(jnum)
441  recvrate=recvbytes(jnum)*1.e-6/timesum(jnum)
442  IF(numsend(jnum) /= 0) THEN
443  avgsendlen=sendbytes(jnum)*1.e-3/numsend(jnum)
444  ELSE
445  avgsendlen=0.0_jprd
446  ENDIF
447  IF(numrecv(jnum) /= 0) THEN
448  avgrecvlen=recvbytes(jnum)*1.e-3/numrecv(jnum)
449  ELSE
450  avgsendlen=0.0_jprd
451  ENDIF
452  IF(numrecv(jnum) /= 0) THEN
453  avgrecvlen=recvbytes(jnum)*1.e-3/numrecv(jnum)
454  ELSE
455  avgrecvlen=0.0_jprd
456  ENDIF
457  WRITE(kulout,'(I6,1X,A40,f6.1,2(I8,3F8.1))') &
458  & jnum,ccdesc(jnum),timesum(jnum),&
459  & numsend(jnum),avgsendlen,sendbytes(jnum)*1.e-6, sendrate, &
460  & numrecv(jnum),avgrecvlen,recvbytes(jnum)*1.e-6, recvrate
461  ENDIF
462  ENDDO
463  WRITE(kulout,'(/,A,I4,/)') 'UNKNOWN COMMUNICATIONS STATISTICS:TASK=', jroc
464  WRITE(kulout,'(A)') &
465  &' NUM BEFORE ROUTINE '//&
466  &' SENDS TOTAL(MB) '//&
467  &'RECVS TOTAL(MB) '
468  DO jnum=501,1000
469  IF(unknown_numsend(jnum) /= 0 .OR. unknown_numrecv(jnum) /= 0 ) THEN
470  WRITE(kulout,'(I6,1X,A40,2(I8,F8.1))') &
471  & jnum,ccdesc(jnum),&
472  & unknown_numsend(jnum),unknown_sendbytes(jnum)*1.e-6, &
473  & unknown_numrecv(jnum),unknown_recvbytes(jnum)*1.e-6
474  ENDIF
475  ENDDO
476  WRITE(kulout,'(7x,"TOTAL",35x,2(I8,F8.1),//)') &
477  & sum(unknown_numsend(:)),sum(unknown_sendbytes(:))*1.e-6 , &
478  & sum(unknown_numrecv(:)),sum(unknown_recvbytes(:))*1.e-6
479 
480  ENDIF
481 
482  IF(lstats_mpl) THEN
483  DO jnum=501,1000
484  totsendbytes(jnum) = totsendbytes(jnum) + sendbytes(jnum)
485  totrecvbytes(jnum) = totrecvbytes(jnum) + recvbytes(jnum)
486  IF(sendbytes(jnum).GT.0.0_jprd.OR. &
487  & recvbytes(jnum).GT.0.0_jprd) THEN
488  maxcommtime(jnum) = max(maxcommtime(jnum),timesum(jnum))
489  ENDIF
490  ENDDO
491  ENDIF
492 
493  ENDDO
494 #endif
495  IF(lxml_stats)THEN
496  WRITE(ixmllun,'(A)')'<timing_all_tasks>'
497  ENDIF
498  WRITE(kulout,*) ''
499  WRITE(kulout,'(A)') 'STATS FOR ALL TASKS'
500  WRITE(kulout,'(A)') &
501  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
502  ztotunbal = 0.0_jprd
503  DO jnum=0,500
504  IF(ncalls(jnum) > 1) THEN
505  icalls = ncalls(jnum)/2
506  zmean = zaveave(jnum)/nproc_stats
507  zmax = zavemax(jnum)
508  zmeant = zsumtot(jnum)/nproc_stats
509  zmaxt = zsummax(jnum)
510  IF(zmeant .NE. 0.0)THEN
511  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
512  ELSE
513  zunbal=0.0
514  ENDIF
515  zfrac=zfracmax(jnum)
516  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
517  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
518 
519  IF(lxml_stats)THEN
520  WRITE(ixmllun,'(A,I4,A,//,A,A40,A,//,A,I8,A,2(A,F9.1,A,//),2(A,F9.2,A,//),A)')&
521  &'<item id="',jnum,'">',&
522  &'<description>',ccdesc(jnum),'</description>',&
523  &'<calls>',icalls,'</calls>',&
524  &'<mean unit="ms">',zmean,'</mean>',&
525  &'<max unit="ms">',zmax,'</max>',&
526  &'<fraction unit="percent">',zfrac,'</fraction>',&
527  &'<unbalanced unit="percent">',zunbal,'</unbalanced>','</item>'
528  ENDIF
529  ENDIF
530  ENDDO
531  IF(lxml_stats)THEN
532  WRITE(ixmllun,'(A)')'</timing_all_tasks>'
533  ENDIF
534 
535 IF(lstats_comms)THEN
536  IF(lxml_stats)THEN
537  WRITE(ixmllun,'(A)')'<timing_communications>'
538  ENDIF
539  WRITE(kulout,*) ''
540  WRITE(kulout,'(A)') 'STATS FOR COMMUNICATIONS'
541  WRITE(kulout,'(A)') &
542  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
543  zt_sum=0.0
544  DO jnum=500,jpmaxstat
545  IF((cctype(jnum).EQ."MPL".OR.cctype(jnum).EQ."BAR".OR.cctype(jnum).EQ."GBR".OR.cctype(jnum).EQ."GB2") &
546  & .AND.ncalls(jnum) > 1) THEN
547  icalls = ncalls(jnum)/2
548  zmean = zaveave(jnum)/nproc_stats
549  zmax = zavemax(jnum)
550  zmeant = zsumtot(jnum)/nproc_stats
551  zmaxt = zsummax(jnum)
552  IF(zmeant .NE. 0.0)THEN
553  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
554  ELSE
555  zunbal=0.0
556  ENDIF
557  zfrac=zfracmax(jnum)
558  ztotunbal = ztotunbal+(zmaxt-zmeant)
559  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
560  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
561  IF(lxml_stats)THEN
562  WRITE(ixmllun,'(A,I4,A,//,A,A40,A,//,A,I8,A,//,2(A,F9.1,A,//),2(A,F9.2,A,//),A)')&
563  &'<comitem id="',jnum,'">',&
564  &'<description>',ccdesc(jnum),'</description>',&
565  &'<calls>',icalls,'</calls>',&
566  &'<mean unit="ms">',zmean,'</mean>',&
567  &'<max unit="ms">',zmax,'</max>',&
568  &'<fraction unit="percent">',zfrac,'</fraction>',&
569  &'<unbalanced unit="percent">',zunbal,'</unbalanced>','</comitem>'
570  ENDIF
571 
572  zt_sum=zt_sum+zmeant
573  ENDIF
574  ENDDO
575 
576  WRITE(kulout,*) ''
577  WRITE(kulout,'(A,F10.1,A)')'SUMMED TIME IN COMMUNICATIONS = ',zt_sum, ' SECONDS '
578 #ifdef SFX_MPI
579  IF(lstats_mpl) THEN
580  WRITE(kulout,'(/,A,/)') 'TOTAL COMMUNICATIONS VOLUMES AND BANDWIDTH'
581  WRITE(kulout,'(A)') &
582  &' NUM ROUTINE '//&
583  &' SUM(s) SEND(GB) RECV(GB) GB/s'
584  DO jnum=501,1000
585  IF(totsendbytes(jnum).GT.0.0_jprd.OR.totrecvbytes(jnum).GT.0.0_jprd) THEN
586  WRITE(kulout,'(I6,1X,A40,f6.1,2F10.1,F8.1)') &
587  & jnum,ccdesc(jnum),maxcommtime(jnum),&
588  & totsendbytes(jnum)*1.e-9, &
589  & totrecvbytes(jnum)*1.e-9 , &
590  & (totsendbytes(jnum)*1.e-9)/maxcommtime(jnum)
591  ENDIF
592  ENDDO
593  WRITE(kulout,'(/,A,42x,f6.1,2F10.1,F8.1)') &
594  & 'TOTAL', &
595  & sum(maxcommtime) , &
596  & sum(totsendbytes)*1.e-9, &
597  & sum(totrecvbytes)*1.e-9, &
598  & (sum(totsendbytes)*1.e-9)/sum(maxcommtime)
599  ENDIF
600 #endif
601 
602  IF(lxml_stats)THEN
603  WRITE(ixmllun,'(A,F10.1,A)')'<zcom unit="seconds">',zt_sum, '</zcom>'
604  ENDIF
605  WRITE(kulout,*) ''
606  IF(lxml_stats)THEN
607  WRITE(ixmllun,'(A)')'</timing_communications>'
608  ENDIF
609 
610 ENDIF
611 IF(lstats_omp)THEN
612  IF(lxml_stats)THEN
613  WRITE(ixmllun,'(A)')'<timing_parallel>'
614  ENDIF
615  WRITE(kulout,*) ''
616  WRITE(kulout,'(A)') 'STATS FOR PARALLEL REGIONS'
617  WRITE(kulout,'(A)') &
618  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
619  zt_sum=0.0
620  DO jnum=500,jpmaxstat
621  IF(cctype(jnum).EQ."OMP".AND.ncalls(jnum) > 1) THEN
622  icalls = ncalls(jnum)/2
623  zmean = zaveave(jnum)/nproc_stats
624  zmax = zavemax(jnum)
625  zmeant = zsumtot(jnum)/nproc_stats
626  zmaxt = zsummax(jnum)
627  IF(zmeant .NE. 0.0)THEN
628  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
629  ELSE
630  zunbal=0.0
631  ENDIF
632  zfrac=zfracmax(jnum)
633  ztotunbal = ztotunbal+(zmaxt-zmeant)
634  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
635  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
636  IF(lxml_stats)THEN
637  WRITE(ixmllun,'(A,I4,A,//,A,A40,A,//,A,I8,A,//,2(A,F9.1,A,//),2(A,F9.2,A,//),A)')&
638  &'<parallelitem id="',jnum,'">',&
639  &'<description>',ccdesc(jnum),'</description>',&
640  &'<calls>',icalls,'</calls>',&
641  &'<mean unit="seconds">',zmean,'</mean>',&
642  &'<max unit="seconds">',zmax,'</max>',&
643  &'<fraction unit="percent">',zfrac,'</fraction>',&
644  &'<unbalanced unit="percent">',zunbal,'</unbalanced>',&
645  &'</parallelitem>'
646  ENDIF
647 
648  zt_sum=zt_sum+zmeant
649  ENDIF
650  ENDDO
651 
652  WRITE(kulout,*) ''
653  WRITE(kulout,'(A,F10.1,A)')'SUMMED TIME IN PARALLEL REGIONS = ',zt_sum, ' SECONDS '
654  WRITE(kulout,*) ''
655 
656  IF(lxml_stats)THEN
657  WRITE(ixmllun,'(A,F10.1,A)')'<zpar unit="seconds">',zt_sum, '</zpar>'
658  ENDIF
659 
660  WRITE(kulout,*) ''
661  WRITE(kulout,'(A)') 'STATS FOR I/O REGIONS'
662  WRITE(kulout,'(A)') &
663  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
664  zt_sum=0.0
665  DO jnum=500,jpmaxstat
666  IF(cctype(jnum).EQ."IO-".AND.ncalls(jnum) > 1) THEN
667  icalls = ncalls(jnum)/2
668  zmean = zaveave(jnum)/nproc_stats
669  zmax = zavemax(jnum)
670  zmeant = zsumtot(jnum)/nproc_stats
671  zmaxt = zsummax(jnum)
672  IF(zmeant .NE. 0.0)THEN
673  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
674  ELSE
675  zunbal=0.0
676  ENDIF
677  zfrac=zfracmax(jnum)
678  ztotunbal = ztotunbal+(zmaxt-zmeant)
679  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
680  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
681  IF(lxml_stats)THEN
682  WRITE(ixmllun,'(A,I4,A,//,A,A40,A,//,A,I8,A,//,2(A,F9.1,A,//),2(A,F9.2,A,//),A)')&
683  &'<para_io_item id="',jnum,'">',&
684  &'<description>',ccdesc(jnum),'</description>',&
685  &'<calls>',icalls,'</calls>',&
686  &'<mean unit="seconds">',zmean,'</mean>','<max unit="seconds">',&
687  & zmax,'</max>',&
688  &'<fraction unit="percent">',zfrac,'</fraction>',&
689  &'<unbalanced unit="percent">',zunbal,'</unbalanced>',&
690  &'</para_io_item>'
691  ENDIF
692 
693  zt_sum=zt_sum+zmeant
694  ENDIF
695  ENDDO
696 
697  WRITE(kulout,*) ''
698  WRITE(kulout,'(A,F10.1,A)')'SUMMED TIME IN I/O REGIONS = ',&
699  &zt_sum, ' SECONDS '
700  WRITE(kulout,*) ''
701 
702  IF(lxml_stats)THEN
703  WRITE(ixmllun,'(A,F10.1,A)')'<zio unit="seconds">',zt_sum,'</zio>'
704  ENDIF
705 
706  WRITE(kulout,*) ''
707  WRITE(kulout,'(A)') 'STATS FOR SERIAL(no OMP) REGIONS'
708  WRITE(kulout,'(A)') &
709  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
710  zt_sum=0.0
711  DO jnum=500,jpmaxstat
712  IF(cctype(jnum).EQ."SER".AND.ncalls(jnum) > 1) THEN
713  icalls = ncalls(jnum)/2
714  zmean = zaveave(jnum)/nproc_stats
715  zmax = zavemax(jnum)
716  zmeant = zsumtot(jnum)/nproc_stats
717  zmaxt = zsummax(jnum)
718  IF(zmeant .NE. 0.0)THEN
719  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
720  ELSE
721  zunbal=0.0
722  ENDIF
723  zfrac=zfracmax(jnum)
724  ztotunbal = ztotunbal+(zmaxt-zmeant)
725  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
726  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
727  IF(lxml_stats)THEN
728  WRITE(ixmllun,'(A,I4,A,A,A40,A,A,I8,A,2(A,F9.1,A),2(A,F9.2,A,//),A)')&
729  &'<serialitem id="',jnum,'">',&
730  &'<description>',ccdesc(jnum),'</description>',&
731  &'<calls>',icalls,'</calls>',&
732  &'<mean unit="ms">',zmean,'</mean>','<max unit="ms">',zmax,'</max>',&
733  &'<fraction unit="percent">',zfrac,'</fraction>',&
734  &'<unbalanced unit="percent">',zunbal,'</unbalanced>','</serialitem>'
735  ENDIF
736 
737  zt_sum=zt_sum+zmeant
738  ENDIF
739  ENDDO
740 
741  WRITE(kulout,*) ''
742  WRITE(kulout,'(A,F10.1,A)')'SUMMED TIME IN SERIAL REGIONS = ',zt_sum, ' SECONDS '
743  WRITE(kulout,*) ''
744 
745  IF(lxml_stats)THEN
746  WRITE(ixmllun,'(A,F10.1,A)')'<zserial unit="seconds">',&
747  &zt_sum, '</zserial>'
748  ENDIF
749 
750  WRITE(kulout,*) ''
751  WRITE(kulout,'(A)') 'STATS FOR MIXED SECTIONS'
752  WRITE(kulout,'(A)') &
753  &' NUM ROUTINE CALLS MEAN(ms) MAX(ms) FRAC(%) UNBAL(%)'
754  zt_sum=0.0
755  DO jnum=500,jpmaxstat
756  IF(cctype(jnum).EQ."MXD".AND.ncalls(jnum) > 1) THEN
757  icalls = ncalls(jnum)/2
758  zmean = zaveave(jnum)/nproc_stats
759  zmax = zavemax(jnum)
760  zmeant = zsumtot(jnum)/nproc_stats
761  zmaxt = zsummax(jnum)
762  IF(zmeant .NE. 0.0)THEN
763  zunbal= (zmaxt-zmeant)/ztotal*100._jprd
764  ELSE
765  zunbal=0.0
766  ENDIF
767  zfrac=zfracmax(jnum)
768  ztotunbal = ztotunbal+(zmaxt-zmeant)
769  WRITE(kulout,'(I4,1X,A40,1X,I8,2(1X,F9.1),2(1X,F9.2))')&
770  &jnum,ccdesc(jnum),icalls,zmean,zmax,zfrac,zunbal
771  IF(lxml_stats)THEN
772  WRITE(ixmllun,'(A,I4,A,A,A40,A,A,I8,A,2(A,F9.1,A),2(A,F9.2,A,//),A)')&
773  &'<mixeditem id="',jnum,'">',&
774  &'<description>',ccdesc(jnum),'</description>',&
775  &'<calls>',icalls,'</calls>',&
776  &'<mean unit="ms">',zmean,'</mean>','<max unit="ms">',zmax,'</max>',&
777  &'<fraction unit="percent">',zfrac,'</fraction>',&
778  &'<unbalanced unit="percent">',zunbal,'</unbalanced>','</mixeditem>'
779  ENDIF
780 
781  zt_sum=zt_sum+zmeant
782  ENDIF
783  ENDDO
784 
785  WRITE(kulout,*) ''
786  WRITE(kulout,'(A,F10.1,A)')'SUMMED TIME IN MIXED SECTIONS = ',zt_sum, ' SECONDS '
787  WRITE(kulout,*) ''
788 
789  IF(lxml_stats)THEN
790  WRITE(ixmllun,'(A,F10.1,A)')'<zmixed unit="seconds">',&
791  &zt_sum, '</zmixed>'
792  ENDIF
793 
794  IF(lxml_stats)THEN
795  WRITE(ixmllun,'(A)')'</timing_parallel>'
796  ENDIF
797 
798 ENDIF
799 
800  WRITE(kulout,'(A,F10.1,A,F4.1,A)')&
801  &'TOTAL MEASURED IMBALANCE =',ztotunbal,&
802  &' SECONDS, ',ztotunbal/ztotal*100._jprd,' PERCENT'
803 ELSE
804  ztotal=timesum(0)
805  ztotcpu = ttcpusum(0)
806  ztotvcpu = tvcpusum(0)
807 ENDIF
808 
809 IF ( myproc_stats == 1) THEN
810  WRITE(kulout,'(3(A,F10.1)/)')'TOTAL WALLCLOCK TIME ',ztotal,&
811  &' CPU TIME',ztotcpu,' VECTOR TIME ',ztotvcpu
812  IF(lxml_stats)THEN
813  WRITE(ixmllun,'(3(A,F10.1,A,//)/)')'<totalwallclocktime>',ztotal,&
814  &'</totalwallclocktime>',&
815  &'<cputime>',ztotcpu,'</cputime>',&
816  & '<vectortime>',ztotvcpu,'</vectortime>'
817  ENDIF
818 ENDIF
819 IF( ldetailed_stats )THEN
820  itag = jptagstat
821  zdelay_max=0.0_jprd
822  zmpl(:)=0.0_jprd
823  zbar(:)=0.0_jprd
824  zgbr(:)=0.0_jprd
825  zgb2(:)=0.0_jprd
826  zomp(:)=0.0_jprd
827  zio(:)=0.0_jprd
828  zser(:)=0.0_jprd
829  zmxd(:)=0.0_jprd
830 #ifdef SFX_MPI
831  DO jroc=1,nproc_stats
832  IF( jroc > 1 )THEN
833  IF( myproc_stats == jroc )THEN
834  isend=1
835  CALL mpl_send(ndelay_index,kdest=nprcids_stats(isend), &
836  & ktag=itag,cdstring='GSTATS_PRINT:')
837  IF( ndelay_index > 0 )THEN
838  CALL mpl_send(ndelay_counter(1:ndelay_index),kdest=nprcids_stats(isend), &
839  & ktag=itag+1,cdstring='GSTATS_PRINT:')
840  CALL mpl_send(tdelay_value(1:ndelay_index),kdest=nprcids_stats(isend), &
841  & ktag=itag+2,cdstring='GSTATS_PRINT:')
842  DO jdelay=1,ndelay_index
843  cltemp((jdelay-1)*10+1:jdelay*10)=cdelay_time(jdelay)
844  ENDDO
845  CALL mpl_send(cltemp(1:ndelay_index*10),kdest=nprcids_stats(isend), &
846  & ktag=itag+3,cdstring='GSTATS_PRINT:')
847  ENDIF
848  ENDIF
849  IF( myproc_stats == 1 )THEN
850  CALL mpl_recv(ndelay_index,ksource=nprcids_stats(jroc), &
851  & ktag=itag,cdstring='GSTATS_PRINT:')
852  IF( ndelay_index > 0 )THEN
853  CALL mpl_recv(ndelay_counter(1:ndelay_index),ksource=nprcids_stats(jroc), &
854  & ktag=itag+1,cdstring='GSTATS_PRINT:')
855  CALL mpl_recv(tdelay_value(1:ndelay_index),ksource=nprcids_stats(jroc), &
856  & ktag=itag+2,cdstring='GSTATS_PRINT:')
857  CALL mpl_recv(cltemp(1:ndelay_index*10),ksource=nprcids_stats(jroc), &
858  & ktag=itag+3,cdstring='GSTATS_PRINT:')
859  DO jdelay=1,ndelay_index
860  cdelay_time(jdelay)=cltemp((jdelay-1)*10+1:jdelay*10)
861  ENDDO
862  ENDIF
863  ENDIF
864  ENDIF
865  IF( myproc_stats == 1 .AND. ndelay_index > 0 )THEN
866  WRITE(kulout,'("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")')
867  WRITE(kulout,'("TASK=",I6," NUMBER OF UNEXPECTED DELAYS=",I4)') jroc,ndelay_index
868  IF( ndelay_index == jpmaxdelays )THEN
869  WRITE(kulout,'(" NOTE THAT THE MAXIMUM NUMBER OF DELAYS HAS BEEN REACHED =",I6)')jpmaxdelays
870  ENDIF
871  zdelay=sum(tdelay_value(1:ndelay_index))
872  WRITE(kulout,'("TOTAL UNEXPECTED DELAY TIME (SECS) =",F9.1)') zdelay
873  zdelay_max=max(zdelay_max,zdelay)
874  DO jdelay=1,ndelay_index
875  WRITE(kulout,'(A,":",A,":",A,1X,I4,1X,A3,1X,A40,1X,F6.1)')&
876  &cdelay_time(jdelay)(1:2),cdelay_time(jdelay)(3:4),cdelay_time(jdelay)(5:6),&
877  &ndelay_counter(jdelay),cctype(ndelay_counter(jdelay)),&
878  &ccdesc(ndelay_counter(jdelay)),tdelay_value(jdelay)
879  IF( cctype(ndelay_counter(jdelay)) .EQ. 'MPL' ) zmpl(jroc)=zmpl(jroc)+tdelay_value(jdelay)
880  IF( cctype(ndelay_counter(jdelay)) .EQ. 'BAR' ) zbar(jroc)=zbar(jroc)+tdelay_value(jdelay)
881  IF( cctype(ndelay_counter(jdelay)) .EQ. 'GBR' ) zgbr(jroc)=zgbr(jroc)+tdelay_value(jdelay)
882  IF( cctype(ndelay_counter(jdelay)) .EQ. 'GB2' ) zgb2(jroc)=zgb2(jroc)+tdelay_value(jdelay)
883  IF( cctype(ndelay_counter(jdelay)) .EQ. 'OMP' ) zomp(jroc)=zomp(jroc)+tdelay_value(jdelay)
884  IF( cctype(ndelay_counter(jdelay)) .EQ. 'IO-' ) zio(jroc)=zio(jroc)+tdelay_value(jdelay)
885  IF( cctype(ndelay_counter(jdelay)) .EQ. 'SER' ) zser(jroc)=zser(jroc)+tdelay_value(jdelay)
886  IF( cctype(ndelay_counter(jdelay)) .EQ. 'MXD' ) zmxd(jroc)=zmxd(jroc)+tdelay_value(jdelay)
887  ENDDO
888  WRITE(kulout,'(" ")')
889  WRITE(kulout,'("TASK=",I6," UNEXPECTED DELAYS SORTED BY COUNTER")') jroc
890  DO jnum=500,jpmaxstat
891  idelay=0
892  zdelay=0.0_jprd
893  DO jdelay=1,ndelay_index
894  IF( ndelay_counter(jdelay) == jnum )THEN
895  idelay=idelay+1
896  zdelay=zdelay+tdelay_value(jdelay)
897  ENDIF
898  ENDDO
899  IF( idelay /= 0 )THEN
900  WRITE(kulout,'(I4,1X,A3,1X,A40,1X,I4,3X,F6.1)')&
901  &jnum,cctype(jnum),ccdesc(jnum),idelay,zdelay
902  ENDIF
903  ENDDO
904  WRITE(kulout,'(" ")')
905  WRITE(kulout,'(" ")')
906  ENDIF
907  CALL mpl_barrier(cdstring='GSTATS_PRINT')
908  ENDDO
909 #endif
910  IF( myproc_stats == 1 )THEN
911  WRITE(kulout,'("MAXIMUM TOTAL UNEXPECTED DELAY TIME (SECS) =",F9.1)') zdelay_max
912  WRITE(kulout,'(" ")')
913  WRITE(kulout,'(" ")')
914  WRITE(kulout,'(" TASK "," MPL "," BAR "," GBR "," GB2 "," OMP ",&
915  &" IO- "," SER "," MXD ")')
916  DO jroc=1,nproc_stats
917  WRITE(kulout,'(I6,8(2X,F9.1))') jroc,zmpl(jroc),zbar(jroc),zgbr(jroc),zgb2(jroc),&
918  &zomp(jroc),zio(jroc),zser(jroc),zmxd(jroc)
919  ENDDO
920  WRITE(kulout,'(" ")')
921  WRITE(kulout,'(" ")')
922  ENDIF
923 ENDIF
924 
925 ! Trace stats
926 
927 IF (ltrace_stats) THEN
928  WRITE(kulout,'(A)') '=== TRACE OF CALLS TO GSTATS'
929  IF (ncalls_total > ntrace_stats) THEN
930  WRITE(kulout,'(A,2I10)') ' ONLY PART OF TRACE STORED AS BUFFER TO SMALL ',&
932  ENDIF
933  WRITE(kulout,'(A)') '==='
934  claction(0)='ON'
935  claction(1)='OFF'
936  claction(2)='SUSPEND'
937  claction(3)='RESUME'
938  DO jcall=1,min(ncalls_total,ntrace_stats)
939  icaller = mod(ncall_trace(jcall),(jpmaxstat+1))
940  iaction = ncall_trace(jcall)/(jpmaxstat+1)
941  IF (iaction == 0) THEN
942  ztimelcall(icaller) = time_trace(jcall)
943  zthistime(icaller) = 0.0_jprd
944  ELSEIF (iaction == 2) THEN
945  zthistime(icaller) = time_trace(jcall)-ztimelcall(icaller)
946  ELSEIF (iaction == 3) THEN
947  ztimelcall(icaller) = time_trace(jcall)
948  ENDIF
949  IF (iaction == 1) THEN
950  WRITE(kulout,'(1X,F10.3,1X,A,1X,A,1X,F10.3)') &
951  &time_trace(jcall),ccdesc(icaller),claction(iaction),&
952  &zthistime(icaller)+(time_trace(jcall)-ztimelcall(icaller))
953  ELSE
954  WRITE(kulout,'(1X,F10.3,1X,A,1X,A)') time_trace(jcall),ccdesc(icaller),&
955  & claction(iaction)
956  ENDIF
957  ENDDO
958 ENDIF
959 
960 IF(lstats .AND. myproc_stats == 1) THEN
961  paveave(0:klen) = zaveave(0:klen)
962 ELSE
963  paveave(0:klen) = 0.0_jprd
964 ENDIF
965 
966 WRITE(kulout,'(/A)')'===-=== END OF TIMING STATISTICS ===-==='
967 
968 
969 IF(lstats_mem)THEN
970  IF(lxml_stats)THEN
971  WRITE(ixmllun,'(A)')'<memory>'
972  ENDIF
973  WRITE(kulout,*) ''
974  WRITE(kulout,*) 'STATS FOR MEMORY'
975  WRITE(kulout,*) &
976  &' NUM ROUTINE CALLS CALL MAXINCR TOTINCR MININCR'
977  WRITE(kulout,*) &
978  &' NO (KB) (KB) (KB)'
979  DO jnum=0,jpmaxstat
980  IF(ncalls(jnum) > 1) THEN
981  icalls = ncalls(jnum)/2
982  imem=ntmem(jnum,1)
983  inum=ntmem(jnum,3)/2
984  jmem=ntmem(jnum,4)
985  WRITE(kulout,'(I4,1X,A20,1X,I8,1X,I6,3(1X,I9))')&
986  &jnum,ccdesc(jnum),icalls,inum,imem,jmem,ntmem(jnum,5)
987 
988  IF(lxml_stats)THEN
989  WRITE(ixmllun,'(A,I4,A,//,A,A20,A,//,A,I8,A,//,A,I6,A,//,3(A,I9,A,//))')&
990  &'<memitem id="',jnum,'"/>',&
991  &'<description>',ccdesc(jnum),'</description>',&
992  &'<calls>',icalls,'</calls>',&
993  &'<callnum>',inum,'</callnum>','<maxincr unit="kb">',imem,'</maxincr>',&
994  &'<totincr unit="kb">',jmem,'</totincr>',&
995  &'<minincr unit="kb">',ntmem(jnum,5),'</minincr>'
996  ENDIF
997  ENDIF
998  ENDDO
999 
1000  WRITE(kulout,*) ''
1001  WRITE(kulout,'(/A)')'===-=== END OF MEMORY STATISTICS ===-==='
1002  WRITE(kulout,*) ''
1003  IF(lxml_stats)THEN
1004  WRITE(ixmllun,'(A)')'</memory>'
1005  ENDIF
1006 ENDIF
1007 IF(lxml_stats)THEN
1008  WRITE(ixmllun,'(A)')'</gstats>'
1009  CLOSE(ixmllun)
1010 ENDIF
1011 
1012 RETURN
1013 END SUBROUTINE gstats_print
real(kind=jprb), dimension(:), allocatable unknown_recvbytes
Definition: yomgstats.F90:90
integer(kind=jpim), dimension(0:jpmaxstat, 5) ntmem
Definition: yomgstats.F90:124
integer(kind=jpim) myproc_stats
Definition: yomgstats.F90:121
real(kind=jprd), dimension(:), allocatable time_trace
Definition: yomgstats.F90:107
real(kind=jprd), dimension(1:jpmaxdelays) tdelay_value
Definition: yomgstats.F90:113
integer, parameter jpim
Definition: parkind1.F90:13
integer(kind=jpim), parameter jptagstat
Definition: yomgstats.F90:109
integer, parameter jprd
Definition: parkind1.F90:39
character *10, dimension(1:jpmaxdelays) cdelay_time
Definition: yomgstats.F90:114
integer(kind=jpim), parameter jbmaxbase
Definition: yomgstats.F90:68
real(kind=jprd), dimension(0:jpmaxstat) tvcpusum
Definition: yomgstats.F90:98
integer(kind=jpim), dimension(:), allocatable numrecv
Definition: yomgstats.F90:84
real(kind=jprb), dimension(:), allocatable sendbytes
Definition: yomgstats.F90:85
real(kind=jprd), dimension(0:jpmaxstat) timelcall
Definition: yomgstats.F90:96
integer(kind=jpim) nproc_stats
Definition: yomgstats.F90:120
integer(kind=jpim), dimension(1:jpmaxdelays) ndelay_counter
Definition: yomgstats.F90:112
integer(kind=jpim), dimension(:), allocatable unknown_numrecv
Definition: yomgstats.F90:88
integer(kind=jpim), dimension(0:jpmaxstat) nbar_ptr
Definition: yomgstats.F90:80
integer(kind=jpim), parameter jpmaxstat
Definition: yomgstats.F90:70
logical lxml_stats
Definition: yomgstats.F90:61
integer(kind=jpim) ntrace_stats
Definition: yomgstats.F90:73
integer(kind=jpim), dimension(:), allocatable ncall_trace
Definition: yomgstats.F90:82
logical lbarrier_stats2
Definition: yomgstats.F90:64
logical ltrace_stats
Definition: yomgstats.F90:65
real(kind=jprb), dimension(:), allocatable unknown_sendbytes
Definition: yomgstats.F90:89
subroutine gstats_print(KULOUT, PAVEAVE, KLEN)
Definition: gstats_print.F90:2
character *3, dimension(0:jpmaxstat) cctype
Definition: yomgstats.F90:118
logical lsyncstats
Definition: yomgstats.F90:60
logical lstatscpu
Definition: yomgstats.F90:59
character *50, dimension(0:jpmaxstat) ccdesc
Definition: yomgstats.F90:117
logical lstats_mpl
Definition: yomgstats.F90:56
integer(kind=jpim), dimension(:), allocatable nprcids_stats
Definition: yomgstats.F90:122
integer(kind=jpim) ndelay_index
Definition: yomgstats.F90:115
intent(out) overrides sub arrays one Sort by the least significant key first sum(iindex(1:n))
integer(kind=jpim) nprnt_stats
Definition: yomgstats.F90:127
logical lstats_comms
Definition: yomgstats.F90:55
integer(kind=jpim), parameter mrealt
Definition: yommpi.F90:15
real(kind=jprd), dimension(0:jpmaxstat) timesumb
Definition: yomgstats.F90:95
real(kind=jprd), dimension(0:jpmaxstat) ttcpusum
Definition: yomgstats.F90:97
integer(kind=jpim), dimension(:), allocatable unknown_numsend
Definition: yomgstats.F90:87
integer(kind=jpim) ncalls_total
Definition: yomgstats.F90:76
real(kind=jprb), dimension(:), allocatable recvbytes
Definition: yomgstats.F90:86
real(kind=jprd), dimension(0:jpmaxstat) timemax
Definition: yomgstats.F90:94
Definition: yommpi.F90:1
logical ldetailed_stats
Definition: yomgstats.F90:62
integer(kind=jpim), dimension(0:jpmaxstat) ncalls
Definition: yomgstats.F90:74
logical lstats_omp
Definition: yomgstats.F90:54
real(kind=jprd), dimension(0:jpmaxstat) timesum
Definition: yomgstats.F90:92
integer(kind=jpim), parameter jpmaxdelays
Definition: yomgstats.F90:111
real(kind=jprd), dimension(0:jpmaxstat) timesqsum
Definition: yomgstats.F90:93
logical lstats
Definition: yomgstats.F90:53
real(kind=jprd), dimension(:), allocatable time_start
Definition: yomgstats.F90:106
logical lstats_mem
Definition: yomgstats.F90:57
integer(kind=jpim), dimension(:), allocatable numsend
Definition: yomgstats.F90:83