SURFEX v8.1
General documentation of Surfex
ec_cray_meminfo.F90
Go to the documentation of this file.
1 SUBROUTINE ec_cray_meminfo(IU,IDSTRING,KCOMM)
2 
3 IMPLICIT NONE
4 
5 !-- EC_CRAY_MEMINFO:
6 ! Author : Peter Towers (ECMWF) : 2015-2016
7 ! Modified : Sami Saarinen (ECMWF) : 21-SEP-2016 : Added getenv EC_MEMINFO -- export EC_MEMINFO=0 disables any EC_MEMINFO output
8 #ifdef SFX_MPI
9 include "mpif.h"
10 #endif
11 
12 INTEGER(KIND=4), INTENT(IN) :: KCOMM
13 INTEGER(KIND=4), INTENT(IN) :: IU
14 CHARACTER*(*), INTENT(IN) :: IDSTRING
15 INTEGER(KIND = 4) :: ID,KULOUT
16 INTEGER(KIND=4) :: I,J,MYPROC,NPROC,LEN,ERROR,ITAG,NODENUM
17 INTEGER(KIND=8) :: TASKSMALL,NODEHUGE,MEMFREE,CACHED,NFREE
18 INTEGER(KIND=8) :: SMALLPAGE0,SMALLPAGE1,HUGEPAGE0,HUGEPAGE1
19 INTEGER(KIND=8) :: SENDBUF(9),RECVBUF(9)
20 INTEGER(KIND=8) :: GETHWM,GETMAXRSS
21 INTEGER(KIND=8) :: HEAP_SIZE
22 INTEGER(KIND=4) :: PAGESIZE,N18
23 INTEGER(KIND=4) :: NODE0(18),NODE1(18)
24 INTEGER(KIND=8) :: BUCKET0(18),BUCKET1(18)
25 REAL(KIND=4) :: PERCENT_USED(2)
26 CHARACTER(LEN=512) :: TMPDIR
27 CHARACTER(LEN=512) :: PROGRAM
28 CHARACTER(LEN=8) :: NODENAME,LASTNODE
29 CHARACTER(LEN=12) :: VAL
30 CHARACTER(LEN=1) :: M
31 CHARACTER(LEN=160) ::LINE
32 CHARACTER(LEN=56) :: FILENAME
33 CHARACTER(LEN=128) :: JOBNAME
34 CHARACTER(LEN=128) :: JOBID
35 CHARACTER(LEN=2) :: EC_MEMINFO
36 CHARACTER(LEN=4) :: CSTAR
37 CHARACTER(LEN=LEN(CSTAR)+1+LEN(IDSTRING)) :: ID_STRING
38 CHARACTER (LEN = 10) :: CLDATEOD,CLTIMEOD,CLZONEOD
39 INTEGER(KIND=4) :: IVALUES(8)
40 #ifdef SFX_MPI
41 INTEGER(KIND=4) :: IRECV_STATUS(mpi_status_size)
42 #endif
43 LOGICAL :: LLNOCOMM, LLNOHDR
44 CHARACTER(LEN=64) :: CLPFX
45 CHARACTER(LEN=3) :: ZUM
46 INTEGER(KIND=4) :: IPFXLEN
47 INTEGER OMP_GET_MAX_THREADS
48 EXTERNAL omp_get_max_threads
49 
50 CALL getenv('EC_MEMINFO',ec_meminfo)
51 IF (ec_meminfo == '0') RETURN
52 
53 llnocomm = (kcomm == -1 .or. kcomm == -2)
54 llnohdr = (kcomm == -2)
55 
56 CALL flush(0)
57 
58 IF (llnocomm) THEN
59  ! Direct call to obtain EC_meminfo -output
60  error = 0
61  myproc = 0
62  nproc = 1
63  clpfx = idstring
64  ipfxlen = len_trim(clpfx)
65  zum = 'tsk'
66 ELSE
67  clpfx = ' '
68  ipfxlen = 0
69  zum = 'sum'
70 #ifdef SFX_MPI
71  CALL mpi_barrier(kcomm,error)
72 
73  CALL mpi_comm_rank(kcomm, myproc, error)
74 
75  IF(error /= 0 ) THEN
76  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_COMM_RANK"
77  CALL mpi_abort(kcomm,-1,error)
78  ENDIF
79 
80  CALL mpi_comm_size(kcomm,nproc,error)
81 
82  IF(error /= 0 ) THEN
83  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_COMM_SIZE"
84  CALL mpi_abort(kcomm,-1,error)
85  ENDIF
86 #else
87  nproc = 1
88  myproc = 0
89 #endif
90 ENDIF
91 
92 IF(myproc == 0) THEN
93  CALL getarg(0,program)
94 !
95 ! Use already open file for output or $TMPDIR/meminfo
96 !
97  IF(iu == -1) THEN
98  CALL getenv('TMPDIR',tmpdir)
99  IF (tmpdir == ' ') tmpdir = '.'
100  kulout=501
101  OPEN(unit=kulout,file=trim(tmpdir)//"/"//"meminfo",status='unknown', &
102  action='write',position='append')
103  ELSE
104  kulout=iu
105  ENDIF
106 
107  CALL date_and_time(cldateod,cltimeod,clzoneod,ivalues)
108  CALL getenv('EC_JOB_NAME',jobname)
109  IF(jobname == '') THEN
110  CALL getenv('PBS_JOBNAME',jobname)
111  ENDIF
112  CALL getenv('PBS_JOBID',jobid)
113 
114  IF (.not.llnocomm) THEN
115  WRITE(kulout,'(a,/,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ",clpfx(1:ipfxlen)//"## EC_MEMINFO"
116  WRITE(kulout,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO Detailed memory information ", &
117  "for program ",trim(program)
118 #ifdef SFX_OMP
119  WRITE(kulout,'(a,i5,a,i3,a,a,'':'',a,'':'',a,a,a,''-'',a,''-'',a)') &
120  clpfx(1:ipfxlen)//"## EC_MEMINFO Running with ",nproc, &
121  " tasks and ", omp_get_max_threads(), " threads at time ", &
122  cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
123  " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
124 #else
125  WRITE(kulout,'(a,i5,a,i3,a,'':'',a,a,a,''-'',a,''-'',a)') &
126  clpfx(1:ipfxlen)//"## EC_MEMINFO Running with ",nproc, &
127  " tasks and 1 threads at time ", &
128  cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
129  " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
130 #endif
131  WRITE(kulout,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO The Job Name is ",trim(jobname), &
132  " and the Job ID is ",trim(jobid)
133  WRITE(kulout,'(a)') clpfx(1:ipfxlen)//"## EC_MEMINFO "
134  ENDIF
135  IF (.not.llnohdr) THEN
136  WRITE(kulout,'(3a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
137  " | TC | MEMORY USED(MB) |", &
138  " MEMORY FREE(MB) INCLUDING CACHED | %USED %HUGE"
139  WRITE(kulout,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
140  " | Malloc| Inc Heap |", &
141  " Numa node 0 | Numa node 1 | |"
142  WRITE(kulout,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
143  "Node Name | Heap | RSS("//zum//") |", &
144  " Small Huge or | Small Huge or | Total |"
145  WRITE(kulout,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
146  " | ("//zum//") | Small Huge |", &
147  " Only Small | Only Small | Memfree+Cached |"
148  ENDIF
149  IF(iu == -1) THEN
150  WRITE(0,'(a,/,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ",clpfx(1:ipfxlen)//"## EC_MEMINFO"
151  WRITE(0,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO Detailed memory information ", &
152  "for program ",trim(program)
153 #ifdef SFX_OMP
154  WRITE(0,'(a,i5,a,i3,a,a,'':'',a,'':'',a,a,a,''-'',a,''-'',a)') &
155  clpfx(1:ipfxlen)//"## EC_MEMINFO Running with ",nproc, &
156  " tasks and ", omp_get_max_threads(), " threads at time ", &
157  cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
158  " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
159 #else
160  WRITE(0,'(a,i5,a,i3,a,a,'':'',a,a,a,''-'',a,''-'',a)') &
161  clpfx(1:ipfxlen)//"## EC_MEMINFO Running with ",nproc, &
162  " tasks and 1 threads at time ", &
163  cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
164  " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
165 #endif
166  WRITE(0,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO The Job Name is ",trim(jobname), &
167  " and the Job ID is ",trim(jobid)
168  WRITE(0,'(a)') clpfx(1:ipfxlen)//"## EC_MEMINFO "
169  WRITE(0,'(3a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
170  " | TC | MEMORY USED(MB) |", &
171  " MEMORY FREE(MB) INCLUDING CACHED | %USED %HUGE"
172  WRITE(0,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
173  " | Malloc| Inc Heap |", &
174  " Numa node 0 | Numa node 1 | |"
175  WRITE(0,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
176  "Node Name | Heap | RSS("//zum//") |", &
177  " Small Huge or | Small Huge or | Total |"
178  WRITE(0,'(4a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
179  " | ("//zum//") | Small Huge |", &
180  " Only Small | Only Small | Memfree+Cached |"
181  ENDIF
182 ENDIF
183 
184 IF(error /= 0 ) THEN
185  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_BARRIER"
186 #ifdef SFX_MPI
187  CALL mpi_abort(kcomm,-1,error)
188 #endif
189 ENDIF
190 
191 #ifndef DARWIN
192 CALL ec_gethostname(nodename) ! from support/env.c
193 #endif
194 
195 pagesize=2048
196 CALL getenv("HUGETLB_DEFAULT_PAGE_SIZE",val)
197 i=index(val,"M")
198 IF(i > 0) THEN
199  READ(val(1:i-1),*) pagesize
200  pagesize=pagesize*1024
201 ENDIF
202 
203 nodehuge=0
204 
205 WRITE(filename,'(a,i0,a)') "/sys/kernel/mm/hugepages/hugepages-", &
206  pagesize,"kB/nr_hugepages"
207 
208 IF(pagesize > 0) THEN
209  OPEN(502,file=filename,status="old")
210  READ(502,*) nodehuge
211  CLOSE(502)
212 ENDIF
213 
214 nodehuge=nodehuge*pagesize
215 memfree = 0
216 cached = 0
217 
218 OPEN(file="/proc/meminfo",unit=502)
219 DO i=1,10
220  READ(502,'(a)') line
221  IF(line(1:7) == "MemFree") THEN
222  READ(line(9:80),*) memfree
223  ELSEIF(line(1:6) == "Cached") THEN
224  READ(line(8:80),*) cached
225  ENDIF
226 ENDDO
227 CLOSE(502)
228 
229 nodehuge=nodehuge/1024
230 memfree=memfree/1024
231 cached=cached/1024
232 
233 tasksmall=getmaxrss()/(1024*1024)
234 
235 OPEN(file="/proc/buddyinfo",unit=502)
236 
237 READ(502,'(a)') line
238 READ(502,'(a)') line
239 READ(502,'(a)') line
240 node0(:)=-1
241 n18=0
242 READ(line(22:),*,end=98) node0
243 98 CONTINUE
244 n18 = count(node0 >= 0)
245 node1(:)=0
246 READ(502,'(a)',end=99) line
247 READ(line(22:),*) node1(1:n18)
248 
249 99 CONTINUE
250 CLOSE(502)
251 
252 bucket0(:) = 0
253 bucket1(:) = 0
254 bucket0(1)=node0(1)*4096
255 bucket1(1)=node1(1)*4096
256 DO i=2,n18
257  bucket0(i)=node0(i)*4096
258  bucket1(i)=node1(i)*4096
259  DO j=2,i
260  bucket0(i)=bucket0(i)*2
261  bucket1(i)=bucket1(i)*2
262  ENDDO
263 ENDDO
264 
265 smallpage0=0
266 smallpage1=0
267 DO i=1,9
268  smallpage0=smallpage0+bucket0(i)
269  smallpage1=smallpage1+bucket1(i)
270 ENDDO
271 hugepage0=0
272 hugepage1=0
273 DO i=10,n18
274  hugepage0=hugepage0+bucket0(i)
275  hugepage1=hugepage1+bucket1(i)
276 ENDDO
277 
278 smallpage0=smallpage0/(1024*1024)
279 smallpage1=smallpage1/(1024*1024)
280 hugepage0=hugepage0/(1024*1024)
281 hugepage1=hugepage1/(1024*1024)
282 
283 heap_size=gethwm()/(1024*1024)
284 
285 itag = 98765
286 IF(myproc == 0) THEN
287  nodenum=1
288  lastnode=nodename
289 #ifdef SFX_MPI
290  DO i=1,nproc-1
291  CALL mpi_recv(nodename(1:8),8,mpi_byte,i,itag,kcomm,irecv_status,error)
292  IF(error /= 0 ) THEN
293  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_RECV"
294  CALL mpi_abort(kcomm,-1,error)
295  ENDIF
296  CALL mpi_recv(recvbuf(1:9),9,mpi_integer8,i,itag+1,kcomm,irecv_status,error)
297  IF(error /= 0 ) THEN
298  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_RECV"
299  CALL mpi_abort(kcomm,-1,error)
300  ENDIF
301  IF(lastnode==nodename)THEN
302  heap_size=heap_size+recvbuf(8)
303  tasksmall=tasksmall+recvbuf(9)
304  ELSE
305  percent_used(2) = 0
306  IF(heap_size > nodehuge) THEN
307 ! running with small pages
308  percent_used(1)=100.0*(tasksmall+nodehuge)/(tasksmall+nodehuge+memfree+cached)
309  cstar = " s/p"
310  ELSE
311 ! running with huge pages
312  percent_used(1)=100.0*(heap_size+tasksmall)/(tasksmall+nodehuge+memfree+cached)
313  IF (nodehuge > 0) THEN
314  nfree = hugepage0 + hugepage1
315  percent_used(2) = (100.0*(nodehuge - nfree))/nodehuge
316  ENDIF
317  cstar = " H/p"
318  ENDIF
319  IF (.not.llnocomm) THEN
320  id_string = cstar//":"//idstring
321  ELSE
322  id_string = cstar
323  ENDIF
324  WRITE(kulout,'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
325  & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
326  & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
327  & percent_used,trim(id_string)
328  IF(iu == -1) THEN
329  WRITE(0,'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
330  & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
331  & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
332  & percent_used,trim(id_string)
333  ENDIF
334 
335  nodehuge=recvbuf(1)
336  memfree=recvbuf(2)
337  cached=recvbuf(3)
338  smallpage0=recvbuf(4)
339  smallpage1=recvbuf(5)
340  hugepage0=recvbuf(6)
341  hugepage1=recvbuf(7)
342  heap_size=recvbuf(8)
343  tasksmall=recvbuf(9)
344  nodenum=nodenum+1
345  lastnode=nodename
346  ENDIF
347  ENDDO
348 #endif
349  percent_used(2) = 0
350  IF(heap_size > nodehuge) THEN
351 ! running with small pages
352  percent_used(1)=100.0*(tasksmall+nodehuge)/(tasksmall+nodehuge+memfree+cached)
353  cstar = " s/p"
354  ELSE
355 ! running with huge pages
356  percent_used(1)=100.0*(heap_size+tasksmall)/(tasksmall+nodehuge+memfree+cached)
357  IF (nodehuge > 0) THEN
358  nfree = hugepage0 + hugepage1
359  percent_used(2) = (100.0*(nodehuge - nfree))/nodehuge
360  ENDIF
361  cstar = " H/p"
362  ENDIF
363  IF (.not.llnocomm) THEN
364  id_string = cstar//":"//idstring
365  ELSE
366  id_string = cstar
367  ENDIF
368  WRITE(kulout,'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
369  & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
370  & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
371  & percent_used,trim(id_string)
372 
373  IF(iu == -1) THEN
374  WRITE(0,'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//"## EC_MEMINFO ", &
375  & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
376  & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
377  & percent_used,trim(id_string)
378  CLOSE(kulout)
379  ENDIF
380 ELSE
381 #ifdef SFX_MPI
382  CALL mpi_send(nodename(1:8),8,mpi_byte,0,itag,kcomm,error)
383  IF(error /= 0 ) THEN
384  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_SEND"
385  CALL mpi_abort(kcomm,-1,error)
386  ENDIF
387 #endif
388  sendbuf(1)=nodehuge
389  sendbuf(2)=memfree
390  sendbuf(3)=cached
391  sendbuf(4)=smallpage0
392  sendbuf(5)=smallpage1
393  sendbuf(6)=hugepage0
394  sendbuf(7)=hugepage1
395  sendbuf(8)=heap_size
396  sendbuf(9)=tasksmall
397 #ifdef SFX_MPI
398  CALL mpi_send(sendbuf(1:9),9,mpi_integer8,0,itag+1,kcomm,error)
399  IF(error /= 0 ) THEN
400  WRITE(0,*) clpfx(1:ipfxlen)//"## EC_CRAY_MEMINFO error code ",error," from MPI_SEND"
401  CALL mpi_abort(kcomm,-1,error)
402  ENDIF
403 #endif
404 ENDIF
405 #ifdef SFX_MPI
406 IF (.not.llnocomm) CALL mpi_barrier(kcomm,error)
407 #endif
408 END SUBROUTINE ec_cray_meminfo
409 
410 SUBROUTINE meminfo(KOUT,KSTEP)
411 IMPLICIT NONE
412 INTEGER(KIND=4), INTENT(IN) :: KOUT, KSTEP
413 CHARACTER(LEN=32) CLSTEP
414 CHARACTER(LEN=160) :: LINE
415 CHARACTER(LEN=8) :: NODENAME
416 INTEGER(KIND=8) :: NODE(0:17), ISMALL, IHUGE, ITOTAL
417 INTEGER(KIND=4) :: I,INUMA,ICOMM
418 WRITE(clstep,'(11X,"STEP",I5," :")') kstep
419 icomm = -2 ! No headers from EC_CRAY_MEMINFO by default
420 IF (kstep == 0) icomm = -1 ! Do print headers, too
421 #ifdef _CRAYFTN
422 CALL ec_cray_meminfo(kout,trim(clstep),icomm)
423 #endif
424 CALL flush(kout)
425 RETURN ! For now
426 #ifndef DARWIN
427 CALL ec_gethostname(nodename) ! from support/env.c
428 #endif
429 OPEN(file="/proc/buddyinfo",unit=502,err=98)
430 READ(502,'(a)') line
431 READ(502,'(a)') line
432 DO inuma=0,1
433  node(:)=0
434  READ(502,'(a)',end=99) line
435  READ(line(22:160),*,err=99,end=99) node
436  ismall = 0
437  DO i=0,8
438  ismall = ismall + node(i) * (2**i)
439  ENDDO
440  ! Pages >= 2M
441  ihuge = 0
442  DO i=9,SIZE(node)-1
443  ihuge = ihuge + node(i) * (2**i)
444  ENDDO
445  itotal = ismall + ihuge
446  ismall = (ismall * 4096)/(1024*1024)
447  ihuge = (ihuge * 4096)/(1024*1024)
448  itotal = (itotal * 4096)/(1024*1024)
449  WRITE(kout,'(" MEMINFO: STEP=",I0," ",A," NUMA# ",I0," : Free Total = SMALL + HUGEPAGES in MB: ",I0," = ",I0," + ",I0)') &
450  & kstep, nodename, inuma, itotal, ismall, ihuge
451  WRITE(kout,'(" BUDDYINFO: STEP=",I0," ",A," NUMA# ",I0," : Count of free 2^(0..",I0,")*4096B blocks: ",A)') &
452  & kstep, nodename, inuma, SIZE(node)-1, line(22:160)
453 ENDDO
454 99 CONTINUE
455 CLOSE(502)
456 98 CONTINUE
457 CALL flush(kout)
458 END SUBROUTINE meminfo
void ec_gethostname(char a[], int alen)
Definition: env.c:270
subroutine ec_cray_meminfo(IU, IDSTRING, KCOMM)
subroutine meminfo(KOUT, KSTEP)
subroutine getarg(IARG, CLARG)
Definition: get_opt.F:91
ERROR in index
Definition: ecsort_shared.h:90
static int count
Definition: memory_hook.c:21