12 INTEGER(KIND=4),
INTENT(IN) :: KCOMM
13 INTEGER(KIND=4),
INTENT(IN) :: IU
14 CHARACTER*(*),
INTENT(IN) :: IDSTRING
15 INTEGER(KIND = 4) :: ID,KULOUT
16 INTEGER(KIND=4) :: I,J,MYPROC,NPROC,LEN,ERROR,ITAG,NODENUM
17 INTEGER(KIND=8) :: TASKSMALL,NODEHUGE,MEMFREE,CACHED,NFREE
18 INTEGER(KIND=8) :: SMALLPAGE0,SMALLPAGE1,HUGEPAGE0,HUGEPAGE1
19 INTEGER(KIND=8) :: SENDBUF(9),RECVBUF(9)
20 INTEGER(KIND=8) :: GETHWM,GETMAXRSS
21 INTEGER(KIND=8) :: HEAP_SIZE
22 INTEGER(KIND=4) :: PAGESIZE,N18
23 INTEGER(KIND=4) :: NODE0(18),NODE1(18)
24 INTEGER(KIND=8) :: BUCKET0(18),BUCKET1(18)
25 REAL(KIND=4) :: PERCENT_USED(2)
26 CHARACTER(LEN=512) :: TMPDIR
27 CHARACTER(LEN=512) :: PROGRAM
28 CHARACTER(LEN=8) :: NODENAME,LASTNODE
29 CHARACTER(LEN=12) :: VAL
31 CHARACTER(LEN=160) ::LINE
32 CHARACTER(LEN=56) :: FILENAME
33 CHARACTER(LEN=128) :: JOBNAME
34 CHARACTER(LEN=128) :: JOBID
35 CHARACTER(LEN=2) :: EC_MEMINFO
36 CHARACTER(LEN=4) :: CSTAR
37 CHARACTER(LEN=LEN(CSTAR)+1+LEN(IDSTRING)) :: ID_STRING
38 CHARACTER (LEN = 10) :: CLDATEOD,CLTIMEOD,CLZONEOD
39 INTEGER(KIND=4) :: IVALUES(8)
41 INTEGER(KIND=4) :: IRECV_STATUS(mpi_status_size)
43 LOGICAL :: LLNOCOMM, LLNOHDR
44 CHARACTER(LEN=64) :: CLPFX
45 CHARACTER(LEN=3) :: ZUM
46 INTEGER(KIND=4) :: IPFXLEN
47 INTEGER OMP_GET_MAX_THREADS
48 EXTERNAL omp_get_max_threads
50 CALL getenv(
'EC_MEMINFO',ec_meminfo)
51 IF (ec_meminfo ==
'0')
RETURN 53 llnocomm = (kcomm == -1 .or. kcomm == -2)
54 llnohdr = (kcomm == -2)
64 ipfxlen = len_trim(clpfx)
71 CALL mpi_barrier(kcomm,error)
73 CALL mpi_comm_rank(kcomm, myproc, error)
76 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_COMM_RANK" 77 CALL mpi_abort(kcomm,-1,error)
80 CALL mpi_comm_size(kcomm,nproc,error)
83 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_COMM_SIZE" 84 CALL mpi_abort(kcomm,-1,error)
98 CALL getenv(
'TMPDIR',tmpdir)
99 IF (tmpdir ==
' ') tmpdir =
'.' 101 OPEN(unit=kulout,file=trim(tmpdir)//
"/"//
"meminfo",status=
'unknown', &
102 action=
'write',position=
'append')
107 CALL date_and_time(cldateod,cltimeod,clzoneod,ivalues)
108 CALL getenv(
'EC_JOB_NAME',jobname)
109 IF(jobname ==
'')
THEN 110 CALL getenv(
'PBS_JOBNAME',jobname)
112 CALL getenv(
'PBS_JOBID',jobid)
114 IF (.not.llnocomm)
THEN 115 WRITE(kulout,
'(a,/,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ",clpfx(1:ipfxlen)//
"## EC_MEMINFO" 116 WRITE(kulout,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO Detailed memory information ", &
117 "for program ",trim(program)
119 WRITE(kulout,
'(a,i5,a,i3,a,a,'':'',a,'':'',a,a,a,''-'',a,''-'',a)') &
120 clpfx(1:ipfxlen)//
"## EC_MEMINFO Running with ",nproc, &
121 " tasks and ", omp_get_max_threads(),
" threads at time ", &
122 cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
123 " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
125 WRITE(kulout,
'(a,i5,a,i3,a,'':'',a,a,a,''-'',a,''-'',a)') &
126 clpfx(1:ipfxlen)//
"## EC_MEMINFO Running with ",nproc, &
127 " tasks and 1 threads at time ", &
128 cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
129 " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
131 WRITE(kulout,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO The Job Name is ",trim(jobname), &
132 " and the Job ID is ",trim(jobid)
133 WRITE(kulout,
'(a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO " 135 IF (.not.llnohdr)
THEN 136 WRITE(kulout,
'(3a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
137 " | TC | MEMORY USED(MB) |", &
138 " MEMORY FREE(MB) INCLUDING CACHED | %USED %HUGE" 139 WRITE(kulout,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
140 " | Malloc| Inc Heap |", &
141 " Numa node 0 | Numa node 1 | |" 142 WRITE(kulout,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
143 "Node Name | Heap | RSS("//zum//
") |", &
144 " Small Huge or | Small Huge or | Total |" 145 WRITE(kulout,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
146 " | ("//zum//
") | Small Huge |", &
147 " Only Small | Only Small | Memfree+Cached |" 150 WRITE(0,
'(a,/,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ",clpfx(1:ipfxlen)//
"## EC_MEMINFO" 151 WRITE(0,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO Detailed memory information ", &
152 "for program ",trim(program)
154 WRITE(0,
'(a,i5,a,i3,a,a,'':'',a,'':'',a,a,a,''-'',a,''-'',a)') &
155 clpfx(1:ipfxlen)//
"## EC_MEMINFO Running with ",nproc, &
156 " tasks and ", omp_get_max_threads(),
" threads at time ", &
157 cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
158 " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
160 WRITE(0,
'(a,i5,a,i3,a,a,'':'',a,a,a,''-'',a,''-'',a)') &
161 clpfx(1:ipfxlen)//
"## EC_MEMINFO Running with ",nproc, &
162 " tasks and 1 threads at time ", &
163 cltimeod(1:2),cltimeod(3:4),cltimeod(5:10), &
164 " on ",cldateod(7:8),cldateod(5:6),cldateod(3:4)
166 WRITE(0,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO The Job Name is ",trim(jobname), &
167 " and the Job ID is ",trim(jobid)
168 WRITE(0,
'(a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO " 169 WRITE(0,
'(3a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
170 " | TC | MEMORY USED(MB) |", &
171 " MEMORY FREE(MB) INCLUDING CACHED | %USED %HUGE" 172 WRITE(0,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
173 " | Malloc| Inc Heap |", &
174 " Numa node 0 | Numa node 1 | |" 175 WRITE(0,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
176 "Node Name | Heap | RSS("//zum//
") |", &
177 " Small Huge or | Small Huge or | Total |" 178 WRITE(0,
'(4a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
179 " | ("//zum//
") | Small Huge |", &
180 " Only Small | Only Small | Memfree+Cached |" 185 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_BARRIER" 187 CALL mpi_abort(kcomm,-1,error)
196 CALL getenv(
"HUGETLB_DEFAULT_PAGE_SIZE",val)
199 READ(val(1:i-1),*) pagesize
200 pagesize=pagesize*1024
205 WRITE(filename,
'(a,i0,a)')
"/sys/kernel/mm/hugepages/hugepages-", &
206 pagesize,
"kB/nr_hugepages" 208 IF(pagesize > 0)
THEN 209 OPEN(502,file=filename,status=
"old")
214 nodehuge=nodehuge*pagesize
218 OPEN(file=
"/proc/meminfo",unit=502)
221 IF(line(1:7) ==
"MemFree")
THEN 222 READ(line(9:80),*) memfree
223 ELSEIF(line(1:6) ==
"Cached")
THEN 224 READ(line(8:80),*) cached
229 nodehuge=nodehuge/1024
233 tasksmall=getmaxrss()/(1024*1024)
235 OPEN(file=
"/proc/buddyinfo",unit=502)
242 READ(line(22:),*,end=98) node0
244 n18 =
count(node0 >= 0)
246 READ(502,
'(a)',end=99) line
247 READ(line(22:),*) node1(1:n18)
254 bucket0(1)=node0(1)*4096
255 bucket1(1)=node1(1)*4096
257 bucket0(i)=node0(i)*4096
258 bucket1(i)=node1(i)*4096
260 bucket0(i)=bucket0(i)*2
261 bucket1(i)=bucket1(i)*2
268 smallpage0=smallpage0+bucket0(i)
269 smallpage1=smallpage1+bucket1(i)
274 hugepage0=hugepage0+bucket0(i)
275 hugepage1=hugepage1+bucket1(i)
278 smallpage0=smallpage0/(1024*1024)
279 smallpage1=smallpage1/(1024*1024)
280 hugepage0=hugepage0/(1024*1024)
281 hugepage1=hugepage1/(1024*1024)
283 heap_size=gethwm()/(1024*1024)
291 CALL mpi_recv(nodename(1:8),8,mpi_byte,i,itag,kcomm,irecv_status,error)
293 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_RECV" 294 CALL mpi_abort(kcomm,-1,error)
296 CALL mpi_recv(recvbuf(1:9),9,mpi_integer8,i,itag+1,kcomm,irecv_status,error)
298 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_RECV" 299 CALL mpi_abort(kcomm,-1,error)
301 IF(lastnode==nodename)
THEN 302 heap_size=heap_size+recvbuf(8)
303 tasksmall=tasksmall+recvbuf(9)
306 IF(heap_size > nodehuge)
THEN 308 percent_used(1)=100.0*(tasksmall+nodehuge)/(tasksmall+nodehuge+memfree+cached)
312 percent_used(1)=100.0*(heap_size+tasksmall)/(tasksmall+nodehuge+memfree+cached)
313 IF (nodehuge > 0)
THEN 314 nfree = hugepage0 + hugepage1
315 percent_used(2) = (100.0*(nodehuge - nfree))/nodehuge
319 IF (.not.llnocomm)
THEN 320 id_string = cstar//
":"//idstring
324 WRITE(kulout,
'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
325 & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
326 & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
327 & percent_used,trim(id_string)
329 WRITE(0,
'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
330 & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
331 & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
332 & percent_used,trim(id_string)
338 smallpage0=recvbuf(4)
339 smallpage1=recvbuf(5)
350 IF(heap_size > nodehuge)
THEN 352 percent_used(1)=100.0*(tasksmall+nodehuge)/(tasksmall+nodehuge+memfree+cached)
356 percent_used(1)=100.0*(heap_size+tasksmall)/(tasksmall+nodehuge+memfree+cached)
357 IF (nodehuge > 0)
THEN 358 nfree = hugepage0 + hugepage1
359 percent_used(2) = (100.0*(nodehuge - nfree))/nodehuge
363 IF (.not.llnocomm)
THEN 364 id_string = cstar//
":"//idstring
368 WRITE(kulout,
'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
369 & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
370 & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
371 & percent_used,trim(id_string)
374 WRITE(0,
'(a,i4,1x,a,3i8,2x,2i8,1x,2i8,2x,2i8,3x,2f6.1,a)') clpfx(1:ipfxlen)//
"## EC_MEMINFO ", &
375 & nodenum,lastnode,heap_size,tasksmall,nodehuge, &
376 & smallpage0,hugepage0,smallpage1,hugepage1,memfree,cached, &
377 & percent_used,trim(id_string)
382 CALL mpi_send(nodename(1:8),8,mpi_byte,0,itag,kcomm,error)
384 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_SEND" 385 CALL mpi_abort(kcomm,-1,error)
391 sendbuf(4)=smallpage0
392 sendbuf(5)=smallpage1
398 CALL mpi_send(sendbuf(1:9),9,mpi_integer8,0,itag+1,kcomm,error)
400 WRITE(0,*) clpfx(1:ipfxlen)//
"## EC_CRAY_MEMINFO error code ",error,
" from MPI_SEND" 401 CALL mpi_abort(kcomm,-1,error)
406 IF (.not.llnocomm)
CALL mpi_barrier(kcomm,error)
412 INTEGER(KIND=4),
INTENT(IN) :: KOUT, KSTEP
413 CHARACTER(LEN=32) CLSTEP
414 CHARACTER(LEN=160) :: LINE
415 CHARACTER(LEN=8) :: NODENAME
416 INTEGER(KIND=8) :: NODE(0:17), ISMALL, IHUGE, ITOTAL
417 INTEGER(KIND=4) :: I,INUMA,ICOMM
418 WRITE(clstep,
'(11X,"STEP",I5," :")') kstep
420 IF (kstep == 0) icomm = -1
429 OPEN(file=
"/proc/buddyinfo",unit=502,err=98)
434 READ(502,
'(a)',end=99) line
435 READ(line(22:160),*,err=99,end=99) node
438 ismall = ismall + node(i) * (2**i)
443 ihuge = ihuge + node(i) * (2**i)
445 itotal = ismall + ihuge
446 ismall = (ismall * 4096)/(1024*1024)
447 ihuge = (ihuge * 4096)/(1024*1024)
448 itotal = (itotal * 4096)/(1024*1024)
449 WRITE(kout,
'(" MEMINFO: STEP=",I0," ",A," NUMA# ",I0," : Free Total = SMALL + HUGEPAGES in MB: ",I0," = ",I0," + ",I0)') &
450 & kstep, nodename, inuma, itotal, ismall, ihuge
451 WRITE(kout,
'(" BUDDYINFO: STEP=",I0," ",A," NUMA# ",I0," : Count of free 2^(0..",I0,")*4096B blocks: ",A)') &
452 & kstep, nodename, inuma,
SIZE(node)-1, line(22:160)
void ec_gethostname(char a[], int alen)
subroutine ec_cray_meminfo(IU, IDSTRING, KCOMM)
subroutine meminfo(KOUT, KSTEP)
subroutine getarg(IARG, CLARG)