SURFEX v8.1
General documentation of Surfex
opfla_perfmon.c
#ifdef PAPI

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <unistd.h> /* sleep(1) */
#include <pthread.h>
#include <signal.h>
#include <string.h>
#include <sched.h>
#include <papi.h>

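/*
 * Lightweight per-process performance monitor. MPI_Init, MPI_Init_thread
 * and MPI_Finalize are wrapped (see the __wrap_ functions below) so that
 * PAPI hardware counters are started at init and an aggregated report is
 * printed at finalize. If PERFMON_INTERVAL is set, a helper thread
 * additionally signals the master thread with SIGUSR1 at that interval,
 * and each signal appends one sample line to perfmon_report_<rank>.dat.
 */
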
struct eval {
  double val;
  int rank;
};

int rank;            // MPI rank
int numranks;        // total number of ranks
char affinity[128];  // core affinity
pid_t pid;           // pid of the process
char nodename[128];  // node name
FILE *fp;            // output file

#define NUM_EV 4

long_long papi_values[NUM_EV];
long_long start_usec_p;
long_long start_usec_r;

int init_error;

struct thread_data{
  pthread_t parent_thread;
  float report_interval;
};

// global, because it must stay alive even after the master thread
// has left MPI_Init...
struct thread_data td;


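/* Render a cpu_set_t as a compact, comma-separated range list; a mask
   with CPUs 0,1,2,3,8,10,11 set, for example, yields "0-3,8,10-11". */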
/* Borrowed from util-linux-2.13-pre7/schedutils/taskset.c */
static
char *cpuset_to_cstr(cpu_set_t *mask, char *str)
{
  char *ptr = str;
  int i, j, entry_made = 0;
  for (i = 0; i < CPU_SETSIZE; i++) {
    if (CPU_ISSET(i, mask)) {
      int run = 0;
      entry_made = 1;
      for (j = i + 1; j < CPU_SETSIZE; j++) {
        if (CPU_ISSET(j, mask)) run++;
        else break;
      }
      if (!run)
        sprintf(ptr, "%d,", i);
      else if (run == 1) {
        sprintf(ptr, "%d,%d,", i, i + 1);
        i++;
      } else {
        sprintf(ptr, "%d-%d,", i, i + run);
        i += run;
      }
      while (*ptr != 0) ptr++;
    }
  }
  ptr -= entry_made;
  *ptr = 0;
  return(str);
}

static
void getProcessIdentity(char *hostname, int hostnamesize,
                        char *affinity, int affinitysize,
                        int *rank, int *numranks, pid_t *pid){
  cpu_set_t coremask;

  (void)gethostname(hostname, hostnamesize);
  (void)sched_getaffinity(0, sizeof(coremask), &coremask);
  cpuset_to_cstr(&coremask, affinity);

  MPI_Comm_rank(MPI_COMM_WORLD, rank);
  MPI_Comm_size(MPI_COMM_WORLD, numranks);

  //pid
  *pid = getpid();
}

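/* Scan /proc/meminfo for a line such as "MemFree:       16197036 kB"
   and return its numeric field (in kB), or -1 if the label is absent. */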
static
int parseMeminfo(const char *label){
  FILE *fp = fopen("/proc/meminfo", "r");
  char line[120];
  char *subline;
  int val = -1;
  int label_length = strlen(label);

  if (fp == NULL) return val;
  while (fgets(line, sizeof(line), fp)) {
    subline = strstr(line, label);
    if (subline != NULL) {
      val = atoi(subline + label_length);
      break;
    }
  }
  fclose(fp);
  return val;
}

static
int report_init(int periodicreport){
  int events[NUM_EV];
  int num;
  int rc;
  static int first_time = 1;

  /* open file if we use files, and write headers */
  if(periodicreport){
    char fname[40];
    sprintf(fname, "perfmon_report_%d.dat", rank);
    fp = fopen(fname, "w");
    if(fp == NULL){
      return 2;
    }
    //header
    fprintf(fp, "# rank: %d nodename: %s core-affinity: %s \n", rank, nodename, affinity);
    fprintf(fp, "#rtime ptime gflops l1-hit memusage(MB) freeMem(MB)\n");
  }

  if (!first_time) return 0;
  first_time = 0;

  //get PAPI info; the first call initializes the PAPI counters/library
  events[0] = PAPI_L1_DCM;
  events[1] = PAPI_L1_DCH;
  events[2] = PAPI_FP_OPS;
  events[3] = PAPI_TOT_INS;

  //PAPI_num_counters() returns the counter count, or a negative PAPI error
  num = PAPI_num_counters();
  if (num < PAPI_OK) {
    fprintf(stderr, "PAPI_num_counters failed: %s\n", PAPI_strerror(num));
  }

  //fprintf(stderr,"PAPI_num_counters = %d\n",num);

  rc = PAPI_start_counters(events, NUM_EV);
  if (rc != PAPI_OK) {
    return rc;
  }
  start_usec_r = PAPI_get_real_usec();
  start_usec_p = PAPI_get_virt_usec();
  return 0;
}

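/* SIGUSR1 handler: append one sample line to the report file. GFlop/s is
   the PAPI_FP_OPS delta divided by the wall-clock delta, and the L1 hit
   ratio is DCH/(DCM+DCH) computed over the same interval. */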
static
void report_periodic(){
  double rtime, ptime;
  static double prevrtime = 0.0;
  double freemem;
  double memuse;
  double gflops, l1hitratio;
  PAPI_dmem_info_t dmem;
  int i;
  //get PAPI info
  long_long end_usec_r, end_usec_p;
  long_long prev_values[NUM_EV];

  end_usec_r = PAPI_get_real_usec();
  end_usec_p = PAPI_get_virt_usec();

  rtime = (double)(end_usec_r - start_usec_r) / 1e6;
  ptime = (double)(end_usec_p - start_usec_p) / 1e6;

  //get memory data
  PAPI_get_dmem_info(&dmem);
  for(i = 0; i < NUM_EV; i++)
    prev_values[i] = papi_values[i];
  PAPI_accum_counters(papi_values, NUM_EV);

  l1hitratio = 100.0 * (double)(papi_values[1] - prev_values[1])
               / (papi_values[0] + papi_values[1] - prev_values[0] - prev_values[1]);
  gflops = (double)(papi_values[2] - prev_values[2]) / (rtime - prevrtime) / 1e9;

  memuse = (double)dmem.size / 1.0e3;
  //get free memory
  freemem = (double)parseMeminfo("MemFree:") / 1.0e3;

  fprintf(fp, " %f %f %f %f %f %f\n",
          rtime, ptime, gflops, l1hitratio, memuse, freemem);
  fflush(fp); //perhaps we should flush less often?
  prevrtime = rtime;
}

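/* Reduce one metric across ranks and let rank 0 print the aggregate
   and/or mean, followed by the min and max with their ranks; struct eval
   matches the (double,int) layout expected by MPI_DOUBLE_INT with
   MPI_MINLOC/MPI_MAXLOC. */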
static
void coll_print(FILE *fp, const char *label, double val, int print_aggregate, MPI_Comm Comm){
  struct eval in;
  struct eval out;
  double sum;
  in.val = val;
  in.rank = rank;
  MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm);
  if(rank == 0){
    if(print_aggregate)
      fprintf(fp, "#%19s %14.3f %10.3f ", label, sum, sum/numranks);
    else
      fprintf(fp, "#%19s %10.3f ", label, sum/numranks);
  }

  MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm);
  if(rank == 0){
    fprintf(fp, "%4d %10.3f ", out.rank, out.val);
  }
  MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm);
  if(rank == 0){
    fprintf(fp, "%4d %10.3f\n", out.rank, out.val);
  }
}

static
void report_final(FILE *fp, const MPI_Comm *comm){
  double rtime, ptime;
  double avegflops;
  double gflop_opers;
  PAPI_dmem_info_t dmem;
  int error = 0;
  double l1hitratio;
  long_long end_usec_p;
  long_long end_usec_r;
  MPI_Comm Comm = comm ? *comm : MPI_COMM_WORLD;

  //get PAPI info; the first call initializes the PAPI counters
  end_usec_r = PAPI_get_real_usec();
  end_usec_p = PAPI_get_virt_usec();

  if(PAPI_accum_counters(papi_values, NUM_EV) != PAPI_OK)
    error++;

  if(PAPI_get_dmem_info(&dmem) != PAPI_OK)
    error++;

  rtime = (double)(end_usec_r - start_usec_r) / 1e6;
  ptime = (double)(end_usec_p - start_usec_p) / 1e6;
  l1hitratio = 100.0 * (double)papi_values[1] / (papi_values[0] + papi_values[1]);
  avegflops = (double)papi_values[2] / rtime / 1e9;
  gflop_opers = (double)papi_values[2] / 1e9;

  if(rank == 0){
    fprintf(fp, "####### CSC PERFMON REPORT \n");
    fprintf(fp, "# MPI tasks %d\n", numranks);
    fprintf(fp, "# aggregated average min(rank/val) max(rank/val) \n");
  }
  coll_print(fp, "Real time (s)", rtime, 1, Comm);
  coll_print(fp, "Process time (s)", ptime, 1, Comm);
  coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm);
  coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm);
  coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm);
  coll_print(fp, "Peak mem size (MB)", (double)dmem.peak/1.0e3, 0, Comm);
  coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark/1.0e3, 0, Comm);
  if(rank == 0) fprintf(fp, "####### \n");
  fflush(fp);
}

static
void *thread_worker(void *threadarg){
  //we could of course also read the global td directly
  struct thread_data *tdloc = (struct thread_data *) threadarg;
  //if zero (or negative) we do not report data
  if(tdloc->report_interval > 0){
    //split the interval into whole seconds and remaining microseconds
    int sec = (int)tdloc->report_interval;
    int usec = (int)((tdloc->report_interval - sec) * 1.0e6);
    //guard against a busy loop when the interval rounds down to zero
    while(sec + usec > 0){
      if(sec > 0) sleep(sec);
      if(usec > 0) usleep(usec);
      //signal the parent thread that it should report flops etc.
      pthread_kill(tdloc->parent_thread, SIGUSR1);
    }
  }

  return NULL;
}


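/* Shared setup for the MPI_Init wrappers and the Fortran entry points.
   PERFMON_INTERVAL gives the sampling period in seconds (for example
   "export PERFMON_INTERVAL=0.5"); when it is unset or below ~10 ms,
   only the final report is produced. */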
294 static
296 {
297  pthread_t t;
298  pthread_attr_t thread_attr;
299  int thread_id,thread_create_return;
300  int temp;
301  char *envvar;
302 
303  //initialize global values for process identity
305 
306  //init parameters
307 
308 
309  envvar=getenv("PERFMON_INTERVAL");
310  if(envvar==NULL){
311  td.report_interval=-1; //default never
312  }
313  else {
314  td.report_interval=atof(envvar);
315  }
316 
317 
318 
319  /* if report interval is larger or equal to 10ms then do "unsafe"
320  periodic reporting stuff; start up signal handling and launch
321  thread
322 
323  */
324 
325  if(td.report_interval>=0.009999) {
326  //initialize PAPI counters with periodic reporting
328  if(!init_error){
329  //print PAPI counters when receiving signal USR1
330  signal(SIGUSR1,report_periodic);
331 
332  //launch sampling thread
333  td.parent_thread=pthread_self();
334  temp=pthread_create(&t,NULL,thread_worker,(void *)&td);
335  }
336  }
337  else {
338  //initialize PAPI counters without periodic reporting
339  fprintf(stderr,"Calling report_init(0)\n");
341  if (init_error) fprintf(stderr,
342  "Unable to init PAPI counters (init_error=%d) : %s\n",
343  init_error,
344  PAPI_strerror(init_error));
345  }
346 }
347 
348 
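/* The __real_ symbols below are provided by GNU ld symbol wrapping:
   linking with -Wl,--wrap=MPI_Init redirects application calls to
   __wrap_MPI_Init while __real_MPI_Init resolves to the original
   function (and likewise for MPI_Init_thread and MPI_Finalize). */
extern int __real_MPI_Init(int *argc, char ***argv);
extern int __real_MPI_Init_thread(int *argc, char ***argv, int required, int *provided);
extern int __real_MPI_Finalize(void);
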
int __wrap_MPI_Init(int *argc, char ***argv)
{
  //call the real MPI_Init
  int ret = __real_MPI_Init(argc, argv);
  common_inits();
  return ret;
}


int __wrap_MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
{
  //call the real MPI_Init_thread
  int ret = __real_MPI_Init_thread(argc, argv, required, provided);
  common_inits();
  return ret;
}


int __wrap_MPI_Finalize()
{
  //only print if startup was without errors
  if(!init_error)
    report_final(stdout, NULL);
  return __real_MPI_Finalize();
}

/* Fortran-callable entry points covering common compiler
   name-mangling conventions */
void csc_perfmon_begin () { common_inits(); }
void csc_perfmon_begin_ () { common_inits(); }
void csc_perfmon_begin__ () { common_inits(); }
void CSC_PERFMON_BEGIN () { common_inits(); }

void csc_perfmon_end () { if(!init_error) report_final(stdout,NULL); }
void csc_perfmon_end_ () { if(!init_error) report_final(stdout,NULL); }
void csc_perfmon_end__ () { if(!init_error) report_final(stdout,NULL); }
void CSC_PERFMON_END () { if(!init_error) report_final(stdout,NULL); }

#else

/* placeholder so the translation unit is not empty without PAPI */
void dummy_Opfla_PERFMOM() {}

#endif
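
The wrappers above only take effect when the binary is linked with GNU ld symbol wrapping (flags along the lines of -Wl,--wrap=MPI_Init,--wrap=MPI_Init_thread,--wrap=MPI_Finalize; the exact SURFEX build flags are not shown in this file). Alternatively, the Fortran-callable entry points can bracket a region explicitly, as in this minimal, hypothetical driver (it assumes the file was compiled with -DPAPI and linked against the PAPI and MPI libraries):

/* example_driver.c -- hypothetical usage sketch, not part of SURFEX */
#include <mpi.h>

void csc_perfmon_begin_(void);  /* defined above: starts the PAPI counters */
void csc_perfmon_end_(void);    /* defined above: prints the final report */

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);  /* must come first: common_inits() queries the rank */
  csc_perfmon_begin_();

  /* ... application work to be measured ... */

  csc_perfmon_end_();      /* prints the aggregated CSC PERFMON REPORT */
  MPI_Finalize();
  return 0;
}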