/* Reconstructed include set and file-scope state, inferred from the calls
   in the fragments below; the original header block is not shown. */
#define _GNU_SOURCE                     /* CPU_SET macros, sched_getaffinity */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>
#include <sched.h>
#include <mpi.h>
#include <papi.h>

#define NUM_EV 4                        /* number of PAPI events tracked */

long_long papi_values[NUM_EV];          /* running totals of the PAPI counters */
static FILE *fp;                        /* report stream (assumption: file scope) */
static int rank, numranks;              /* cached MPI identity (assumption) */
static long_long start_usec_r, start_usec_p;  /* timestamps taken at init (assumption) */

/* Data handed to the timer thread. */
struct thread_data {
    pthread_t parent_thread;            /* thread to signal when a report is due */
    float report_interval;              /* reporting interval in seconds */
};

static struct thread_data td;
/* Render a cpu_set_t as a compact, run-length-encoded string such as
   "0-3,6".  The run counting and trailing-comma handling are reconstructed
   around the visible fragments. */
static char *cpuset_to_cstr(cpu_set_t *mask, char *str)
{
    char *ptr = str;
    int i, j, entry_made = 0;

    for (i = 0; i < CPU_SETSIZE; i++) {
        if (CPU_ISSET(i, mask)) {
            int run = 0;
            entry_made = 1;
            for (j = i + 1; j < CPU_SETSIZE; j++) {
                if (CPU_ISSET(j, mask)) run++;
                else break;
            }
            if (run == 0)
                sprintf(ptr, "%d,", i);
            else if (run == 1) {
                sprintf(ptr, "%d,%d,", i, i + 1);
                i++;
            } else {
                sprintf(ptr, "%d-%d,", i, i + run);
                i += run;
            }
            while (*ptr != 0) ptr++;    /* advance to the end of the string */
        }
    }
    ptr -= entry_made;                  /* drop the trailing comma, if any */
    *ptr = '\0';
    return str;
}
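A tiny driver to exercise the function above (hypothetical; the buffer size
is an arbitrary choice, not from the source):

int main(void)
{
    cpu_set_t mask;
    char buf[7 * CPU_SETSIZE];          /* generous worst-case buffer */

    CPU_ZERO(&mask);
    CPU_SET(0, &mask); CPU_SET(1, &mask); CPU_SET(2, &mask); CPU_SET(6, &mask);
    printf("%s\n", cpuset_to_cstr(&mask, buf));   /* prints "0-2,6" */
    return 0;
}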
/* Collect the identity of this process: host name, core affinity string,
   MPI rank/size and PID.  The affinity and pid lines are reconstructed
   from the function's signature. */
static void getProcessIdentity(char *hostname, int hostnamesize,
                               char *affinity, int affinitysize,
                               int *rank, int *numranks, pid_t *pid)
{
    cpu_set_t coremask;

    (void)gethostname(hostname, hostnamesize);
    (void)sched_getaffinity(0, sizeof(coremask), &coremask);
    cpuset_to_cstr(&coremask, affinity);
    MPI_Comm_rank(MPI_COMM_WORLD, rank);
    MPI_Comm_size(MPI_COMM_WORLD, numranks);
    *pid = getpid();
}
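A hedged usage sketch (buffer sizes are illustrative, not from the source):

char host[64], affinity[512];
int myrank, nranks;
pid_t pid;

getProcessIdentity(host, sizeof(host), affinity, sizeof(affinity),
                   &myrank, &nranks, &pid);
fprintf(stderr, "rank %d/%d on %s (pid %d, cores %s)\n",
        myrank, nranks, host, (int)pid, affinity);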
/* Return the integer value (in kB) following `label` in /proc/meminfo,
   or -1 if it is not found.  Loop-exit handling and cleanup are
   reconstructed. */
static int parseMeminfo(const char *label)
{
    FILE *fp = fopen("/proc/meminfo", "r");
    char line[256];
    char *subline;
    int val = -1, done = 0;
    int label_length = strlen(label);

    while (!done && fp != NULL) {
        if (fgets(line, sizeof(line), fp)) {
            subline = strstr(line, label);
            if (subline != NULL) {
                val = atoi(subline + label_length);
                done = 1;
            }
        } else
            done = 1;                   /* EOF or read error */
    }
    if (fp != NULL) fclose(fp);
    return val;
}
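The caller does the unit conversion; for instance, free memory in MB (the
exact label string, colon included, is an assumption about the call site):

double freemem = (double)parseMeminfo("MemFree:") / 1.0e3;  /* meminfo reports kB */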
static int first_time = 1;              /* run the PAPI setup only once */

/* Open the per-rank report file and start the hardware counters.  The
   periodicreport guard, the once-only reset and the return values are
   reconstructed around the visible fragments. */
static int report_init(int periodicreport)
{
    int events[NUM_EV];
    int rc, num;
    char fname[256];                    /* buffer size reconstructed */

    if (periodicreport) {               /* guard reconstructed */
        sprintf(fname, "perfmon_report_%d.dat", rank);
        fp = fopen(fname, "w");
        fprintf(fp, "#rtime ptime gflops l1-hit memusage(MB) freeMem(MB)\n");
    }

    if (!first_time)
        return 0;
    first_time = 0;

    events[0] = PAPI_L1_DCM;            /* L1 data cache misses */
    events[1] = PAPI_L1_DCH;            /* L1 data cache hits */
    events[2] = PAPI_FP_OPS;            /* floating point operations */
    events[3] = PAPI_TOT_INS;           /* total instructions */

    rc = (num = PAPI_num_counters());
    if (num < NUM_EV)                   /* guard reconstructed */
        PAPI_perror(rc, "PAPI_num_counters",
                    strlen("PAPI_num_counters"));   /* pre-5.0 three-argument PAPI_perror */

    rc = PAPI_start_counters(events, NUM_EV);
    return (rc == PAPI_OK) ? 0 : rc;    /* return value reconstructed */
}
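For readers unfamiliar with PAPI's classic high-level counter API used above
(available through PAPI 5.x, removed in 6.0), a minimal self-contained
sketch; the event pair and the busy loop are illustrative:

#include <stdio.h>
#include <papi.h>

#define N_EV 2

int main(void)
{
    int events[N_EV] = { PAPI_FP_OPS, PAPI_TOT_INS };
    long_long values[N_EV];
    double x = 1.0;
    int i;

    if (PAPI_num_counters() < N_EV) return 1;   /* not enough hardware counters */
    if (PAPI_start_counters(events, N_EV) != PAPI_OK) return 1;

    for (i = 0; i < 1000000; i++) x *= 1.0000001;   /* some floating point work */

    if (PAPI_stop_counters(values, N_EV) != PAPI_OK) return 1;
    printf("fp ops: %lld, instructions: %lld (x=%f)\n",
           values[0], values[1], x);
    return 0;
}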
/* Periodic report: writes one line per interval to fp.  The temp array,
   the rtime/ptime derivation, the L1 denominator and the prevrtime update
   are reconstructed and marked as such. */
static void report_periodic()
{
    static double prevrtime = 0.0;
    double rtime, ptime;
    double gflops, l1hitratio;
    double memuse, freemem;
    PAPI_dmem_info_t dmem;
    long_long values[NUM_EV];           /* fresh counts; name reconstructed */
    long_long end_usec_r, end_usec_p;
    long_long prev_values[NUM_EV];
    int i;

    end_usec_r = PAPI_get_real_usec();
    end_usec_p = PAPI_get_virt_usec();
    rtime = (double)(end_usec_r - start_usec_r) / 1.0e6;   /* wall clock, s */
    ptime = (double)(end_usec_p - start_usec_p) / 1.0e6;   /* CPU time, s */

    PAPI_read_counters(values, NUM_EV); /* reconstructed: read and reset */
    PAPI_get_dmem_info(&dmem);
    for (i = 0; i < NUM_EV; i++) {
        prev_values[i] = papi_values[i];  /* totals as of the previous report */
        papi_values[i] += values[i];      /* accumulate running totals */
    }

    /* L1 hit ratio = hits / (misses + hits); denominator reconstructed. */
    l1hitratio = 100.0 * (double)(papi_values[1] - prev_values[1])
               / (double)(papi_values[0] - prev_values[0]
                        + papi_values[1] - prev_values[1]);
    gflops = (double)(papi_values[2] - prev_values[2])
           / (rtime - prevrtime) / 1e9;
    memuse = (double)dmem.size / 1.0e3;                  /* kB -> MB */
    freemem = (double)parseMeminfo("MemFree:") / 1.0e3;  /* label assumed */

    fprintf(fp, " %f %f %f %f %f %f\n",
            rtime, ptime, gflops, l1hitratio, memuse, freemem);
    prevrtime = rtime;                  /* reconstructed bookkeeping */
}
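A quick check of the units: PAPI_FP_OPS counts individual floating point
operations, so if a reporting interval of 2.0 s accumulates 5.0e9 operations,
gflops = 5.0e9 / 2.0 / 1e9 = 2.5 GFlop/s. Likewise, the 1.0e3 divisors imply
that both PAPI_get_dmem_info and /proc/meminfo report kB, which the code
converts to the MB promised in the file header.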
/* Print one report row for `val`: aggregate/average on rank 0, then the
   rank and value of the minimum and maximum across the communicator.
   The rank-0 guards and the pair struct are reconstructed from the
   reduction calls; the struct layout matches MPI_DOUBLE_INT. */
static void coll_print(FILE *fp, const char *label, double val,
                       int print_aggregate, MPI_Comm Comm)
{
    double sum;
    int rank, numranks;
    struct { double val; int rank; } in, out;

    MPI_Comm_size(Comm, &numranks);
    MPI_Comm_rank(Comm, &rank);
    MPI_Reduce(&val, &sum, 1, MPI_DOUBLE, MPI_SUM, 0, Comm);
    if (rank == 0) {
        if (print_aggregate)
            fprintf(fp, "#%19s %14.3f %10.3f ", label, sum, sum / numranks);
        else
            fprintf(fp, "#%19s %10.3f ", label, sum / numranks);
    }
    in.val = val;
    in.rank = rank;
    MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MINLOC, 0, Comm);
    if (rank == 0)
        fprintf(fp, "%4d %10.3f ", out.rank, out.val);
    MPI_Reduce(&in, &out, 1, MPI_DOUBLE_INT, MPI_MAXLOC, 0, Comm);
    if (rank == 0)
        fprintf(fp, "%4d %10.3f\n", out.rank, out.val);
}
/* Final summary, written once at MPI_Finalize time.  Error handling, the
   rank-0 guards and the derivation of the summary metrics are
   reconstructed around the visible fragments. */
static void report_final(FILE *fp, const MPI_Comm *comm)
{
    double memuse, peakmem;
    double gflops, avegflops;
    double ptime, l1hitratio, gflop_opers;  /* declarations reconstructed */
    PAPI_dmem_info_t dmem;
    long_long end_usec_p;
    long_long end_usec_r;
    MPI_Comm Comm = comm ? *comm : MPI_COMM_WORLD;

    end_usec_r = PAPI_get_real_usec();
    end_usec_p = PAPI_get_virt_usec();
    if (PAPI_accum_counters(papi_values, NUM_EV) != PAPI_OK)
        fprintf(stderr, "PAPI_accum_counters failed\n");  /* handling reconstructed */
    if (PAPI_get_dmem_info(&dmem) != PAPI_OK)
        fprintf(stderr, "PAPI_get_dmem_info failed\n");   /* handling reconstructed */

    /* ... derivation of ptime, avegflops, gflop_opers and l1hitratio from
       the timestamps and counter totals elided in the original ... */

    if (rank == 0) {                    /* guard reconstructed */
        fprintf(fp, "####### CSC PERFMON REPORT \n");
        fprintf(fp, "# MPI tasks %d\n", numranks);
        fprintf(fp, "# aggregated average min(rank/val) max(rank/val) \n");
    }
    coll_print(fp, "Process time (s)", ptime, 1, Comm);
    coll_print(fp, "Flops (GFlop/s)", avegflops, 1, Comm);
    coll_print(fp, "Flp-opers (10^9)", gflop_opers, 1, Comm);
    coll_print(fp, "L1 hit ratio (%)", l1hitratio, 0, Comm);
    coll_print(fp, "Peak mem size (MB)", (double)dmem.peak / 1.0e3, 0, Comm);
    coll_print(fp, "Peak resident (MB)", (double)dmem.high_water_mark / 1.0e3, 0, Comm);
    if (rank == 0)
        fprintf(fp, "####### \n");
}
/* Timer thread: sleep for report_interval, then poke the parent thread
   with SIGUSR1 so its handler can emit a periodic report.  The repeat
   loop is reconstructed; the fragments show one iteration. */
static void *thread_worker(void *threadarg)
{
    struct thread_data *tdloc = (struct thread_data *) threadarg;

    if (tdloc->report_interval > 0) {
        int sec = (int)tdloc->report_interval;
        int usec = (int)((tdloc->report_interval - sec) * 1.0e6);
        for (;;) {
            if (sec > 0) sleep(sec);
            if (usec > 0) usleep(usec);
            pthread_kill(tdloc->parent_thread, SIGUSR1);
        }
    }
    return NULL;
}
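On the receiving side, the parent thread would install a handler so that
each SIGUSR1 triggers one report line; a hedged sketch (the handler name
and registration site are assumptions, not perfmon's actual code):

static void perfmon_sigusr1_handler(int sig)
{
    (void)sig;
    report_periodic();                  /* emit one line of the periodic report */
}

/* ... during initialization ... */
struct sigaction sa;
sa.sa_handler = perfmon_sigusr1_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
sigaction(SIGUSR1, &sa, NULL);

Strictly speaking, fprintf and the PAPI calls made by report_periodic are
not async-signal-safe; a monitoring tool typically accepts that risk.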
/* Shared setup for all entry points: read the reporting interval from the
   environment, spawn the timer thread if periodic reporting is requested,
   and initialize the counters.  The branch structure, the pthread_create
   call and the error-path variables are reconstructed. */
static void common_inits()
{
    pthread_t worker;                   /* name reconstructed */
    pthread_attr_t thread_attr;
    int thread_id, thread_create_return;
    int init_error;                     /* name reconstructed */
    char *envvar;

    envvar = getenv("PERFMON_INTERVAL");
    if (envvar == NULL)
        td.report_interval = -1;        /* periodic reporting disabled */
    else
        td.report_interval = atof(envvar);

    /* Require roughly 10 ms as the minimum sensible interval. */
    if (td.report_interval >= 0.009999) {
        /* ... periodic-report setup partly elided in the original ... */
        td.parent_thread = pthread_self();
        pthread_attr_init(&thread_attr);
        thread_create_return = pthread_create(&worker, &thread_attr,
                                              thread_worker, (void *)&td);
    }

    fprintf(stderr, "Calling report_init(0)\n");
    init_error = report_init(0);        /* call site reconstructed */
    if (init_error)
        fprintf(stderr,
                "Unable to init PAPI counters (init_error=%d) : %s\n",
                init_error, PAPI_strerror(init_error));  /* arguments reconstructed */
}
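Before any of the high-level counter calls, the PAPI library itself must be
initialized; PAPI_num_counters does this implicitly, but the explicit idiom
is worth showing. A hedged sketch (the helper name is hypothetical):

/* Hypothetical helper: the usual PAPI bootstrap idiom. */
static int init_papi_library(void)
{
    int ver = PAPI_library_init(PAPI_VER_CURRENT);
    if (ver > 0 && ver != PAPI_VER_CURRENT) {
        fprintf(stderr, "PAPI library version mismatch\n");
        return -1;
    }
    if (ver < 0) {
        fprintf(stderr, "PAPI init failed: %s\n", PAPI_strerror(ver));
        return -1;
    }
    return 0;
}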
int __wrap_MPI_Init(int *argc, char ***argv)
{
    int ret = __real_MPI_Init(argc, argv);
    /* ... perfmon setup, presumably common_inits() (elided) ... */
    return ret;
}

int __wrap_MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
{
    /* Forward to the threaded initializer; the original fragment wrongly
       passed these four arguments to __real_MPI_Init. */
    int ret = __real_MPI_Init_thread(argc, argv, required, provided);
    /* ... perfmon setup, presumably common_inits() (elided) ... */
    return ret;
}

int __wrap_MPI_Finalize()
{
    /* ... final report emitted before shutdown (elided) ... */
    return __real_MPI_Finalize();
}
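The __wrap_/__real_ pairs rely on GNU ld's --wrap feature: with
--wrap=MPI_Init, every undefined reference to MPI_Init is redirected to
__wrap_MPI_Init, while __real_MPI_Init resolves to the original symbol.
A hypothetical link line (object and library names are assumptions):

mpicc -o app app.o perfmon.o \
      -Wl,--wrap=MPI_Init -Wl,--wrap=MPI_Init_thread -Wl,--wrap=MPI_Finalize \
      -lpapi -lpthread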
For reference, the symbols defined in the perfmon source:

static void report_final(FILE *fp, const MPI_Comm *comm)
void csc_perfmon_begin_()
static void common_inits()
static void report_periodic()
void csc_perfmon_begin__()
static char *cpuset_to_cstr(cpu_set_t *mask, char *str)
static int report_init(int periodicreport)
static int parseMeminfo(const char *label)
static void *thread_worker(void *threadarg)
int __wrap_MPI_Init_thread(int *argc, char ***argv, int required, int *provided)
long_long papi_values[NUM_EV]
int __wrap_MPI_Init(int *argc, char ***argv)
int __wrap_MPI_Finalize()
void dummy_Opfla_PERFMOM()
static void coll_print(FILE *fp, const char *label, double val, int print_aggregate, MPI_Comm Comm)
static void getProcessIdentity(char *hostname, int hostnamesize, char *affinity, int affinitysize, int *rank, int *numranks, pid_t *pid)