SURFEX v8.1
General documentation of Surfex
drhook.c
Go to the documentation of this file.
1 #define _DRHOOK_C_ 1
2 
3 #define HOST_NAME_MAX 100
4 
5 /*
6  drhook.c
7 
8  Author: Sami Saarinen, ECMWF, 14..24-Nov-2003
9 
10  Thanks to Bob Walkup & John Hague for IBM Power4 version
11  Thanks to Bob Carruthers for Cray X1 (SV2), XD1 and XT3 versions,
12  as well as David Tanqueray for the flop routines
13 
14  Also thanks to Roland Richter for suggesting the use
15  of "call tracebackqq()" function.
16  In our environment this is accomplished by calling fortran
17  routine intel_trbk() from ifsaux/utilities/gentrbk.F90.
18  This source must be compiled with -DINTEL flag, too.
19 
20 */
21 
22 /*
23 If intending to run on IBM P4+ or newer systems the following definition
24 should be activated to use pm_initialize() instead of pm_init() of PMAPI-lib ($LIBHPM)
25 #define PMAPI_POST_P4
26 */
27 
28 /*
29 If *ALSO* intending to run on IBM P5+ systems, then set also BOTH
30 #define PMAPI_POST_P4
31 #define PMAPI_P5_PLUS
32 */
33 
34 /* Thanks to John Hague (IBM)
35  If intending to run on IBM p6 systems, then set also BOTH
36 #define PMAPI_POST_P4
37 #define PMAPI_P6
38  */
39 
40 #ifndef INTEL
41 #ifdef __INTEL_COMPILER
42 #define INTEL
43 #endif
44 #endif
45 
46 #if defined(PMAPI_P7)
47 #define ENTRY_4 5
48 #define ENTRY_6 4
49 #elif defined(PMAPI_P6)
50 #define ENTRY_4 5
51 #define ENTRY_6 4
52 #elif defined(PMAPI_P5_PLUS)
53 #define ENTRY_4 5
54 #define ENTRY_6 4
55 #else
56 #define ENTRY_4 4
57 #define ENTRY_6 6
58 #endif
59 
60 #if defined(SV2) || defined(XD1) || defined(XT3)
61 #define DT_FLOP
62 #define HPM
63 #define MAX_COUNTERS 6
64 #endif
65 
66 #ifdef RS6K
67 #pragma options opt=3 halt=e
68 #include <pthread.h>
69 #endif
70 
71 /* === This doesn't handle recursive calls correctly (yet) === */
72 
73 #include "drhook.h"
74 
75 static void set_timed_kill();
76 static void process_options();
77 static char *TimeStr(char *s, int slen);
78 
79 int drhook_memtrace = 0; /* set to 1, if opt_memprof or opt_timeline ; used in getcurheap.c to lock stuff */
80 
81 #if !defined(CACHELINESIZE)
82 /* ***Note: A hardcoded cache line size in bytes !!! */
83 #ifdef RS6K
84 #define CACHELINESIZE 128
85 #else
86 #define CACHELINESIZE 64
87 #endif
88 #endif
89 
90 #include "crc.h"
91 #include <time.h>
92 
93 static char *start_stamp = NULL;
94 static char *end_stamp = NULL;
95 
96 #if defined(NECSX)
97 #pragma cdir options -Nv -Csopt
98 extern void necsx_trbk_(const char *msg, int msglen); /* from ../utilities/gentrbk.F90 */
99 #endif
100 
101 #if defined(LINUX) && !defined(XT3) && !defined(XD1) && !defined(CYGWIN)
102 
103 #if defined(__GNUC__) && !defined(NO_TRAPFPE)
104 
105 #define _GNU_SOURCE 1
106 #if defined(CYGWIN)
107 #include <mingw/fenv.h>
108 #else
109 #include <fenv.h>
110 #endif
111 #if defined(DARWIN)
112  /* A temporary fix to link on MacIntosh. Something more clever will be done later -REK. */
113 void feenableexcept() { }
114 void fedisableexcept() { }
115 #endif
116 
117 static void trapfpe(void)
118 {
119  /* Enable some exceptions. At startup all exceptions are masked. */
120  (void) feenableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
121 }
122 
123 static void untrapfpe(void)
124 {
125  /* Disable some exceptions. At startup all exceptions are masked. */
126  (void)fedisableexcept(FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW);
127 }
128 
129 #endif /* defined(__GNUC__) */
130 
131 #endif /* defined(LINUX) && !defined(XT3) && !defined(XD1) */
132 
133 #if (!defined(LINUX) || defined(CYGWIN) || defined(NO_TRAPFPE)) && defined(__GNUC__)
134 /* For example Solaris with gcc */
135 #define trapfpe()
136 #define untrapfpe()
137 #endif
138 
139 #ifndef drhook_harakiri_timeout_default
140 #define drhook_harakiri_timeout_default 500
141 #endif
142 
143 static int drhook_harakiri_timeout = drhook_harakiri_timeout_default;
144 static int drhook_trapfpe = 1;
145 
146 static int atp_enabled = 0; /* Cray ATP specific */
147 static int atp_max_cores = 20; /* Cray ATP specific */
148 static int atp_max_analysis_time = 300; /* Cray ATP specific */
149 static int atp_ignore_sigterm = 0; /* Cray ATP specific */
150 
151 static int any_memstat = 0;
152 static int opt_gethwm = 0;
153 static int opt_getstk = 0;
154 static int opt_getrss = 0;
155 static int opt_getpag = 0;
156 static int opt_walltime = 0;
157 static int opt_cputime = 0;
158 static int opt_wallprof = 0;
159 static int opt_cpuprof = 0;
160 static int opt_hpmprof = 0;
161 static int opt_memprof = 0;
162 static int opt_trim = 0;
163 static int opt_calls = 0;
164 static int opt_self = 1; /* 0=exclude drhook altogether,
165  1=include, but don't print,
166  2=also print */
167 static int opt_propagate_signals = 1;
168 static int opt_sizeinfo = 1;
169 static int opt_clusterinfo = 0;
170 static int opt_callpath = 0;
171 #define callpath_indent_default 2
172 static int callpath_indent = callpath_indent_default;
173 #define callpath_depth_default 50
174 static int callpath_depth = callpath_depth_default;
175 static int callpath_packed = 0;
176 
177 static int opt_calltrace = 0;
178 static int opt_funcenter = 0;
179 static int opt_funcexit = 0;
180 
181 static int opt_timeline = 0; /* myproc or -1 [or 0 for --> timeline feature off (default)] */
182 static int opt_timeline_thread = 1; /* thread-id control :
183  <= 0 print for all threads
184  1 -> #1 only [but curheap still SUM of all threads] (default),
185  n -> print for increasing number of threads separately : [1..n] */
186 static int opt_timeline_format = 1; /* if 1, print only {wall,hwm,rss,curheap} w/o labels "wall=" etc.; else fully expanded fmt */
187 static int opt_timeline_unitno = 6; /* Fortran unit number : default = 6 i.e. stdout */
188 static long long int opt_timeline_freq = 1000000; /* How often to print : every n-th call : default = every 10^6 th call or ... */
189 static double opt_timeline_MB = 1.0; /* ... rss or curheap jumps up/down by more than this many MBytes (default = 1) : unit MBytes */
190 
191 static volatile sig_atomic_t opt_gencore = 0;
192 static int opt_gencore_signal = 0;
193 
194 static int hpm_grp = 0;
195 static int opt_random_memstat = 0; /* > 0 if to obtain random memory stats (maxhwm, maxstk) for tid=1. Updated when rand() % opt_random_memstat == 0 */
196 
197 /* Begin of developer options */
198 static char *drhook_timed_kill = NULL; /* Timer assisted simulated kill of procs/threads by signal */
199 static int drhook_dump_smaps = 0; /* Print /proc/<tid>/smaps from signal handler (before moving to ATP or below) */
200 static int drhook_dump_buddyinfo = 0; /* Print /proc/buddyinfo from signal handler (before moving to ATP or below) */
201 static int drhook_dump_hugepages = 0;
202 static double drhook_dump_hugepages_freq = 0;
203 /* End of developer options */
204 
205 typedef struct drhook_timeline_t {
206  unsigned long long int calls[2]; /* 0=drhook_begin , 1=drhook_end */
207  double last_curheap_MB;
208  double last_rss_MB;
209  char pad[CACHELINESIZE - (2*sizeof(unsigned long long int) + 2*sizeof(double))]; /* padding : e.g. 64 bytes - 4*8 bytes */
210 } drhook_timeline_t; /* cachelinesize optimized --> less false sharing when running with OpenMP */
211 
213 
214 /* HPM-specific */
215 
216 static long long int opt_hpmstop_threshold = -1;
217 static double opt_hpmstop_mflops = 1000000.0; /* Yes, 1 PetaFlop/s !! */
218 
219 
220 #define DRHOOK_STRBUF 1000
221 
222 #ifndef SA_SIGINFO
223 #define SA_SIGINFO 0
224 #define SIG_EXTRA_ARGS /* empty */
225 #define SIG_PASS_EXTRA_ARGS /* empty */
226 #else
227 #define SIG_EXTRA_ARGS , siginfo_t *sigcode, void *sigcontextptr
228 #define SIG_PASS_EXTRA_ARGS , sigcode, sigcontextptr
229 #endif
230 
231 #define NIL "(nil)"
232 
233 #undef MIN
234 #define MIN(a,b) ( (a) < (b) ? (a) : (b) )
235 
236 #undef MAX
237 #define MAX(a,b) ( (a) > (b) ? (a) : (b) )
238 
239 #undef ABS
240 #define ABS(x) ( (x) >= 0 ? (x) : -(x) )
241 
242 #define strequ(s1,s2) ((void *)s1 && (void *)s2 && strcmp(s1,s2) == 0)
243 #define strnequ(s1,s2,n) ((void *)s1 && (void *)s2 && memcmp(s1,s2,n) == 0)
244 
245 extern long long int getstk_();
246 extern long long int getmaxstk_();
247 extern long long int gethwm_();
248 extern long long int getmaxhwm_();
249 extern long long int getrss_();
250 extern long long int getmaxrss_();
251 extern long long int getcurheap_();
252 extern long long int getmaxcurheap_();
253 extern long long int getcurheap_thread_(const int *tidnum); /* *tidnum >= 1 && <= max_threads */
254 extern long long int getmaxcurheap_thread_(const int *tidnum); /* *tidnum >= 1 && <= max_threads */
255 extern long long int getpag_();
256 
257 extern void ec_set_umask_();
258 
259 #if defined(DT_FLOP)
260 extern double flop_();
261 #endif
262 
263 extern double util_cputime_();
264 extern double util_walltime_();
265 
266 #ifdef RS6K
267 static long long int irtc_start = 0;
268 extern long long int irtc();
269 #define WALLTIME() ((double)(irtc() - irtc_start)*1.0e-9)
270 #define CPUTIME() util_cputime_()
271 #elif defined(CRAYXT)
272 /* Cray XT3/XT4 with catamount microkernel */
273 #include <catamount/dclock.h>
274 static double dclock_start = 0;
275 #define WALLTIME() (dclock() - dclock_start)
276 #define CPUTIME() WALLTIME()
277 #else
278 #if defined(SV2)
279 #include <intrinsics.h>
280 #endif
281 #if defined(XD1) || defined(XT3)
282 extern long long int irtc_(); /* integer*8 irtc() */
283 extern long long int irtc_rate_(); /* integer*8 irtc_rate() */
284 #endif
285 #if defined(SV2) || defined(XD1) || defined(XT3)
286 static long long int irtc_start = 0;
287 static double my_irtc_rate = 0;
288 static double my_inv_irtc_rate = 0;
289 #if defined(SV2)
290 #define WALLTIME() ((double)(_rtc() - irtc_start)*my_inv_irtc_rate)
291 #else
292 #define WALLTIME() ((double)(irtc_() - irtc_start)*my_inv_irtc_rate)
293 #endif
294 #define CPUTIME() util_cputime_()
295 #else
296 #define WALLTIME() util_walltime_()
297 #define CPUTIME() util_cputime_()
298 #endif
299 #endif
300 
301 /* #define RAISE(x) { int tmp = x; c_drhook_raise_(&tmp); } */
302 #include "raise.h"
303 #include "cargs.h"
304 
305 extern int get_thread_id_();
306 extern void LinuxTraceBack(const char *prefix, const char *timestr, void *sigcontextptr);
307 
308 /*** typedefs ***/
309 
310 typedef union {
311  struct drhook_key_t *keyptr;
312  double d;
313  unsigned long long int ull;
314 } equivalence_t;
315 
316 typedef struct drhook_key_t {
317  char *name;
318  unsigned short name_len;
319  const equivalence_t *callpath; /* parent's tree down to callpath_depth */
320  int callpath_len;
321  unsigned int callpath_fullhash;
322  unsigned short status; /* 0=inactive, >1 active */
323  unsigned long long int calls;
324  long long int hwm, maxrss, rssnow, stack, maxstack, paging;
325  double wall_in, delta_wall_all, delta_wall_child;
326  double cpu_in, delta_cpu_all, delta_cpu_child;
327 #ifdef HPM
328  unsigned char hpm_stopped, counter_stopped;
329  double this_delta_wall_child;
330  double avg_mipsrate, avg_mflops;
331  unsigned long long int hpm_calls;
332  double mip_count_in, mflop_count_in;
333  long long int *counter_in, *counter_sum;
334 #endif
335  char *filename; /* the filename where the 1st call (on this routine-name)
336  to dr_hook() occurred */
337  long long int sizeinfo; /* # of data elements, bytes, etc. */
338  long long int min_sizeinfo, max_sizeinfo; /* min & max of # of data elements, bytes, etc. */
339  /* memprof specific */
340  long long int mem_seenmax;
341  long long int mem_child, mem_curdelta;
342  long long int maxmem_selfdelta, maxmem_alldelta;
343  long long int mem_maxhwm, mem_maxrss, mem_maxstk, mem_maxpagdelta;
344  long long int paging_in;
345  unsigned long long int alloc_count, free_count;
346  struct drhook_key_t *next;
347 } drhook_key_t;
348 
349 typedef struct drhook_calltree_t {
350  int active;
351  drhook_key_t *keyptr;
352  struct drhook_calltree_t *next;
353  struct drhook_calltree_t *prev;
355 
356 typedef struct drhook_sig_t {
357  char name[32];
358  struct sigaction new;
359  struct sigaction old;
360  int active;
361  int ignore_atexit;
362 } drhook_sig_t;
363 
364 typedef union {
365  void (*func1args)(int sig);
366  void (*func3args)(int sig SIG_EXTRA_ARGS);
367 } drhook_sigfunc_t;
368 
369 typedef struct drhook_prof_t {
370  double pc;
371  double total;
372  double self;
373  unsigned long long int calls;
374  double percall_ms_self;
375  double percall_ms_total;
376  double mipsrate, mflops, divpc;
377  int index;
378  int tid;
379  int cluster;
380  double *maxval;
381  unsigned char is_max;
382  char *name;
383  char *filename;
384  long long int sizeinfo;
385  long long int min_sizeinfo, max_sizeinfo;
386  double sizespeed, sizeavg;
387  const equivalence_t *callpath; /* parent's tree down to callpath_depth */
388  int callpath_len;
389 } drhook_prof_t;
390 
391 typedef struct drhook_memprof_t {
392  double pc;
393  long long int self;
394  long long int children;
395  long long int hwm, rss, stk, pag, leaked;
396  unsigned long long int calls, alloc_count, free_count;
397  int index;
398  int tid;
399  int cluster;
400  long long int *maxval;
401  unsigned char is_max;
402  char *name;
403  char *filename;
404  const equivalence_t *callpath; /* parent's tree down to callpath_depth */
405  int callpath_len;
407 
408 #define MAX_WATCH_FIRST_NBYTES 8
409 
410 typedef struct drhook_watch_t {
411  char *name;
412  int tid;
413  int active;
414  int abort_if_changed;
415  const char *ptr;
416  int nbytes;
417  int watch_first_nbytes;
418  char first_nbytes[MAX_WATCH_FIRST_NBYTES];
419  unsigned int crc32;
420  int printkey;
421  int nvals;
422  struct drhook_watch_t *next;
424 
425 typedef struct drhook_prefix_t {
426  char s[256];
427  char timestr[128];
428  int nsigs;
430 
431 /*** static (local) variables ***/
432 
434 static int numthreads = 0;
435 static int myproc = 1;
436 static int nproc = -1;
437 static int max_threads = 1;
438 static pid_t pid = -1;
439 static drhook_key_t **keydata = NULL;
440 static drhook_calltree_t **calltree = NULL;
441 static drhook_calltree_t **thiscall = NULL;
442 static int signals_set = 0;
443 static volatile sig_atomic_t signal_handler_called = 0;
444 static volatile sig_atomic_t signal_handler_ignore_atexit = 0;
445 static volatile sig_atomic_t unlimited_corefile_retcode = 9999;
446 static volatile unsigned long long int saved_corefile_hardlimit = 0;
447 static int allow_coredump = -1; /* -1 denotes ALL MPI-tasks, 1..NPES == myproc, 0 = coredump will not be enabled by DrHook at init */
448 static drhook_sig_t siglist[1+NSIG] = { 0 };
449 static char *a_out = NULL;
450 static char *mon_out = NULL;
451 static int mon_out_procs = -1;
452 static double percent_limit = -10; /* Lowest percentage accepted into the printouts */
453 static drhook_key_t **keyself = NULL; /* pointers to itself (per thread) */
454 static double *overhead; /* Total Dr.Hook-overhead for every thread in either WALL or CPU secs */
455 static drhook_key_t **curkeyptr = NULL; /* pointers to current keyptr (per thread) */
456 static drhook_watch_t *watch = NULL;
457 static drhook_watch_t *last_watch = NULL;
458 static int watch_count = 0; /* No. of *active* watch points */
459 static drhook_prefix_t *ec_drhook = NULL;
460 static int timestr_len = 0;
461 
462 #define PREFIX(tid) (ec_drhook && tid >= 1 && tid <= numthreads) ? ec_drhook[tid-1].s : ""
463 #define TIDNSIGS(tid) (ec_drhook && tid >= 1 && tid <= numthreads) ? ec_drhook[tid-1].nsigs : -1
464 #define TIMESTR(tid) (timestr_len > 0 && ec_drhook && tid >= 1 && tid <= numthreads) ? TimeStr(ec_drhook[tid-1].timestr,timestr_len) : ""
465 #define FFL __FUNCTION__,__FILE__,__LINE__
466 
467 #ifndef SYS_gettid
468 #define SYS_gettid __NR_gettid
469 #endif
470 
471 static pid_t gettid() {
472  pid_t tid = syscall(SYS_gettid);
473  return tid;
474 }
475 
476 
477 #if !defined(NCALLSTACK)
478 #ifdef PARKIND1_SINGLE
479 /* > 0 : USE call stack approach : needed for single precision version */
480 #define NCALLSTACK 64
481 #else
482 /* == 0 : do NOT use call stack approach : usually for double precision version */
483 #define NCALLSTACK 0
484 #endif
485 #endif
486 static int cstklen = NCALLSTACK;
487 
488 #define HASHSIZE(n) ((unsigned int)1<<(n))
489 #define HASHMASK(n) (HASHSIZE(n)-1)
490 
491 #define NHASH 16
492 #define NHASHMAX 24
493 static int nhash = NHASH;
494 static unsigned int hashsize = HASHSIZE(NHASH);
495 static unsigned int hashmask = HASHMASK(NHASH);
496 
497 #ifdef HPM
498 /* HPM-specific (static) protos */
499 
500 static void stopstart_hpm(int tid, drhook_key_t *pstop, drhook_key_t *pstart);
501 static void stop_only_hpm(int tid, drhook_key_t *pstop);
502 static void init_hpm(int tid);
503 static double mflops_hpm(const drhook_key_t *keyptr);
504 static double mips_hpm(const drhook_key_t *keyptr);
505 static double divpc_hpm(const drhook_key_t *keyptr);
506 static double mflop_count(const drhook_key_t *keyptr);
507 static double mip_count(const drhook_key_t *keyptr);
508 
509 #else
510 /* Dummies for HPM as macros that do nothing */
511 
512 #define stopstart_hpm(tid, pstop, pstart)
513 #define stop_only_hpm(tid, pstop)
514 #define init_hpm(tid)
515 #define mflops_hpm(keyptr) 0
516 #define mips_hpm(keyptr) 0
517 #define divpc_hpm(keyptr) 0
518 #define mflop_count(keyptr) 0
519 #define mip_count(keyptr) 0
520 
521 #endif
522 
523 /*--- spin ---*/
524 
525 static int spin(int secs) {
526  struct timespec req, rem;
527  req.tv_sec = secs;
528  req.tv_nsec = 0;
529  return nanosleep(&req, &rem);
530 }
531 
532 /*--- dump_file ---*/
533 
534 static void dump_file(const char *pfx, int tid, int sig, int nsigs, const char filename[])
535 {
536  /* Developer option: Will this spoil our ATP trace ... ? */
537  FILE *fp;
538  char in[256];
539  char *tst = TIMESTR(tid);
540  if (sig > 0 && nsigs >= 1) {
541  fprintf(stderr,
542  "%s %s [%s@%s:%d] Developer option shows content of the file '%s', signal#%d, nsigs = %d\n",
543  pfx,tst,FFL,filename,sig,nsigs);
544  }
545  else {
546  fprintf(stderr,
547  "%s %s [%s@%s:%d] Developer option shows content of the file '%s'\n",
548  pfx,tst,FFL,filename);
549  }
550  fp = fopen(filename,"r");
551  if (fp) {
552  while (fgets(in,sizeof(in),fp) == in) {
553  fprintf(stderr,"%s %s [%s@%s:%d] %s",pfx,tst,FFL,in);
554  /* fprintf(stderr,"%s",in); */
555  }
556  fclose(fp);
557  }
558 }
559 
560 /*--- dump_hugepages ---*/
561 
562 static void dump_hugepages(int enforce, const char *pfx, int tid, int sig, int nsigs)
563 {
564  if (enforce || drhook_dump_hugepages) {
565  if (enforce || tid == 1) { /* OML-thread id >= 1 */
566  static double next_scheduled = -1;
567  double wt = WALLTIME();
568  if (enforce || wt > next_scheduled) {
569  const int kcomm = -1;
570  const int ftnunitno = 0; /* stderr */
571  fflush(NULL);
572  ec_cray_meminfo_(&ftnunitno,pfx,&kcomm,strlen(pfx));
573  fflush(NULL);
574  if (drhook_dump_buddyinfo) {
575  dump_file(pfx,tid,sig,nsigs,"/proc/buddyinfo");
576  dump_file(pfx,tid,sig,nsigs,"/proc/meminfo");
577  }
578  wt = WALLTIME();
579  next_scheduled = wt + drhook_dump_hugepages_freq;
580  }
581  }
582  }
583 }
584 
585 
586 /*--- set_default_handler ---*/
587 
588 static int set_unlimited_corefile(unsigned long long int *hardlimit);
589 
590 static int set_default_handler(int sig, int unlimited_corefile, int verbose)
591 {
592  int rc = -2;
593  if (sig >= 1 && sig <= NSIG) {
594  unsigned long long int hardlimit = 0;
595  struct sigaction sa = { 0 };
596  sa.sa_handler = SIG_DFL;
597  sigemptyset(&sa.sa_mask);
598  /*
599  sigfillset(&sa.sa_mask); -- if we wanted to block all (catchable) signals whilst in subsequent signal handler SIG_DFL
600  sigaddset(&sa.sa_mask, some_signal_to_be_blocked); ... just in case
601  */
602  sigaction(sig, &sa, NULL);
603  if (unlimited_corefile) rc = set_unlimited_corefile(&hardlimit); /* unconditionally */
604  if (verbose) {
605  int tid = get_thread_id_();
606  char *pfx = PREFIX(tid);
607  char buf[128] = "";
608  if (unlimited_corefile && rc == 0) snprintf(buf,sizeof(buf)," -- hardlimit for core file is now %llu (0x%llx)", hardlimit, hardlimit);
609  fprintf(stderr,
610  "%s %s [%s@%s:%d] "
611  "Enabled default signal handler (SIG_DFL) for signal#%d%s\n",
612  pfx,TIMESTR(tid),FFL,
613  sig,buf);
614  }
615  }
616  return rc;
617 }
618 
619 /*--- malloc_drhook ---*/
620 
621 static void *
623 {
624  size_t size1 = MAX(1,size);
625  void *p = malloc(size1);
626  if (!p) {
627  fprintf(stderr,
628  "***Error in malloc_drhook(): Unable to allocate space for %lld bytes\n",
629  (long long int)size1);
630  RAISE(SIGABRT);
631  }
632  return p;
633 }
634 
635 /*--- calloc_drhook ---*/
636 
637 static void *
638 calloc_drhook(size_t nmemb, size_t size)
639 {
640  size_t n = nmemb * size;
641  void *p = malloc_drhook(n);
642  memset(p,0,n);
643  return p;
644 }
645 
646 /*--- free_drhook ---*/
647 
648 #define free_drhook(x) { if (x) { free(x); x = NULL; } }
649 
650 /*--- callstack ---*/
651 
652 /* Note: For single precision calls -- small performance penalty */
653 
654 typedef struct callstack_t {
655  drhook_key_t **keyptr;
656  unsigned int next;
657  unsigned int maxdepth;
658 } callstack_t;
659 
660 static callstack_t **cstk = NULL;
661 
662 static drhook_key_t *callstack(int tid, void *key, drhook_key_t *keyptr)
663 {
664  /* Single routine -- two usages:
665 
666  (1) Upon c_drhook_start_() we call:
667 
668  (void) callstack(tid, key, u.keyptr);
669  - store keyptr into thread specific call stack
670  - fill *key up to 4-bytes index stating the position in the aforementioned call stack
671 
672  (2) Upon c_drhook_end_() we call:
673 
674  u.keyptr = callstack(tid, (void *)key, NULL);
675  - pass 4-byte index in
676  - obtain keyptr from call stack
677  - decrement call stack
678 
679  */
680 
681  static const unsigned int inc = 64;
682  unsigned int idx, *Index = key;
683  callstack_t *c = cstk[tid-1];
684  if (keyptr) {
685  if (!c) {
686  cstk[tid-1] = c = calloc_drhook(1, sizeof(*c));
687  c->keyptr = (drhook_key_t **) calloc_drhook(cstklen, sizeof(drhook_key_t *));
688  c->next = 0;
689  c->maxdepth = cstklen;
690  }
691  idx = (c->next)++;
692  if (idx >= c->maxdepth) {
693  drhook_key_t **kptr;
694  unsigned int maxdepth = idx + inc;
695  char *pfx = PREFIX(tid);
696  fprintf(stderr,
697  "%s %s [%s@%s:%d] "
698  "Call stack index %u out of range [0,%u) : extending the range to [0,%u) for this thread\n",
699  pfx,TIMESTR(tid),FFL,
700  idx,c->maxdepth,maxdepth);
701  kptr = (drhook_key_t **) calloc_drhook(maxdepth, sizeof(drhook_key_t *));
702  memcpy(kptr,c->keyptr,c->maxdepth * sizeof(drhook_key_t *));
703  free_drhook(c->keyptr);
704  c->keyptr = kptr;
705  c->maxdepth = maxdepth;
706  }
707  if (idx >= c->maxdepth) {
708  char *pfx = PREFIX(tid);
709  fprintf(stderr,
710  "%s %s [%s@%s:%d] "
711  "Call stack index %u still out of range [0,%u). Aborting ...\n",
712  pfx,TIMESTR(tid),FFL,
713  idx,c->maxdepth);
714  RAISE(SIGABRT);
715  }
716  c->keyptr[idx] = keyptr;
717  *Index = idx;
718  }
719  else {
720  idx = --(c->next);
721  if (idx != *Index) {
722  char *pfx = PREFIX(tid);
723  fprintf(stderr,
724  "%s %s [%s@%s:%d] "
725  "Invalid index to call stack %u : out of range [0,%u). Expecting the exact value of %u\n",
726  pfx,TIMESTR(tid),FFL,
727  idx,c->maxdepth,*Index);
728  RAISE(SIGABRT);
729  }
730  keyptr = c->keyptr[idx];
731  }
732  return keyptr;
733 }
734 
735 /*--- strdup_drhook ---*/
736 
737 static char *
738 strdup_drhook(const char *s)
739 {
740  int n = strlen(s);
741  char *p = malloc_drhook(n+1);
742  memcpy(p,s,n);
743  p[n] = 0;
744  return p;
745 }
746 
747 /*--- strdup2_drhook ---*/
748 
749 static char *
750 strdup2_drhook(const char *s, int s_len)
751 {
752  int n = s_len;
753  char *p = malloc_drhook(n+1);
754  memcpy(p,s,n);
755  p[n] = 0;
756  return p;
757 }
758 
759 /*--- timestamp ---*/
760 
761 static char *
763 {
764  time_t tp;
765  const int bufsize = 64;
766  char *buf = malloc_drhook(bufsize+1);
767  time(&tp);
768  strftime(buf, bufsize, "%Y%m%d %H%M%S", localtime(&tp));
769  return buf;
770 }
771 
772 /*--- TimeStr ---*/
773 
774 static char *
775 TimeStr(char *s, int slen)
776 {
777  if (s) {
778  time_t tp;
779  char buf[64];
780  time(&tp);
781  strftime(buf, sizeof(buf), "%Y%m%d:%H%M%S", localtime(&tp));
782  snprintf(s,slen,"[%s:%lld:%.3f]",buf,(long long int)tp,WALLTIME());
783  }
784  return s;
785 }
786 
787 /*--- hashfunc ---*/
788 
789 unsigned int
790 hashfunc(const char *s, int s_len)
791 {
792  unsigned int hashval;
793  if (opt_trim) {
794  for (hashval = 0; s_len>0 ; s++, s_len--) {
795  unsigned char c = islower(*s) ? toupper(*s) : *s;
796  hashval = (hashval<<4)^(hashval>>28)^(c);
797  }
798  }
799  else {
800  for (hashval = s_len; s_len>0 ; s_len--) {
801  hashval = (hashval<<4)^(hashval>>28)^(*s++);
802  }
803  }
804  hashval = (hashval ^ (hashval>>10) ^ (hashval>>20)) & hashmask;
805  return hashval;
806 }
807 
808 /*--- callpath_hashfunc ---*/
809 
810 unsigned int
811 callpath_hashfunc(unsigned int inithash, /* from hashfunc() */
812  const equivalence_t *callpath, int callpath_len,
813  unsigned int *fullhash)
814 {
815  unsigned int hashval;
816  for (hashval = inithash; callpath_len>0 ; callpath++, callpath_len--) {
817  hashval = (hashval<<4)^(hashval>>28)^(callpath->ull);
818  }
819  if (fullhash) *fullhash = hashval;
820  hashval = (hashval ^ (hashval>>10) ^ (hashval>>20)) & hashmask;
821  return hashval;
822 }
823 
824 /*--- insert_calltree ---*/
825 
826 static void
827 insert_calltree(int tid, drhook_key_t *keyptr)
828 {
829  if (tid >= 1 && tid <= numthreads) {
830  drhook_calltree_t *treeptr = thiscall[tid-1];
831  while (treeptr->active) {
832  if (!treeptr->next) {
833  treeptr->next = calloc_drhook(1,sizeof(drhook_calltree_t));
834  treeptr->next->prev = treeptr;
835  }
836  treeptr = treeptr->next;
837  }
838  treeptr->keyptr = keyptr;
839  treeptr->active = 1;
840  thiscall[tid-1] = treeptr;
841 #ifdef HPM
842  if (opt_hpmprof) {
843  drhook_key_t *kptr = treeptr->keyptr;
844  if (!kptr->hpm_stopped) {
845  stopstart_hpm(tid,
846  treeptr->prev ? treeptr->prev->keyptr : NULL, /* stop current (i.e. my parent) */
847  kptr); /* start to gather for me */
848  kptr->this_delta_wall_child = 0;
849  kptr->mip_count_in = mip_count(kptr);
850  kptr->mflop_count_in = mflop_count(kptr);
851 #ifdef DEBUG
852  fprintf(stderr,"insert[%.*s@%d]: this_delta_wall_child=%.15g, mip#%.15g, mflop#%.15g\n",
853  kptr->name_len,kptr->name,
854  tid,kptr->this_delta_wall_child,
855  kptr->mip_count_in,kptr->mflop_count_in);
856 #endif
857  }
858  else {
859  stop_only_hpm(tid,
860  treeptr->prev ? treeptr->prev->keyptr : NULL /* stop current (i.e. my parent) */);
861  } /* if (!kptr->hpm_stopped) else */
862  } /* if (opt_hpmprof) */
863 #endif
864  }
865 }
866 
867 /*--- remove_calltree ---*/
868 
869 static void
870 remove_calltree(int tid, drhook_key_t *keyptr,
871  const double *delta_wall, const double *delta_cpu)
872 {
873  if (tid >= 1 && tid <= numthreads) {
874  drhook_calltree_t *treeptr = thiscall[tid-1];
875  if (treeptr->active && treeptr->keyptr == keyptr) {
876  treeptr->active = 0;
877  if (treeptr->prev) {
878  drhook_key_t *parent_keyptr = treeptr->prev->keyptr;
879  if (parent_keyptr) { /* extra security */
880  if (opt_walltime) {
881  parent_keyptr->delta_wall_child += (*delta_wall);
882 #ifdef HPM
883  if (opt_hpmprof) parent_keyptr->this_delta_wall_child += (*delta_wall);
884 #endif
885  }
886  if (opt_cputime) {
887  parent_keyptr->delta_cpu_child += (*delta_cpu);
888  }
889  if (opt_memprof) {
890  /*
891  const long long int size = 0;
892  c_drhook_memcounter_(&tid, &size, NULL);
893  fprintf(stderr,
894  ">parent(%.*s)->mem_child = %lld ; this(%.*s)->alldelta = %lld, mem_child = %lld\n",
895  parent_keyptr->name_len, parent_keyptr->name, parent_keyptr->mem_child,
896  keyptr->name_len, keyptr->name, keyptr->maxmem_alldelta, keyptr->mem_child);
897  */
898  parent_keyptr->mem_child = MAX(parent_keyptr->mem_child, keyptr->maxmem_alldelta);
899  /*
900  fprintf(stderr,
901  "<parent(%.*s)->mem_child = %lld ; this(%.*s)->alldelta = %lld, mem_child = %lld\n",
902  parent_keyptr->name_len, parent_keyptr->name, parent_keyptr->mem_child,
903  keyptr->name_len, keyptr->name, keyptr->maxmem_alldelta, keyptr->mem_child);
904  */
905  }
906  } /* if (parent_keyptr) */
907  thiscall[tid-1] = treeptr->prev;
908  }
909  else {
910  thiscall[tid-1] = calltree[tid-1];
911  }
912 #ifdef HPM
913  if (opt_hpmprof) {
914  drhook_key_t *kptr = treeptr->keyptr;
915  if (!kptr->hpm_stopped) {
916  double this_delta_wall_self = *delta_wall - kptr->this_delta_wall_child;
917  stopstart_hpm(tid,
918  kptr,
919  thiscall[tid-1]->keyptr); /* stop current, (re-)start previous */
920  /* Calculate moving average of mipsrate & mflops ; divpc we don't bother */
921 #ifdef DEBUG
922  fprintf(stderr,"remove[%.*s@%d]: this_delta_wall_self=%.15g i.e. %.15g - %.15g",
923  kptr->name_len,kptr->name,
924  tid,this_delta_wall_self,
925  *delta_wall,kptr->this_delta_wall_child);
926 #endif
927  if (this_delta_wall_self > 0) {
928  long long int hpm_calls = ++kptr->hpm_calls;
929  double mipsrate, mflops;
930  kptr->mip_count_in = mip_count(kptr) - kptr->mip_count_in;
931  kptr->mflop_count_in = mflop_count(kptr) - kptr->mflop_count_in;
932  mipsrate = kptr->mip_count_in/this_delta_wall_self;
933  kptr->avg_mipsrate = ((hpm_calls-1)*kptr->avg_mipsrate + mipsrate)/hpm_calls;
934  mflops = kptr->mflop_count_in/this_delta_wall_self;
935  kptr->avg_mflops = ((hpm_calls-1)*kptr->avg_mflops + mflops)/hpm_calls;
936 #ifdef DEBUG
937  fprintf(stderr,
938  ", mip#%.15g, mflop#%.15g : mipsrate=%.15g, avg=%.15g; mflops=%.15g, avg=%.15g",
939  kptr->mip_count_in,kptr->mflop_count_in,
940  mipsrate, kptr->avg_mipsrate,
941  mflops, kptr->avg_mflops);
942 #endif
943  }
944 #ifdef DEBUG
945  fprintf(stderr,"\n");
946 #endif
947  if (opt_hpmstop_threshold > 0 && kptr->calls == opt_hpmstop_threshold) {
948  /* check whether hpm should anymore be called for this routine */
949  if (kptr->avg_mflops < opt_hpmstop_mflops) kptr->hpm_stopped = 1;
950  }
951  }
952  else {
953  stop_only_hpm(tid,kptr);
954  } /* if (!kptr->hpm_stopped) else ... */
955  } /* if (opt_hpmprof) */
956 #endif
957  curkeyptr[tid-1] = thiscall[tid-1]->keyptr;
958  }
959  else {
960  curkeyptr[tid-1] = NULL;
961  } /* if (treeptr->active && treeptr->keyptr == keyptr) else ... */
962  }
963 }
964 
965 /*--- memstat ---*/
966 
967 static void
968 memstat(drhook_key_t *keyptr, const int *thread_id, int in_getkey)
969 {
970  if (any_memstat && keyptr) {
971  if (opt_gethwm) keyptr->hwm = gethwm_();
972  if (opt_getrss) {
973  keyptr->maxrss = getrss_();
974  keyptr->rssnow = getcurheap_thread_(thread_id);
975  }
976  if (opt_getstk) {
977  long long int stk = getstk_();
978  keyptr->stack = stk;
979  keyptr->maxstack = MAX(keyptr->maxstack,stk);
980  }
981  if (opt_getpag) keyptr->paging = getpag_();
982  if (opt_memprof) {
983  keyptr->mem_seenmax = getmaxcurheap_thread_(thread_id);
984  if (in_getkey) { /* Upon enter of a Dr.Hook'ed routine */
985  /* A note for "keyptr->mem_curdelta":
986  1) do not reset to 0
987  2) initially calloc'ed to 0 while initializing the keydata[] ~ alias keyptr
988  3) remember the previous value --> catches memory leaks, too !! */
989  /* keyptr->mem_curdelta = 0; */
990  /* Nearly the same holds for "keyptr->mem_child";
991  we need to capture the maximum/hwm for child */
992  /* keyptr->mem_child = 0; */
993  keyptr->paging_in = keyptr->paging;
994  }
995  else { /* Upon exit of a Dr.Hook'ed routine */
996  long long int alldelta = keyptr->mem_curdelta + keyptr->mem_child;
997  if (alldelta > keyptr->maxmem_alldelta) keyptr->maxmem_alldelta = alldelta;
998  if (keyptr->paging - keyptr->paging_in > keyptr->mem_maxpagdelta)
999  keyptr->mem_maxpagdelta = keyptr->paging - keyptr->paging_in;
1000  }
1001  if (keyptr->hwm > keyptr->mem_maxhwm) keyptr->mem_maxhwm = keyptr->hwm;
1002  if (keyptr->maxrss > keyptr->mem_maxrss) keyptr->mem_maxrss = keyptr->maxrss;
1003  if (keyptr->maxstack > keyptr->mem_maxstk) keyptr->mem_maxstk = keyptr->maxstack;
1004  }
1005  }
1006 }
1007 
1008 /*--- flptrap ---*/
1009 
1010 /*
1011  -----------------------------------------------------------------------
1012  If we are trapping Floating-Point Error, then set the processor in SYNC
1013  modes and enable TRP_INVALID, TRP_DIV_BY_ZERO and TRP_OVERFLOW.
1014  -----------------------------------------------------------------------
1015 */
1016 
1017 #ifdef RS6K
1018 static void
1019 flptrap(int sig)
1020 {
1021  if (sig == SIGFPE) {
1022  /* From John Hague, IBM, UK (--> thanks a lot, John !!)*/
1023  int ret = fp_trap(FP_TRAP_FASTMODE);
1024  if ((ret == FP_TRAP_UNIMPL) || (ret == FP_TRAP_ERROR)) {
1025  char errmsg[4096];
1026  sprintf(errmsg,
1027  "flptrap(): Call to 'fp_trap' in signal_trap failed (return code = %d)\n (line %d in file %s)\n",
1028  ret, __LINE__, __FILE__);
1029  perror(errmsg);
1030  RAISE(SIGABRT);
1031  }
1032  fp_enable(TRP_INVALID | TRP_DIV_BY_ZERO | TRP_OVERFLOW);
1033  }
1034 }
1035 #elif defined(__GNUC__) && !defined(NO_TRAPFPE)
1036 static void
1037 flptrap(int sig)
1038 {
1039  if (sig == SIGFPE) {
1040  /* Adapted from www.twinkle.ws/arnaud/CompilerTricks.html#Glibc_FP */
1041  trapfpe(); /* No need for pgf90's -Ktrap=fp now ? */
1042  }
1043 }
1044 #else
1045 static void
1046 flptrap(int sig)
1047 {
1048  return; /* A dummy */
1049 }
1050 #endif
1051 
1052 static void signal_gencore(int sig SIG_EXTRA_ARGS);
1053 static void signal_harakiri(int sig SIG_EXTRA_ARGS);
1054 static void signal_drhook(int sig SIG_EXTRA_ARGS);
1055 static void trapfpe_treatment(int sig, int silent);
1056 
1057 /*--- catch_signals ---*/
1058 
1059 #define CATCHSIG(x) {\
1060  drhook_sig_t *sl = &siglist[x];\
1061  if (sl->active == 0) {\
1062  drhook_sigfunc_t u;\
1063  u.func3args = signal_drhook;\
1064  sl->active = 1;\
1065  sigemptyset(&sl->new.sa_mask);\
1066  sl->new.sa_handler = u.func1args;\
1067  sl->new.sa_flags = SA_SIGINFO;\
1068  sigaction(x,&sl->new,&sl->old);\
1069  trapfpe_treatment(x,silent); \
1070  if (!silent && myproc == 1) {\
1071  int tid = get_thread_id_(); \
1072  char *pfx = PREFIX(tid); \
1073  fprintf(stderr,\
1074  "%s %s [%s@%s:%d] DR_HOOK also catches signal#%d : New handler '%s' installed at %p (old at %p)\n", \
1075  pfx,TIMESTR(tid),FFL, \
1076  x, "signal_drhook", sl->new.sa_handler, sl->old.sa_handler); \
1077  }\
1078  }\
1079 }
1080 
1081 static void
1082 catch_signals(int silent)
1083 {
1084  char *env = getenv("DR_HOOK_CATCH_SIGNALS");
1085  if (!silent && myproc == 1) {
1086  int tid = get_thread_id_();
1087  char *pfx = PREFIX(tid);
1088  fprintf(stderr,
1089  "%s %s [%s@%s:%d] DR_HOOK_CATCH_SIGNALS=%s\n",
1090  pfx,TIMESTR(tid),FFL,
1091  env ? env : "<undef>");
1092  }
1093  if (env) {
1094  const char delim[] = ", \t/";
1095  char *p, *s = strdup_drhook(env);
1096  p = strtok(s,delim);
1097  while (p) {
1098  int sig = atoi(p);
1099  if (sig >= 1 && sig <= NSIG) {
1100  CATCHSIG(sig);
1101  }
1102  else if (sig == -1) { /* Makes ALL (catchable) signals available to DR_HOOK */
1103  int j;
1104  for (j=1; j<=NSIG; j++) {
1105  CATCHSIG(j);
1106  } /* for (j=1; j<=NSIG; j++) */
1107  break;
1108  }
1109  p = strtok(NULL,delim);
1110  }
1111  free_drhook(s);
1112  }
1113 }
1114 
1115 /*--- trapfpe_treatment ---*/
1116 
1117 static void
1118 trapfpe_treatment(int sig, int silent)
1119 {
1120  if (sig == SIGFPE) {
1121 #if defined(__GNUC__) && !defined(NO_TRAPFPE)
1122  int tid = get_thread_id_();
1123  char *pfx = PREFIX(tid);
1124  if (drhook_trapfpe) {
1125  if (!silent && myproc == 1) {
1126  fprintf(stderr,
1127  "%s %s [%s@%s:%d] DR_HOOK enables SIGFPE-related floating point trapping since DRHOOK_TRAPFPE=%d\n",
1128  pfx,TIMESTR(tid),FFL,
1129  drhook_trapfpe);
1130  }
1131  flptrap(sig); /* Has FLP-trapping on, regardless */
1132  }
1133  else {
1134  if (!silent && myproc == 1) {
1135  fprintf(stderr,
1136  "%s %s [%s@%s:%d] DR_HOOK turns SIGFPE-related floating point trapping off since DRHOOK_TRAPFPE=%d\n",
1137  pfx,TIMESTR(tid),FFL,
1138  drhook_trapfpe);
1139  }
1140  untrapfpe(); /* Turns off a possible -Ktrap=fp from pgf90 */
1141  }
1142 #endif
1143  }
1144 }
1145 
1146 /*--- restore_default_signals ---*/
1147 
1148 static void
1150 {
1151  char *env = getenv("DR_HOOK_RESTORE_DEFAULT_SIGNALS");
1152  if (!silent && myproc == 1) {
1153  int tid = get_thread_id_();
1154  char *pfx = PREFIX(tid);
1155  fprintf(stderr,
1156  "%s %s [%s@%s:%d] DR_HOOK_RESTORE_DEFAULT_SIGNALS=%s\n",
1157  pfx,TIMESTR(tid),FFL,
1158  env ? env : "<undef>");
1159  }
1160  if (env) {
1161  int unlim_core = 1;
1162  const char delim[] = ", \t/";
1163  char *p, *s = strdup_drhook(env);
1164  p = strtok(s,delim);
1165  while (p) {
1166  int sig = atoi(p);
1167  if (sig >= 1 && sig <= NSIG) {
1168  drhook_sig_t *sl = &siglist[sig];
1169  if (sl->active == 0) { /* Not touched yet by ignore_signals() */
1170  set_default_handler(sig,unlim_core,(!silent && myproc == 1));
1171  unlim_core = 0;
1172  if (sig == SIGFPE) trapfpe_treatment(sig, (!silent && myproc == 1));
1173  sl->active = -2;
1174  }
1175  }
1176  else if (sig == -1) { /* Restore default signals for all available/catchable to DR_HOOK */
1177  int j;
1178  for (j=1; j<=NSIG; j++) {
1179  drhook_sig_t *sl = &siglist[j];
1180  if (sl->active == 0) { /* Not touched yet by ignore_signals() */
1181  set_default_handler(j,unlim_core,(!silent && myproc == 1));
1182  unlim_core = 0;
1183  if (j == SIGFPE) trapfpe_treatment(j, (!silent && myproc == 1));
1184  sl->active = -2;
1185  }
1186  } /* for (j=1; j<=NSIG; j++) */
1187  break;
1188  }
1189  p = strtok(NULL,delim);
1190  }
1191  free_drhook(s);
1192  }
1193 }
1194 
1195 /*--- ignore_signals ---*/
1196 
1197 static void
1198 ignore_signals(int silent)
1199 {
1200  char *env = getenv("DR_HOOK_IGNORE_SIGNALS");
1201  if (!silent && myproc == 1) {
1202  int tid = get_thread_id_();
1203  char *pfx = PREFIX(tid);
1204  fprintf(stderr,
1205  "%s %s [%s@%s:%d] DR_HOOK_IGNORE_SIGNALS=%s\n",
1206  pfx,TIMESTR(tid),FFL,
1207  env ? env : "<undef>");
1208  }
1209  if (env) {
1210  int tid = get_thread_id_();
1211  char *pfx = PREFIX(tid);
1212  const char delim[] = ", \t/";
1213  char *p, *s = strdup_drhook(env);
1214  p = strtok(s,delim);
1215  while (p) {
1216  int sig = atoi(p);
1217  if (sig >= 1 && sig <= NSIG) {
1218  drhook_sig_t *sl = &siglist[sig];
1219  if (!silent && myproc == 1) {
1220  fprintf(stderr,
1221  "%s %s [%s@%s:%d] DR_HOOK ignores signal#%d altogether\n",
1222  pfx,TIMESTR(tid),FFL,
1223  sig);
1224  }
1225  sl->active = -1;
1226  }
1227  else if (sig == -1) { /* Switches off ALL signals from DR_HOOK */
1228  int j;
1229  for (j=1; j<=NSIG; j++) {
1230  drhook_sig_t *sl = &siglist[j];
1231  if (!silent && myproc == 1) {
1232  fprintf(stderr,
1233  "%s %s [%s@%s:%d] DR_HOOK ignores signal#%d altogether\n",
1234  pfx,TIMESTR(tid),FFL,
1235  j);
1236  }
1237  sl->active = -1;
1238  } /* for (j=1; j<=NSIG; j++) */
1239  break;
1240  }
1241  p = strtok(NULL,delim);
1242  }
1243  free_drhook(s);
1244  }
1245 }
1246 
1247 /*--- gdb__sigdump ---*/
1248 
1249 #if (defined(LINUX) || defined(SUN4)) && !defined(XT3) && !defined(XD1) && !defined(_CRAYC)
1250 static void gdb__sigdump(int sig SIG_EXTRA_ARGS)
1251 {
1252  static int who = 0; /* Current owner of the lock, if > 0 */
1253  int is_set = 0;
1254  int it = get_thread_id_();
1255  drhook_sig_t *sl = &siglist[sig];
1256  char *pfx = PREFIX(it);
1257 
1258  coml_test_lockid_(&is_set, &DRHOOK_lock);
1259  if (is_set && who == it) {
1260  fprintf(stderr,"%s %s [%s@%s:%d] Received (another) signal#%d (%s)\n",
1261  pfx,TIMESTR(it),FFL,
1262  sig,sl->name);
1263  fprintf(stderr,"%s %s [%s@%s:%d] Recursive calls by the same thread#%d not allowed. Bailing out\n",
1264  pfx,TIMESTR(it),FFL,
1265  it);
1266  return;
1267  }
1268  if (!is_set) coml_set_lockid_(&DRHOOK_lock);
1269  who = it;
1270  fprintf(stderr,"%s %s [%s@%s:%d] Received signal#%d(%s) : sigcontextptr=%p\n",
1271  pfx,TIMESTR(it),FFL,
1272  sig,sl->name,sigcontextptr);
1273  LinuxTraceBack(pfx,TIMESTR(it),sigcontextptr);
1274  /* LinuxTraceBack(pfx,TIMESTR(tid),NULL); */
1275  who = 0;
1277 }
1278 #endif
1279 
1280 /*--- signal_drhook ---*/
1281 
1282 #define SETSIG5(x,ignore_flag,handler_name,preserve_old,xstr) { \
1283  drhook_sig_t *sl = &siglist[x]; \
1284  if (sl->active == 0) { \
1285  drhook_sigfunc_t u; \
1286  u.func3args = handler_name; \
1287  sl->active = 1; \
1288  strcpy(sl->name,xstr); \
1289  sigemptyset(&sl->new.sa_mask); \
1290  sl->new.sa_handler = u.func1args; \
1291  sl->new.sa_flags = SA_SIGINFO; \
1292  sigaction(x,&sl->new,preserve_old ? &sl->old : NULL); \
1293  sl->ignore_atexit = ignore_flag; \
1294  trapfpe_treatment(x,silent); \
1295  if (!silent && myproc == 1) { \
1296  int tid = get_thread_id_(); \
1297  char *pfx = PREFIX(tid); \
1298  const char fmt[] = "%s %s [%s@%s:%d] New signal handler '%s' for signal#%d (%s) at %p (old at %p)\n"; \
1299  fprintf(stderr,fmt, \
1300  pfx,TIMESTR(tid),FFL, \
1301  #handler_name, \
1302  x, sl->name, \
1303  sl->new.sa_handler, \
1304  preserve_old ? sl->old.sa_handler : NULL); \
1305  } \
1306  } \
1307 }
1308 
1309 #define SETSIG(x,ignore_flag) SETSIG5(x,ignore_flag,signal_drhook,1,#x)
1310 
1311 #define JSETSIG(x,ignore_flag) { \
1312  drhook_sig_t *sl = &siglist[x]; \
1313  drhook_sigfunc_t u; \
1314  /* fprintf(stderr,"JSETSIG: sl->active = %d\n",sl->active); */ \
1315  u.func3args = signal_harakiri; \
1316  sl->active = 1; \
1317  strcpy(sl->name,#x); \
1318  sigemptyset(&sl->new.sa_mask); \
1319  sl->new.sa_handler = u.func1args; \
1320  sl->new.sa_flags = SA_SIGINFO; \
1321  sigaction(x,&sl->new,&sl->old); \
1322  sl->ignore_atexit = ignore_flag; \
1323  trapfpe_treatment(x,0); \
1324  { \
1325  int tid = get_thread_id_(); \
1326  char *pfx = PREFIX(tid); \
1327  const char fmt[] = "%s %s [%s@%s:%d] Harakiri signal handler '%s' for signal#%d (%s) installed at %p (old at %p)\n"; \
1328  fprintf(stderr,fmt, \
1329  pfx,TIMESTR(tid),FFL, \
1330  "signal_harakiri", \
1331  x, sl->name, \
1332  sl->new.sa_handler, \
1333  sl->old.sa_handler); \
1334  } \
1335  }
1336 
1337 #if defined(RS6K) && defined(__64BIT__)
1338 #define DRH_STRUCT_RLIMIT struct rlimit64
1339 #define DRH_GETRLIMIT getrlimit64
1340 #define DRH_SETRLIMIT setrlimit64
1341 #else
1342 #define DRH_STRUCT_RLIMIT struct rlimit
1343 #define DRH_GETRLIMIT getrlimit
1344 #define DRH_SETRLIMIT setrlimit
1345 #endif
1346 
1347 static int set_unlimited_corefile(unsigned long long int *hardlimit)
1348 {
1349  /*
1350  Make sure we *only* set soft-limit (not hard-limit) to 0 in our scripts i.e. :
1351  $ ulimit -S -c 0
1352  but *not*
1353  $ ulimit -c 0
1354  See man ksh or man bash for more
1355  */
1356  int rc = -1;
1357  if (unlimited_corefile_retcode == 9999) { /* Done only once */
1358  DRH_STRUCT_RLIMIT r;
1359  if (DRH_GETRLIMIT(RLIMIT_CORE, &r) == 0) {
1360  r.rlim_cur = r.rlim_max;
1361  if (DRH_SETRLIMIT(RLIMIT_CORE, &r) == 0) {
1362  saved_corefile_hardlimit = r.rlim_cur;
1363  rc = 0;
1364  }
1365  }
1367  }
1368  if (hardlimit) *hardlimit = saved_corefile_hardlimit;
1370  return rc;
1371 }
1372 
1373 static void
1374 signal_gencore(int sig SIG_EXTRA_ARGS)
1375 {
1376  if (opt_gencore > 0) {
1377  opt_gencore = 0; /* A tiny chance for a race condition between threads */
1378  if (sig == opt_gencore_signal && sig >= 1 && sig <= NSIG) {
1379  signal(sig, SIG_IGN);
1380  signal(SIGABRT, SIG_DFL);
1381  { /* Enable unlimited cores (up to hard-limit) and call abort() --> generates core dump */
1382  if (set_unlimited_corefile(NULL) == 0) {
1383  int tid = get_thread_id_();
1384  char *pfx = PREFIX(tid);
1385  fprintf(stderr,
1386  "%s %s [%s@%s:%d] Received signal#%d and now calling abort() ...\n",
1387  pfx,TIMESTR(tid),FFL,
1388  sig);
1389  LinuxTraceBack(pfx,TIMESTR(tid),NULL);
1390  abort(); /* Dump core, too */
1391  }
1392  }
1393  /* Should never end up here */
1394  fflush(NULL);
1395  _exit(128+ABS(sig));
1396  } /* if (sig >= 1 && sig <= NSIG && sig == opt_gencore_signal) */
1397  }
1398 }
1399 
1400 static char *safe_llitoa(long long int i, char b[], int blen)
1401 {
1402  char const digit[] = "0123456789";
1403  char *p = b;
1404  long long int shifter;
1405  if (i < 0) {
1406  *p++ = '-';
1407  i *= -1;
1408  }
1409  shifter = i;
1410  do { /* Move to where representation ends */
1411  ++p;
1412  shifter = shifter/10;
1413  } while (shifter);
1414  *p = '\0';
1415  do{ /* Move back, inserting digits as u go */
1416  *--p = digit[i%10];
1417  i = i/10;
1418  } while (i);
1419  return b;
1420 }
1421 
1422 
1423 static void
1424 signal_harakiri(int sig SIG_EXTRA_ARGS)
1425 {
1426  /* A signal handler that will force to exit the current thread immediately for sure */
1427 
1428  /* The following output should be malloc-free */
1429 
1430  time_t tp;
1431  int fd = fileno(stderr);
1432  int tid = get_thread_id_();
1433  int nsigs = TIDNSIGS(tid);
1434  char *pfx = PREFIX(tid);
1435  char buf[128];
1436  char s[1024];
1437  strcpy(s,pfx);
1438  /* [%s@%s:%d] for FFL below */
1439  strcat(s," [");
1440  strcat(s,__FUNCTION__);
1441  strcat(s,"@");
1442  strcat(s,__FILE__);
1443  strcat(s,":");
1444  strcat(s,safe_llitoa(__LINE__,buf,sizeof(buf)));
1445  strcat(s,"] [epoch=");
1446  time(&tp);
1447  strcat(s,safe_llitoa(tp,buf,sizeof(buf)));
1448  strcat(s,"] Terminating process to avoid hangs due to signal#");
1449  strcat(s,safe_llitoa(sig,buf,sizeof(buf)));
1450  strcat(s," by raising signal SIGKILL = ");
1451  strcat(s,safe_llitoa(SIGKILL,buf,sizeof(buf)));
1452  strcat(s,", nsigs = ");
1453  strcat(s,safe_llitoa(nsigs,buf,sizeof(buf)));
1454 
1455  write(fd,s,strlen(s));
1456 
1457  raise(SIGKILL); /* Use raise, not RAISE here */
1458  _exit(128+ABS(sig)); /* Should never reach here, bu' in case it does, then ... */
1459 }
1460 
1461 static void
1462 signal_drhook(int sig SIG_EXTRA_ARGS)
1463 {
1464  int nsigs;
1465  int tid = get_thread_id_();
1466  char *pfx = PREFIX(tid);
1467  if (signals_set && sig >= 1 && sig <= NSIG) {
1468  drhook_sig_t *sl = &siglist[sig];
1469  sigset_t newmask, oldmask;
1470 
1471 #if 0
1472  signal(sig, SIG_IGN); /* We may not need this ... */
1473 #endif
1474 
1475  /* Signal catching */
1476 #ifdef _OPENMP
1477 #pragma omp critical
1478  nsigs = (++signal_handler_called);
1479  if (sl->ignore_atexit) signal_handler_ignore_atexit++;
1480 #else
1481  nsigs = (++signal_handler_called); /* A tiny chance for a race condition between threads */
1482  if (sl->ignore_atexit) signal_handler_ignore_atexit++;
1483 #endif
1484 
1485  if (ec_drhook && tid >= 1 && tid <= numthreads) ec_drhook[tid-1].nsigs = nsigs; /* Store for possible signal_harakiri() */
1486 
1487  /*------------------------------------------------------------
1488  Strategy:
1489  - drhook intercepts most interrupts.
1490  - 1st interupt will
1491  - call alarm(10) to try to make sure 2nd interrupt received
1492  - try to call tracebacks and exit (which includes atexits)
1493  - 2nd (and subsequent) interupts will
1494  - spin for 20 sec (to give 1st interrupt time to complete tracebacks)
1495  - and then call _exit (bypassing atexit)
1496  ------------------------------------------------------------*/
1497 
1498  /* if (sig != SIGTERM) signal(SIGTERM, SIG_DFL); */ /* Let the default SIGTERM to occur */
1499 
1500 #ifdef _OPENMP
1501  max_threads = omp_get_max_threads();
1502 #endif
1503  if (nsigs == 1) {
1504  /*---- First call to signal handler: call alarm(drhook_harakiri_timeout), tracebacks, exit ------*/
1505 
1506  /* Enjoy some output (only from the first guy that came in) */
1507  long long int hwm = gethwm_();
1508  long long int rss = getmaxrss_();
1509  long long int maxstack = getmaxstk_();
1510  long long int pag = getpag_();
1511  rss /= 1048576;
1512  hwm /= 1048576;
1513  maxstack /= 1048576;
1514  fprintf(stderr,
1515  "%s %s [%s@%s:%d] Received signal#%d (%s) :: %lldMB (heap),"
1516  " %lldMB (maxrss), %lldMB (maxstack), %lld (paging), nsigs = %d\n",
1517  pfx,TIMESTR(tid),FFL,
1518  sig, sl->name, hwm, rss, maxstack, pag, nsigs);
1519  fprintf(stderr,
1520  "%s %s [%s@%s:%d] Also activating Harakiri-alarm (SIGALRM=%d) to expire after %ds elapsed to prevent hangs, nsigs = %d\n",
1521  pfx,TIMESTR(tid),FFL,
1522  SIGALRM,drhook_harakiri_timeout,nsigs);
1523  JSETSIG(SIGALRM,1); /* This will now set another signal handler than signal_drhook */
1524  fflush(NULL);
1525  alarm(drhook_harakiri_timeout);
1526  }
1527  else if (nsigs > 1) {
1528  /*----- 2nd (and subsequent) calls to signal handler: spin harakiri-timeout + 60 sec, _exit ---------*/
1529  int offset = 60;
1530  int secs = drhook_harakiri_timeout+offset;
1531  fprintf(stderr,
1532  "%s %s [%s@%s:%d] Calling signal_harakiri upon receipt of signal#%d"
1533  " after %ds spin, nsigs = %d\n",
1534  pfx,TIMESTR(tid),FFL,
1535  sig,secs,nsigs);
1536  fflush(NULL);
1537  spin(secs);
1538  signal_harakiri(sig SIG_PASS_EXTRA_ARGS);
1539  }
1540 
1541  /* All below this point should be nsigs == 1 i.e. the first threat arriving signal_drhook() */
1542 
1543 #ifdef RS6K
1544  /*-- llcancel attempted but sometimes hangs ---
1545  {
1546  char *env = getenv("LOADL_STEP_ID");
1547  if (env) {
1548  char *cancel = "delayed_llcancel ";
1549  char cmd[80];
1550  sprintf(cmd,"%s %s &",cancel,env);
1551  fprintf(stderr,"tid#%d issuing command: %s\n",tid,cmd;
1552  fflush(NULL);
1553  system(cmd);
1554  }
1555  }
1556  ------------------------------------*/
1557 #endif
1558 
1559  /* sigfillset(&newmask); -- dead code since sigprocmask() was not called */
1560  /*
1561  sigemptyset(&newmask);
1562  sigaddset(&newmask, sig);
1563  */
1564 
1565  /* Start critical region (we don't want any signals to interfere while doing this) */
1566  /* sigprocmask(SIG_BLOCK, &newmask, &oldmask); */
1567 
1568  if (nsigs == 1) {
1569  /* Print Dr.Hook traceback */
1570  const int ftnunitno = 0; /* stderr */
1571  const int print_option = 2; /* calling tree */
1572  int level = 0;
1573 
1574  dump_hugepages(1,pfx,tid,sig,nsigs);
1575 
1576  if (drhook_dump_smaps) {
1577  pid_t unixtid = gettid();
1578  char filename[256];
1579  snprintf(filename,sizeof(filename),"/proc/%ld/smaps",(long)unixtid);
1580  dump_file(pfx,tid,sig,nsigs,filename);
1581  }
1582 
1583  fprintf(stderr,
1584  "%s %s [%s@%s:%d] Starting DrHook backtrace for signal#%d, nsigs = %d\n",
1585  pfx,TIMESTR(tid),FFL,
1586  sig,nsigs);
1587  fflush(NULL);
1588  c_drhook_print_(&ftnunitno, &tid, &print_option, &level);
1589  fflush(NULL);
1590 
1591  /* To make it less likely that another thread generates a signal while we are
1592  doing a traceback lets wait a while (seems to fix problems of the traceback
1593  terminating abnormally. Probably a better way of doing this involving holding
1594  off signals but sigprocmask is not safe in multithreaded code - P Towers Dec 10 2012
1595  This was originally an issue with the Intel compiler but may be of benefit for other
1596  compilers. Cannot see it doing harm - P Towers Aug 29 2013 */
1597  spin(MIN(5,tid));
1598 
1599  if (sig != SIGABRT && sig != SIGTERM) {
1600 #ifdef RS6K
1601  xl__sigdump(sig SIG_PASS_EXTRA_ARGS); /* Can't use xl__trce(...), since it also stops */
1602 #endif
1603 
1604 #if 1
1605  /* Active code ? */
1606 #if (defined(LINUX) || defined(SUN4)) && !defined(XT3) && !defined(XD1)
1607  LinuxTraceBack(pfx,TIMESTR(tid),NULL);
1608 #endif
1609 #else
1610  /* Dead code ? */
1611 #if (defined(LINUX) || defined(SUN4)) && !defined(XT3) && !defined(XD1) && !defined(_CRAYC)
1612  gdb__sigdump(sig SIG_PASS_EXTRA_ARGS);
1613 #endif
1614 #endif
1615 
1616 #ifdef __INTEL_COMPILER
1617  intel_trbk_(); /* from ../utilities/gentrbk.F90 */
1618 #endif
1619 
1620 #if defined(NECSX)
1621  necsx_trbk_("signal_drhook",13); /* from ../utilities/gentrbk.F90 */
1622 #endif
1623  }
1624 
1625 #ifdef VPP
1626 #if defined(SA_SIGINFO) && SA_SIGINFO > 0
1627  _TraceCalls(sigcontextptr); /* Need VPP's libmp.a by Pierre Lagier */
1628 #endif
1629 #endif
1630 
1631  fprintf(stderr,
1632  "%s %s [%s@%s:%d] DrHook backtrace done for signal#%d, nsigs = %d\n",
1633  pfx,TIMESTR(tid),FFL,
1634  sig,nsigs);
1635  fflush(NULL);
1636  }
1637 
1638  /* sigprocmask(SIG_SETMASK, &oldmask, 0); */
1639  /* End critical region : the original signal state restored */
1640 
1641  {
1642  int restored = 0, tdiff;
1643  time_t t1, t2;
1644  drhook_sigfunc_t u;
1645  u.func3args = signal_drhook;
1646  if (opt_propagate_signals &&
1647  sl->old.sa_handler != SIG_DFL &&
1648  sl->old.sa_handler != SIG_IGN &&
1649  sl->old.sa_handler != u.func1args) {
1650  u.func1args = sl->old.sa_handler;
1651 
1652  if (atp_enabled) {
1653  /* Restore the default, core-file creating action to these "ATP" recognized signals */
1654  switch (sig) {
1655  case SIGTERM:
1656  if (atp_ignore_sigterm) break; /* SIGSEGV not reset to SIG_DFL as ATP now ignores SIGTERM */
1657  /* Fall thru (see man atp on Cray) */
1658  case SIGFPE:
1659  case SIGILL:
1660  case SIGTRAP:
1661  case SIGABRT:
1662  case SIGBUS:
1663  case SIGSEGV:
1664  case SIGSYS:
1665  case SIGXCPU:
1666 #if defined(SIGXFSZ)
1667  case SIGXFSZ:
1668 #endif
1669  fprintf(stderr,
1670  "%s %s [%s@%s:%d] Resetting SIGSEGV (%d) to "
1671  "default signal handler (SIG_DFL) before calling ATP for signal#%d, nsigs = %d\n",
1672  pfx,TIMESTR(tid),FFL,
1673  SIGSEGV,sig,nsigs);
1674  set_default_handler(SIGSEGV,1,1);
1675  restored = 1;
1676  break;
1677  default:
1678  break;
1679  }
1680  }
1681 
1682  fprintf(stderr,
1683  "%s %s [%s@%s:%d] Calling previous signal handler at %p for signal#%d, nsigs = %d\n",
1684  pfx,TIMESTR(tid),FFL,
1685  u.func1args,sig,nsigs);
1686 
1687  time(&t1);
1688  u.func3args(sig SIG_PASS_EXTRA_ARGS); /* This could now be the ATP */
1689  time(&t2);
1690  tdiff = (t2 - t1);
1691 
1692  fprintf(stderr,
1693  "%s %s [%s@%s:%d] Returned from previous signal handler"
1694  " (at %p, signal#%d, time taken = %ds), nsigs = %d\n",
1695  pfx,TIMESTR(tid),FFL,
1696  u.func1args,sig,tdiff,nsigs);
1697 
1698  if (atp_enabled && restored && atp_max_cores > 0) {
1699  /* Assuming it was indeed ATP, then lets spin a bit to allow other cores be dumped */
1701  int grace = 60;
1702  secs = 60 + MIN(tdiff * (atp_max_cores-1),secs);
1703  if (secs > 0) {
1704  fprintf(stderr,
1705  "%s %s [%s@%s:%d] Before aborting (signal#%d) spin %ds (incl. grace %ds)"
1706  " to give ATP time to write all #%d core file(s), nsigs = %d\n",
1707  pfx,TIMESTR(tid),FFL,
1708  sig,secs,grace,atp_max_cores,nsigs);
1709  spin(secs);
1710  }
1711  }
1712 
1713  if (sig != SIGABRT && sig != SIGTERM) {
1714  if (atp_enabled && atp_max_cores > 0) {
1715  fprintf(stderr,
1716  "%s %s [%s@%s:%d] DrHook calls abort() and attempts to dump core (signal#%d), nsigs = %d\n",
1717  pfx,TIMESTR(tid),FFL,
1718  sig,nsigs);
1719  set_default_handler(SIGABRT,1,1);
1720  abort();
1721  }
1722  }
1723  /* Now proceed to definitive _exit() */
1724  }
1725  else {
1726  fprintf(stderr,
1727  "%s %s [%s@%s:%d] Not configured (DR_HOOK_PROPAGATE_SIGNALS=%d) or "
1728  "can't call previous signal handler (for signal#%d) in the chain at %p, nsigs = %d\n",
1729  pfx,TIMESTR(tid),FFL,
1731  sl->old.sa_handler,nsigs);
1732  }
1733  }
1734  }
1735 
1736  {
1737  int errcode = 128 + ABS(sig);
1738  /* Make sure that the process/thread really exits now -- immediately !! */
1739  fprintf(stderr, "%s %s [%s@%s:%d] Error _exit(%d) upon receipt of signal#%d, nsigs = %d\n",
1740  pfx,TIMESTR(tid),FFL,
1741  errcode,sig,nsigs);
1742  fflush(NULL);
1743  _exit(errcode);
1744  }
1745 }
1746 
1747 void
1749 {
1751 }
1752 
1753 void
1755 {
1756  /* Emulates in a one call : export DR_HOOK_NOT_MPI=1" */
1757  /* To have a desired effect, call BEFORE the very first call to DR_HOOK */
1758  static char s[] = "DR_HOOK_NOT_MPI=1"; /* note: must be static */
1759  putenv(s);
1760 }
1761 
1762 
1763 /*--- signal_drhook_init ---*/
1764 
1765 static void
1767 {
1768  char *env = getenv("DR_HOOK_SILENT");
1769  int silent = env ? atoi(env) : 0;
1770  int j;
1772  if (myproc < 1) myproc = 1; /* Just to enable output as if myproc was == 1 */
1773  /* Signals may not yet been set, since MPI not initialized
1774  Only enforce-parameter can enforce to set these => no output on myproc=1 */
1775  if (!enforce && (myproc < 1 || nproc < 0)) return;
1776  if (signals_set) return; /* Extra safety */
1777  /* To present sumpini.F90 (f.ex.) initializing DrHook-signals in case of
1778  DR_HOOK was turned off (=0), then set also export DR_HOOK_INIT_SIGNALS=0 */
1779  env = getenv("DR_HOOK_INIT_SIGNALS");
1780  if (env && *env == '0') {
1781  signals_set = 2; /* Pretend they are set */
1782  return; /* Never initialize signals via DrHook (dangerous, but sometimes necessary) */
1783  }
1784  if (!ec_drhook) {
1785  int slen;
1786  char hostname[HOST_NAME_MAX];
1787  int ntids = 1;
1788 #ifdef _OPENMP
1789  ntids = omp_get_max_threads();
1790 #endif
1791  numthreads = ntids;
1792  ec_drhook = calloc_drhook(ntids, sizeof(*ec_drhook));
1793  slen = sizeof(ec_drhook[0].s);
1794  timestr_len = sizeof(ec_drhook[0].timestr);
1795  if (gethostname(hostname,sizeof(hostname)) != 0) strcpy(hostname,"unknown");
1796  if (myproc == 1) {
1797  fprintf(stderr,"[EC_DRHOOK:hostname:myproc:omptid:pid:unixtid] [YYYYMMDD:HHMMSS:epoch:walltime] [function@file:lineno] -- Max OpenMP threads = %d\n",ntids);
1798  }
1799 #pragma omp parallel num_threads(ntids)
1800  {
1801  int tid = get_thread_id_();
1802  int j = tid - 1;
1803  pid_t unixtid = gettid();
1804  snprintf(ec_drhook[j].s,slen,"[EC_DRHOOK:%s:%d:%d:%lld:%lld]",
1805  hostname,myproc,tid,
1806  (long long int)pid, (long long int)unixtid);
1807  }
1808  }
1809  env = getenv("ATP_ENABLED");
1810  atp_enabled = (env && *env == '1') ? 1 : 0;
1811  if (atp_enabled) {
1812  env = getenv("ATP_MAX_CORES");
1813  if (env) atp_max_cores = atoi(env);
1814  env = getenv("ATP_MAX_ANALYSIS_TIME");
1815  if (env) atp_max_analysis_time = atoi(env);
1816  env = getenv("ATP_IGNORE_SIGTERM");
1817  if (env) atp_ignore_sigterm = atoi(env);
1818  if (!silent && myproc == 1) {
1819  int tid = get_thread_id_();
1820  char *pfx = PREFIX(tid);
1821  fprintf(stderr,"%s %s [%s@%s:%d] ATP_ENABLED=%d\n",pfx,TIMESTR(tid),FFL,atp_enabled);
1822  fprintf(stderr,"%s %s [%s@%s:%d] ATP_MAX_CORES=%d\n",pfx,TIMESTR(tid),FFL,atp_max_cores);
1823  fprintf(stderr,"%s %s [%s@%s:%d] ATP_MAX_ANALYSIS_TIME=%d\n",pfx,TIMESTR(tid),FFL,atp_max_analysis_time);
1824  fprintf(stderr,"%s %s [%s@%s:%d] ATP_IGNORE_SIGTERM=%d\n",pfx,TIMESTR(tid),FFL,atp_ignore_sigterm);
1825  }
1826  }
1827  process_options();
1828  for (j=1; j<=NSIG; j++) { /* Initialize */
1829  drhook_sig_t *sl = &siglist[j];
1830  sprintf(sl->name, "DR_HOOK_SIG#%d", j);
1831  sl->active = 0;
1832  sl->ignore_atexit = 0;
1833  }
1834  ignore_signals(silent); /* These signals will not be handled by DR_HOOK */
1835  restore_default_signals(silent); /* These signals will be restored with SIG_DFL status (regardless if to-be-caught with DrHook or ATP or anyhing else) */
1836  SETSIG(SIGABRT,0); /* Good to be first */
1837  SETSIG(SIGBUS,0);
1838  SETSIG(SIGSEGV,0);
1839 #if defined(SIGEMT)
1840  SETSIG(SIGEMT,0);
1841 #endif
1842 #if defined(SIGSTKFLT)
1843  SETSIG(SIGSTKFLT,0); /* Stack fault */
1844 #endif
1845 #if !defined(NECSX)
1846  /* For the moment turn off these on NEC SX ... */
1847  SETSIG(SIGFPE,0);
1848  SETSIG(SIGILL,0);
1849 #endif
1850  SETSIG(SIGTRAP,0); /* Should be switched off when used with debuggers */
1851  SETSIG(SIGINT,0);
1852  if (atp_enabled) {
1853  /* We let ATP to catch SIGQUIT (it uses this for non-failed tasks, we think) -- thus commented out */
1854  /* SETSIG(SIGQUIT,0); */
1855  /* Unless ATP ignores SIGTERM, we ignore it from DrHook -- thus conditionally commented out */
1856  if (atp_ignore_sigterm) SETSIG(SIGTERM,0); /* Means: DrHook does NOT ignore SIGTERM -- ATP does */
1857  }
1858  else {
1859  SETSIG(SIGQUIT,0);
1860  SETSIG(SIGTERM,0);
1861  }
1862 #if defined(SIGIOT)
1863  SETSIG(SIGIOT,0); /* Same as SIGABRT; Used to be a typo SIGIO ;-( */
1864 #endif
1865  SETSIG(SIGXCPU,1); /* ignore_atexit == 1 i.e. no profile info via atexit() */
1866 #if defined(SIGXFSZ)
1867  SETSIG(SIGXFSZ,0);
1868 #endif
1869 #if defined(SIGDANGER)
1870  SETSIG(SIGDANGER,1); /* To catch the place where paging space gets dangerously low */
1871 #endif
1872  SETSIG(SIGSYS,0);
1873  /* SETSIG(SIGCHLD); we may not want to catch this either; may interfere parallel processing */
1874  /* -- not active
1875  SETSIG(SIGCHLD);
1876  SETSIG(SIGHUP);
1877  SETSIG(SIGCONT);
1878  */
1879 #if defined(SIGCORE)
1880  SETSIG(SIGCORE,0); /* NEC SX core dumping */
1881 #endif
1882 #if defined(SIGDEAD)
1883  SETSIG(SIGDEAD,0); /* NEC SX dead lock */
1884 #endif
1885 #if defined(SIGXMEM)
1886  SETSIG(SIGXMEM,0); /* NEC SX exceeded memory size limit */
1887 #endif
1888 #if defined(SIGXDSZ)
1889  SETSIG(SIGXDSZ,0); /* NEC SX exceeded data size limit */
1890 #endif
1891 #if defined(SIGMEM32)
1892  SETSIG(SIGMEM32,0); /* NEC SX exceeded memory size limit of 32KB */
1893 #endif
1894 #if defined(SIGNMEM)
1895  SETSIG(SIGNMEM,0); /* NEC SX exce error for no memory */
1896 #endif
1897 #if defined(SIGXABT)
1898  SETSIG(SIGXABT,0); /* NEC SX distributed parallel program aborted */
1899 #endif
1900  /*
1901  #if defined(SIG)
1902  SETSIG(SIG,0);
1903  #endif
1904  */
1905  catch_signals(silent); /* Additional signals to be seen by DR_HOOK */
1906  if (opt_gencore > 0 && opt_gencore_signal >= 1 && opt_gencore_signal <= NSIG) {
1907  drhook_sigfunc_t u;
1908  u.func3args = signal_gencore;
1909  signal(opt_gencore_signal, u.func1args); /* A facility to dump core */
1910  }
1911  signals_set = 1; /* Signals are set now */
1912 }
1913 
1914 /*--- get_mon_out ---*/
1915 
1916 static char *
1918 {
1919  char *s = mon_out;
1920  if (mon_out_procs == me || (mon_out_procs == -1 && me >= 1 && me <= nproc)) {
1921  if (!mon_out) mon_out = strdup_drhook("drhook.prof.%d");
1922  s = malloc_drhook((strlen(mon_out) + 20) * sizeof(*s));
1923  sprintf(s,mon_out,me);
1924  }
1925  if (!s) s = strdup_drhook("drhook.prof.0");
1926  return s;
1927 }
1928 
1929 /*--- get_memmon_out ---*/
1930 
1931 static char *
1933 {
1934  char *s = NULL;
1935  char *p = get_mon_out(me);
1936  if (p) {
1937  s = malloc_drhook((strlen(p) + 5) * sizeof(*s));
1938  sprintf(s,"%s-mem",p);
1939  }
1940  if (!s) s = strdup_drhook("drhook.prof.0-mem");
1941  return s;
1942 }
1943 
1944 /*--- random_memstat ---*/
1945 
1946 static void
1947 random_memstat(int tid, int enforce)
1948 {
1949  if (tid == 1 && opt_random_memstat > 0 && opt_random_memstat <= RAND_MAX) {
1950  int random_number = rand();
1951  if (enforce || random_number % opt_random_memstat == 0) {
1952  getmaxhwm_();
1953  getmaxstk_();
1954  }
1955  }
1956 }
1957 
1958 /*--- process_options ---*/
1959 
1960 static void do_prof();
1961 
1962 void /* Fortran callable */
1963 c_drhook_process_options_(const int *lhook, const int *Myproc, const int *Nproc)
1964 {
1965  c_drhook_set_lhook_(lhook);
1966  if (Myproc) myproc = *Myproc;
1967  if (Nproc) nproc = *Nproc;
1968  process_options();
1969 }
1970 
1971 #define OPTPRINT(fp,...) if (fp) fprintf(fp,__VA_ARGS__)
1972 
1973 static void
1975 {
1976  char *pfx = "";
1977  char *env;
1978  FILE *fp = NULL;
1979  int tid, ienv, newline;
1980  static int processed = 0;
1981  if (processed) return;
1982 
1983  tid = get_thread_id_();
1984 
1985  env = getenv("DR_HOOK_SHOW_PROCESS_OPTIONS");
1986  ienv = env ? atoi(env) : 1;
1987  if (ienv == -1 || ienv == myproc) fp = stderr;
1988  if (fp) pfx = PREFIX(tid);
1989 
1990  OPTPRINT(fp,"%s %s [%s@%s:%d] fp = %p\n",pfx,TIMESTR(tid),FFL,fp);
1991 
1992  env = getenv("DR_HOOK_ALLOW_COREDUMP");
1993  if (env) {
1994  ienv = atoi(env);
1995  allow_coredump = (ienv == -1 || ienv == myproc) ? ienv : 0;
1996  }
1997  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_ALLOW_COREDUMP=%d\n",pfx,TIMESTR(tid),FFL,allow_coredump);
1998  if (allow_coredump) {
1999  unsigned long long int hardlimit = 0;
2000  int rc = set_unlimited_corefile(&hardlimit);
2001  if (rc == 0) {
2002  OPTPRINT(fp,"%s %s [%s@%s:%d] Hardlimit for core file is now %llu (0x%llx)\n",
2003  pfx,TIMESTR(tid),FFL,hardlimit,hardlimit);
2004  }
2005  }
2006 
2007  env = getenv("DR_HOOK_PROFILE");
2008  if (env) {
2009  char *s = calloc_drhook(strlen(env) + 15, sizeof(*s));
2010  strcpy(s,env);
2011  if (!strchr(env,'%')) strcat(s,".%d");
2012  mon_out = strdup_drhook(s);
2013  free_drhook(s);
2014  }
2015  if (mon_out) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_PROFILE=%s\n",pfx,TIMESTR(tid),FFL,mon_out);
2016 
2017  env = getenv("DR_HOOK_PROFILE_PROC");
2018  if (env) {
2019  mon_out_procs = atoi(env);
2020  }
2021  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_PROFILE_PROC=%d\n",pfx,TIMESTR(tid),FFL,mon_out_procs);
2022 
2023  env = getenv("DR_HOOK_PROFILE_LIMIT");
2024  if (env) {
2025  percent_limit = atof(env);
2026  }
2027  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_PROFILE_LIMIT=%.3f\n",pfx,TIMESTR(tid),FFL,percent_limit);
2028 
2029  env = getenv("DR_HOOK_FUNCENTER");
2030  if (env) {
2031  opt_funcenter = atoi(env);
2032  }
2033  if (opt_funcenter) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_FUNCENTER=%d\n",pfx,TIMESTR(tid),FFL,opt_funcenter);
2034 
2035  env = getenv("DR_HOOK_FUNCEXIT");
2036  if (env) {
2037  opt_funcexit = atoi(env);
2038  }
2039  if (opt_funcexit) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_FUNCEXIT=%d\n",pfx,TIMESTR(tid),FFL,opt_funcexit);
2040 
2041  if (opt_funcenter || opt_funcexit) {
2042  opt_gethwm = opt_getstk = 1;
2043  }
2044 
2045  env = getenv("DR_HOOK_TIMELINE");
2046  if (env) {
2047  opt_timeline = atoi(env);
2048  }
2049 
2050  if (opt_timeline) {
2051  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline);
2052 
2053  env = getenv("DR_HOOK_TIMELINE_THREAD");
2054  if (env) {
2055  opt_timeline_thread = atoi(env);
2056  }
2057  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_THREAD=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline_thread);
2058 
2059  env = getenv("DR_HOOK_TIMELINE_FORMAT");
2060  if (env) {
2061  opt_timeline_format = atoi(env);
2062  }
2063  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_FORMAT=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline_format);
2064 
2065  env = getenv("DR_HOOK_TIMELINE_UNITNO");
2066  if (env) {
2067  opt_timeline_unitno = atoi(env);
2068  }
2069  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_UNITNO=%d\n",pfx,TIMESTR(tid),FFL,opt_timeline_unitno);
2070 
2071  env = getenv("DR_HOOK_TIMELINE_FREQ");
2072  if (env) {
2073  opt_timeline_freq = atoi(env);
2074  }
2075  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_FREQ=%lld\n",pfx,TIMESTR(tid),FFL,opt_timeline_freq);
2076 
2077  env = getenv("DR_HOOK_TIMELINE_MB");
2078  if (env) {
2079  opt_timeline_MB = atof(env);
2080  if (opt_timeline_MB < 0) opt_timeline_MB = 1.0;
2081  }
2082  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMELINE_MB=%g\n",pfx,TIMESTR(tid),FFL,opt_timeline_MB);
2083  }
2084 
2085  env = getenv("DR_HOOK_RANDOM_MEMSTAT");
2086  if (env) {
2087  opt_random_memstat = atoi(env);
2089  if (opt_random_memstat > RAND_MAX) opt_random_memstat = RAND_MAX;
2090  random_memstat(1,1);
2091  }
2092 
2093  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_RANDOM_MEMSTAT=%d (RAND_MAX=%d)\n",pfx,TIMESTR(tid),FFL,opt_random_memstat,RAND_MAX);
2094 
2095  env = getenv("DR_HOOK_HASHBITS");
2096  if (env) {
2097  int value = atoi(env);
2098  if (value < 1) value = 1;
2099  else if (value > NHASHMAX) value = NHASHMAX;
2100  nhash = value;
2101  hashsize = HASHSIZE(nhash);
2102  hashmask = HASHMASK(nhash);
2103  }
2104  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_HASHBITS=%d\n",pfx,TIMESTR(tid),FFL,nhash);
2105 
2106  env = getenv("DR_HOOK_NCALLSTACK");
2107  if (env) {
2108  int value = atoi(env);
2109  if (value < 1) value = NCALLSTACK;
2110  cstklen = value;
2111  }
2112  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_NCALLSTACK=%d\n",pfx,TIMESTR(tid),FFL,cstklen);
2113 
2114  env = getenv("DR_HOOK_HARAKIRI_TIMEOUT");
2115  if (env) {
2116  int value = atoi(env);
2117  if (value < 1) value = drhook_harakiri_timeout_default;
2118  drhook_harakiri_timeout = value;
2119  }
2120  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_HARAKIRI_TIMEOUT=%d\n",pfx,TIMESTR(tid),FFL,drhook_harakiri_timeout);
2121 
2122  env = getenv("DR_HOOK_TRAPFPE");
2123  if (env) {
2124  int value = atoi(env);
2125  drhook_trapfpe = (value != 0) ? 1 : 0; /* currently accept just 0 or 1 */
2126  }
2127  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TRAPFPE=%d\n",pfx,TIMESTR(tid),FFL,drhook_trapfpe);
2128 
2129  env = getenv("DR_HOOK_TIMED_KILL");
2130  if (env) {
2132  }
2133  if (drhook_timed_kill) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_TIMED_KILL=%s\n",pfx,TIMESTR(tid),FFL,drhook_timed_kill);
2134 
2135  env = getenv("DR_HOOK_DUMP_SMAPS");
2136  if (env) {
2137  ienv = atoi(env);
2138  drhook_dump_smaps = (ienv != 0) ? 1 : 0;
2139  }
2140  if (drhook_dump_smaps) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_DUMP_SMAPS=%d\n",pfx,TIMESTR(tid),FFL,drhook_dump_smaps);
2141 
2142  env = getenv("DR_HOOK_DUMP_BUDDYINFO");
2143  if (env) {
2144  ienv = atoi(env);
2145  drhook_dump_buddyinfo = (ienv != 0) ? 1 : 0;
2146  }
2147  if (drhook_dump_buddyinfo) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_DUMP_BUDDYINFO=%d\n",pfx,TIMESTR(tid),FFL,drhook_dump_buddyinfo);
2148 
2149  env = getenv("DR_HOOK_DUMP_HUGEPAGES");
2150  if (env) {
2151  double freq;
2152  int nel = sscanf(env,"%d,%lf",&ienv,&freq);
2153  if (nel == 2) {
2154  drhook_dump_hugepages = (freq > 0 && (ienv == -1 || ienv == myproc)) ? ienv : 0;
2156  }
2157  }
2158  if (drhook_dump_hugepages) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_DUMP_HUGEPAGES=%d,%.6f\n",pfx,TIMESTR(tid),FFL,
2160 
2161  env = getenv("DR_HOOK_GENCORE");
2162  if (env) {
2163  opt_gencore = atoi(env);
2164  }
2165 
2166  if (opt_gencore) {
2167  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_GENCORE=%d\n",pfx,TIMESTR(tid),FFL,opt_gencore);
2168 
2169  env = getenv("DR_HOOK_GENCORE_SIGNAL");
2170  if (env) {
2171  int itmp = atoi(env);
2172  if (itmp >= 1 && itmp <= NSIG && itmp != SIGABRT) {
2173  opt_gencore_signal = itmp;
2174  }
2175  }
2176  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_GENCORE_SIGNAL=%d\n",pfx,TIMESTR(tid),FFL,opt_gencore_signal);
2177  }
2178 
2179  env = getenv("DR_HOOK_HPMSTOP");
2180  if (env) {
2181  char *s = strdup_drhook(env);
2182  long long int a;
2183  double b;
2184  int n = 0;
2185  env = s;
2186  while (*env) {
2187  if (isspace(*env) || *env == ',') *env = ' ';
2188  env++;
2189  }
2190  n = sscanf(s,"%lld %lf",&a,&b);
2191  if (n >= 1) opt_hpmstop_threshold = a;
2192  if (n >= 2) opt_hpmstop_mflops = b;
2193  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_HPMSTOP=%lld,%.15g\n",
2194  pfx,TIMESTR(tid),FFL,opt_hpmstop_threshold,opt_hpmstop_mflops);
2195  free_drhook(s);
2196  }
2197 
2198  newline = 0;
2199  env = getenv("DR_HOOK_OPT");
2200  if (env) {
2201  const char delim[] = ", \t/";
2202  char *comma = " DR_HOOK_OPT=\"";
2203  char *s = strdup_drhook(env);
2204  char *p = s;
2205  while (*p) {
2206  if (islower(*p)) *p = toupper(*p);
2207  p++;
2208  }
2209  p = strtok(s,delim);
2210  /* if (p) OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_OPT=\"",pfx,TIMESTR(tid)); */
2211  if (p && fp) {
2212  fprintf(fp,"%s %s [%s@%s:%d]",pfx,TIMESTR(tid),FFL);
2213  newline = 1;
2214  }
2215  while (p) {
2216  /* Assume that everything is OFF by default */
2217  if (strequ(p,"ALL")) { /* all except profiler data */
2219  opt_calls = 1;
2220  any_memstat++;
2221  OPTPRINT(fp,"%s%s",comma,"ALL"); comma = ",";
2222  }
2223  else if (strequ(p,"MEM") || strequ(p,"MEMORY")) {
2225  opt_calls = 1;
2226  any_memstat++;
2227  OPTPRINT(fp,"%s%s",comma,"MEMORY"); comma = ",";
2228  }
2229  else if (strequ(p,"TIME") || strequ(p,"TIMES")) {
2230  opt_walltime = opt_cputime = 1;
2231  opt_calls = 1;
2232  OPTPRINT(fp,"%s%s",comma,"TIMES"); comma = ",";
2233  }
2234  else if (strequ(p,"HWM") || strequ(p,"HEAP")) {
2235  opt_gethwm = 1;
2236  opt_calls = 1;
2237  any_memstat++;
2238  OPTPRINT(fp,"%s%s",comma,"HEAP"); comma = ",";
2239  }
2240  else if (strequ(p,"STK") || strequ(p,"STACK")) {
2241  opt_getstk = 1;
2242  opt_calls = 1;
2243  any_memstat++;
2244  OPTPRINT(fp,"%s%s",comma,"STACK"); comma = ",";
2245  }
2246  else if (strequ(p,"RSS")) {
2247  opt_getrss = 1;
2248  opt_calls = 1;
2249  any_memstat++;
2250  OPTPRINT(fp,"%s%s",comma,"RSS"); comma = ",";
2251  }
2252  else if (strequ(p,"PAG") || strequ(p,"PAGING")) {
2253  opt_getpag = 1;
2254  opt_calls = 1;
2255  any_memstat++;
2256  OPTPRINT(fp,"%s%s",comma,"PAGING"); comma = ",";
2257  }
2258  else if (strequ(p,"WALL") || strequ(p,"WALLTIME")) {
2259  opt_walltime = 1;
2260  opt_calls = 1;
2261  OPTPRINT(fp,"%s%s",comma,"WALLTIME"); comma = ",";
2262  }
2263  else if (strequ(p,"CPU") || strequ(p,"CPUTIME")) {
2264  opt_cputime = 1;
2265  opt_calls = 1;
2266  OPTPRINT(fp,"%s%s",comma,"CPUTIME"); comma = ",";
2267  }
2268  else if (strequ(p,"CALLS") || strequ(p,"COUNT")) {
2269  opt_calls = 1;
2270  OPTPRINT(fp,"%s%s",comma,"CALLS"); comma = ",";
2271  }
2272  else if (strequ(p,"MEMPROF")) {
2273  opt_memprof = 1;
2274  drhook_memtrace = 1;
2276  opt_getpag = 1;
2277  opt_calls = 1;
2278  any_memstat++;
2279  OPTPRINT(fp,"%s%s",comma,"MEMPROF"); comma = ",";
2280  }
2281  else if (strequ(p,"PROF") || strequ(p,"WALLPROF")) {
2282  opt_wallprof = 1;
2283  opt_walltime = 1;
2284  opt_cpuprof = 0; /* Note: Switches cpuprof OFF */
2285  opt_calls = 1;
2286  OPTPRINT(fp,"%s%s",comma,"WALLPROF"); comma = ",";
2287  }
2288  else if (strequ(p,"CPUPROF")) {
2289  opt_cpuprof = 1;
2290  opt_cputime = 1;
2291  opt_wallprof = 0; /* Note: Switches walprof OFF */
2292  opt_calls = 1;
2293  OPTPRINT(fp,"%s%s",comma,"CPUPROF"); comma = ",";
2294  }
2295  else if (strequ(p,"HPM") || strequ(p,"HPMPROF") || strequ(p,"MFLOPS")) {
2296  opt_hpmprof = 1;
2297  opt_wallprof = 1; /* Note: Implies wallprof (or prof), not cpuprof */
2298  opt_walltime = 1;
2299  opt_cpuprof = 0; /* Note: Switches cpuprof OFF */
2300  opt_calls = 1;
2301  OPTPRINT(fp,"%s%s",comma,"HPMPROF"); comma = ",";
2302  }
2303  else if (strequ(p,"TRIM")) {
2304  opt_trim = 1;
2305  OPTPRINT(fp,"%s%s",comma,"TRIM"); comma = ",";
2306  }
2307  else if (strequ(p,"SELF")) {
2308  opt_self = 2;
2309  OPTPRINT(fp,"%s%s",comma,"SELF"); comma = ",";
2310  }
2311  else if (strequ(p,"NOSELF")) {
2312  opt_self = 0;
2313  OPTPRINT(fp,"%s%s",comma,"NOSELF"); comma = ",";
2314  }
2315  else if (strequ(p,"NOPROP") || strequ(p,"NOPROPAGATE") ||
2316  strequ(p,"NOPROPAGATE_SIGNALS")) {
2318  OPTPRINT(fp,"%s%s",comma,"NOPROPAGATE_SIGNALS"); comma = ",";
2319  }
2320  else if (strequ(p,"NOSIZE") || strequ(p,"NOSIZEINFO")) {
2321  opt_sizeinfo = 0;
2322  OPTPRINT(fp,"%s%s",comma,"NOSIZEINFO"); comma = ",";
2323  }
2324  else if (strequ(p,"CLUSTER") || strequ(p,"CLUSTERINFO")) {
2325  opt_clusterinfo = 1;
2326  OPTPRINT(fp,"%s%s",comma,"CLUSTERINFO"); comma = ",";
2327  }
2328  else if (strequ(p,"CALLPATH")) {
2329  opt_callpath = 1;
2330  OPTPRINT(fp,"%s%s",comma,"CALLPATH"); comma = ",";
2331  }
2332  p = strtok(NULL,delim);
2333  }
2334  free_drhook(s);
2335  if (*comma == ',') {
2336  OPTPRINT(fp,"\"\n");
2337  newline = 0;
2338  }
2339  if (newline) OPTPRINT(fp,"\n");
2340 
2341  if (opt_callpath) {
2342  env = getenv("DR_HOOK_CALLPATH_INDENT");
2343  if (env) {
2344  callpath_indent = atoi(env);
2345  if (callpath_indent < 1 || callpath_indent > 8) callpath_indent = callpath_indent_default;
2346  }
2347  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_INDENT=%d\n",pfx,TIMESTR(tid),FFL,callpath_indent);
2348 
2349  env = getenv("DR_HOOK_CALLPATH_DEPTH");
2350  if (env) {
2351  callpath_depth = atoi(env);
2352  if (callpath_depth < 0) callpath_depth = callpath_depth_default;
2353  }
2354  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_DEPTH=%d\n",pfx,TIMESTR(tid),FFL,callpath_depth);
2355 
2356  env = getenv("DR_HOOK_CALLPATH_PACKED");
2357  if (env) {
2358  callpath_packed = atoi(env);
2359  }
2360  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLPATH_PACKED=%d\n",pfx,TIMESTR(tid),FFL,callpath_packed);
2361 
2362  env = getenv("DR_HOOK_CALLTRACE");
2363  if (env) {
2364  opt_calltrace = atoi(env);
2365  }
2366  OPTPRINT(fp,"%s %s [%s@%s:%d] DR_HOOK_CALLTRACE=%d\n",pfx,TIMESTR(tid),FFL,opt_calltrace);
2367  }
2368 
2370  atexit(do_prof);
2371  }
2372  }
2373  else {
2374  if (opt_timeline) atexit(do_prof);
2375  } /* if (env) */
2376 
2377  processed = 1;
2378 }
2379 
2380 /*--- trim ---*/
2381 
2382 static const char *
2383 trim(const char *name, int *n)
2384 {
2385  const char *from;
2386  int len;
2387  int name_len = *n;
2388  while (*name && isspace(*name) && name_len > 0) {
2389  /* skip leading blanks */
2390  name++;
2391  name_len--;
2392  }
2393  len = 0;
2394  from = name;
2395  while (*from && !isspace(*from) && name_len > 0) {
2396  /* find first space point, if any */
2397  from++;
2398  len++;
2399  name_len--;
2400  }
2401  *n = len;
2402  if (!name) {
2403  /* Never actually called (unless a true fatality) */
2404  ABOR1("***Fatal error in drhook.c:trim()-function");
2405  }
2406  return name;
2407 }
2408 
2409 /*--- insertkey ---*/
2410 
2411 static drhook_key_t *
2412 insertkey(int tid, const drhook_key_t *keyptr_in)
2413 {
2414  drhook_key_t *keyptr = NULL;
2415  if (tid >= 1 && tid <= numthreads) {
2416  /* no trimming available for this; just raw eval & insert */
2417  unsigned int hash = hashfunc(keyptr_in->name, keyptr_in->name_len);
2418  keyptr = &keydata[tid-1][hash];
2419  for (;;) {
2420  if (!keyptr->name) { /* A free slot */
2421  memcpy(keyptr,keyptr_in,sizeof(*keyptr));
2422  keyptr->next = NULL;
2423  break;
2424  }
2425  else {
2426  if (!keyptr->next) {
2427  keyptr->next = calloc_drhook(1, sizeof(drhook_key_t)); /* chaining */
2428  }
2429  keyptr = keyptr->next;
2430  } /* if (!keyptr->name) ... else ... */
2431  } /* for (;;) */
2432  } /* if (tid >= 1 && tid <= numthreads) */
2433  return keyptr;
2434 }
2435 
2436 /*--- getkey ---*/
2437 
2438 static drhook_key_t *
2439 getkey(int tid, const char *name, int name_len,
2440  const char *filename, int filename_len,
2441  const double *walltime, const double *cputime,
2442  const equivalence_t *callpath, int callpath_len,
2443  int *free_callpath)
2444 {
2445  drhook_key_t *keyptr = NULL;
2446  if (tid >= 1 && tid <= numthreads) {
2447  unsigned int hash, fullhash;
2448  if (opt_trim) name = trim(name, &name_len);
2449  hash = hashfunc(name, name_len);
2450  if (callpath) {
2451  callpath_hashfunc(hash, callpath, callpath_len, &fullhash);
2452 #ifdef DEBUG
2453  fprintf(stderr,
2454  "getkey: name='%.*s', name_len=%d, callpath_len=%d, fullhash=%u\n",
2455  name_len, name, name_len, callpath_len, fullhash);
2456 #endif
2457  }
2458  keyptr = &keydata[tid-1][hash];
2459  for (;;) {
2460  int found = 0;
2461  if (!keyptr->name) { /* A free slot */
2462  keyptr->name = malloc_drhook((name_len+1)*sizeof(*name));
2463  keyptr->name_len = name_len;
2464  if (opt_trim) {
2465  const char *from = name;
2466  char *to = keyptr->name;
2467  int len = name_len;
2468  for (; len>0; from++, len--) {
2469  *to++ = islower(*from) ? toupper(*from) : *from;
2470  }
2471  *to = 0;
2472  }
2473  else {
2474  memcpy(keyptr->name, name, name_len);
2475  keyptr->name[name_len] = 0;
2476  }
2477  if (filename_len > 0 &&
2478  filename &&
2479  *filename) {
2480  char *psave = NULL;
2481  char *p = psave = malloc_drhook((filename_len+1)*sizeof(*filename));
2482  memcpy(p, filename, filename_len);
2483  p[filename_len] = 0;
2484  { /* Strip out dirname */
2485  char *s = strrchr(p,'/');
2486  if (s) p = s+1;
2487  }
2488  keyptr->filename = strdup_drhook(p);
2489  free_drhook(psave);
2490  }
2491  if (callpath) {
2492  if (free_callpath) *free_callpath = 0;
2493  keyptr->callpath = callpath;
2494  keyptr->callpath_len = callpath_len;
2495  keyptr->callpath_fullhash = fullhash;
2496  }
2497  found = 1;
2498  }
2499  if (found ||
2500  (keyptr->name_len == name_len &&
2501  (!callpath || (callpath && keyptr->callpath &&
2502  keyptr->callpath_len == callpath_len &&
2503  keyptr->callpath_fullhash == fullhash)) &&
2504  ((!opt_trim && *keyptr->name == *name && strnequ(keyptr->name, name, name_len)) ||
2505  (opt_trim && strncasecmp(keyptr->name, name, name_len) == 0)))) {
2506  if (opt_walltime) keyptr->wall_in = walltime ? *walltime : WALLTIME();
2507  if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME();
2508  if (any_memstat) memstat(keyptr,&tid,1);
2509  if (opt_calls) {
2510  keyptr->calls++;
2511  keyptr->status++;
2512  }
2513  insert_calltree(tid, keyptr);
2514  break; /* for (;;) */
2515  }
2516  else {
2517  if (!keyptr->next) {
2518  keyptr->next = calloc_drhook(1, sizeof(drhook_key_t)); /* chaining */
2519  }
2520  keyptr = keyptr->next;
2521  } /* if (found ...) else ... */
2522  } /* for (;;) */
2523  curkeyptr[tid-1] = keyptr;
2524  } /* if (tid >= 1 && tid <= numthreads) */
2525  return keyptr;
2526 }
2527 
2528 /*--- putkey ---*/
2529 
2530 static void
2531 putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len,
2532  int sizeinfo,
2533  double *walltime, double *cputime)
2534 {
2535  const int sig = SIGABRT;
2536  const char sl_name[] = "SIGABRT";
2537  drhook_calltree_t *treeptr = (tid >= 1 && tid <= numthreads) ? thiscall[tid-1] : NULL;
2538  if (!treeptr || !treeptr->active || treeptr->keyptr != keyptr) {
2539  char *pfx = PREFIX(tid);
2540  char *s;
2541  unsigned int hash;
2542  if (opt_trim) name = trim(name, &name_len);
2543  hash = hashfunc(name, name_len);
2544  s = strdup2_drhook(name,name_len);
2545  if (opt_trim) {
2546  char *p = s;
2547  while (*p) {
2548  if (islower(*p)) *p = toupper(*p);
2549  p++;
2550  }
2551  }
2552  fprintf(stderr,
2553  "%s %s [%s@%s:%d] [signal#%d(%s)]: Dr.Hook has detected an invalid"
2554  " key-pointer/handle while leaving the routine '%s' [hash=%u]\n",
2555  pfx,TIMESTR(tid),FFL,
2556  sig,sl_name,s,hash);
2557 
2558  if (treeptr) {
2559  equivalence_t u;
2560 
2561  u.keyptr = treeptr->keyptr;
2562  hash = (u.keyptr && u.keyptr->name) ? hashfunc(u.keyptr->name,u.keyptr->name_len) : 0;
2563  fprintf(stderr,
2564  "%s %s [%s@%s:%d] [signal#%d(%s)]: Expecting the key-pointer=%p"
2565  " and treeptr->active-flag = 1\n",
2566  pfx,TIMESTR(tid),FFL,
2567  sig,sl_name,u.keyptr);
2568  fprintf(stderr,
2569  "%s %s [%s@%s:%d] [signal#%d(%s)]: A probable routine missing the closing"
2570  " DR_HOOK-call is '%s' [hash=%u]\n",
2571  pfx,TIMESTR(tid),FFL,
2572  sig,sl_name,
2573  (u.keyptr && u.keyptr->name) ? u.keyptr->name : NIL, hash);
2574 
2575  u.keyptr = keyptr;
2576  hash = (u.keyptr && u.keyptr->name) ? hashfunc(u.keyptr->name,u.keyptr->name_len) : 0;
2577  fprintf(stderr,
2578  "%s %s [%s@%s:%d] [signal#%d(%s)]: Got a key-pointer=%p"
2579  " and treeptr->active-flag = %d\n",
2580  pfx,TIMESTR(tid),FFL,
2581  sig,sl_name,u.keyptr,treeptr->active);
2582  fprintf(stderr,
2583  "%s %s [%s@%s:%d] [signal#%d(%s)]: This key-pointer maybe associated with"
2584  " the routine '%s' [hash=%u]\n",
2585  pfx,TIMESTR(tid),FFL,
2586  sig,sl_name,
2587  (u.keyptr && u.keyptr->name) ? u.keyptr->name : NIL, hash);
2588 
2589  u.keyptr = curkeyptr[tid-1];
2590  hash = (u.keyptr && u.keyptr->name) ? hashfunc(u.keyptr->name,u.keyptr->name_len) : 0;
2591  fprintf(stderr,
2592  "%s %s [%s@%s:%d] [signal#%d(%s)]: The current key-pointer (=%p) thinks"
2593  " it maybe associated with the routine '%s' [hash=%u]\n",
2594  pfx,TIMESTR(tid),FFL,
2595  sig,sl_name,
2596  u.keyptr,
2597  (u.keyptr && u.keyptr->name) ? u.keyptr->name : NIL, hash);
2598  }
2599  free_drhook(s);
2600  fprintf(stderr,
2601  "%s %s [%s@%s:%d] [signal#%d(%s)]: Aborting...\n",
2602  pfx,TIMESTR(tid),FFL,
2603  sig,sl_name);
2604  RAISE(SIGABRT);
2605  }
2606  else if (tid >= 1 && tid <= numthreads) {
2607  double delta_wall = 0;
2608  double delta_cpu = 0;
2609  if (any_memstat) memstat(keyptr,&tid,0);
2610  if (opt_calls) keyptr->status--;
2611  if (opt_sizeinfo && sizeinfo > 0) {
2612  if (keyptr->sizeinfo == 0) { /* First time */
2613  keyptr->min_sizeinfo = sizeinfo;
2614  keyptr->max_sizeinfo = sizeinfo;
2615  }
2616  else {
2617  keyptr->min_sizeinfo = MIN(keyptr->min_sizeinfo, sizeinfo);
2618  keyptr->max_sizeinfo = MAX(keyptr->max_sizeinfo, sizeinfo);
2619  }
2620  keyptr->sizeinfo += sizeinfo;
2621  }
2622  if (opt_cputime && cputime) {
2623  *cputime = CPUTIME();
2624  delta_cpu = *cputime - keyptr->cpu_in;
2625  }
2626  if (opt_walltime && walltime) {
2627  *walltime = WALLTIME();
2628  delta_wall = *walltime - keyptr->wall_in;
2629  }
2630  if (opt_walltime) keyptr->delta_wall_all += delta_wall;
2631  if (opt_cputime) keyptr->delta_cpu_all += delta_cpu;
2632  remove_calltree(tid, keyptr, &delta_wall, &delta_cpu);
2633  }
2634 }
2635 
2636 /*--- init_drhook ---*/
2637 
2638 static void
2639 init_drhook(int ntids)
2640 {
2641  if (numthreads == 0 || !keydata || !calltree || !keyself || !overhead || !curkeyptr || !cstk) {
2642  int j;
2643  if (pid == -1) { /* Ensure that just called once */
2644  {
2645  /* Invoke once : timers, memory counters etc. to "wake them up" */
2646  (void) WALLTIME();
2647  (void) CPUTIME();
2648  (void) gethwm_();
2649  (void) getmaxhwm_();
2650  (void) getrss_();
2651  (void) getmaxrss_();
2652  (void) getstk_();
2653  (void) getmaxstk_();
2654  (void) getpag_();
2655  }
2656 #ifdef RS6K
2657  irtc_start = irtc();
2658 #endif
2659 #ifdef CRAYXT
2660  dclock_start = dclock();
2661 #endif
2662 #if defined(SV2) || defined(XD1) || defined(XT3)
2663 #if defined(SV2)
2664  irtc_start = _rtc();
2665 #else
2666  irtc_start = irtc_();
2667 #endif
2670 #endif
2671  start_stamp = timestamp();
2672  {
2673  char *env = getenv("DR_HOOK_SHOW_LOCK"); /* export DR_HOOK_SHOW_LOCK=1 to show the lock-info */
2674  int konoff = env ? atoi(env) : 0;
2675  int kret = 0;
2676  if (konoff == 1) coml_set_debug_(&konoff, &kret);
2677  INIT_LOCKID_WITH_NAME(&DRHOOK_lock,"drhook.c:DRHOOK_lock");
2678  if (kret != 0) {
2679  konoff = 0;
2680  coml_set_debug_(&konoff, &kret);
2681  }
2682  }
2683 #if defined(NECSX)
2684  { /* If C-programs compiled with -traceback, then NEC/F90
2685  MESPUT-call will also includes C-routines in the traceback if
2686  in addition 'export C_TRACEBACK=YES' */
2687  char *env = getenv("C_TRACEBACK");
2688  if (!env) {
2689  /* Override only if C_TRACEBACK hadn't already been defined */
2690  static char s[] = "C_TRACEBACK=YES"; /* note: must be static */
2691  putenv(s);
2692  }
2693  }
2694 #endif
2695  ec_set_umask_();
2696  pid = getpid();
2697  signal_drhook_init(1); /* myproc gets set .. if not earlier */
2698  process_options();
2699 #ifndef DARWIN
2700  set_timed_kill();
2701 #endif
2702  drhook_lhook = 1;
2703  }
2704  if (!keydata) {
2705  keydata = malloc_drhook(sizeof(**keydata) * ntids);
2706  for (j=0; j<ntids; j++) {
2707  keydata[j] = calloc_drhook(hashsize, sizeof(drhook_key_t));
2708  }
2709  }
2710  if (!cstk) {
2711  cstk = calloc_drhook(ntids, sizeof(**cstk));
2712  }
2713  if (!calltree) {
2714  calltree = malloc_drhook(sizeof(**calltree) * ntids);
2715  thiscall = malloc_drhook(sizeof(**thiscall) * ntids);
2716  for (j=0; j<ntids; j++) {
2717  thiscall[j] = calltree[j] = calloc_drhook(1,sizeof(drhook_calltree_t));
2718  }
2719  }
2720  if (!keyself && opt_self && (opt_wallprof || opt_cpuprof || opt_hpmprof)) {
2721  const char *name = "$drhook";
2722  int name_len = strlen(name);
2723  keyself = malloc_drhook(sizeof(**keyself) * ntids);
2724  for (j=0; j<ntids; j++) {
2725  drhook_key_t *keyptr = keyself[j] = calloc_drhook(1,sizeof(drhook_key_t));
2726  keyptr->name = strdup_drhook(name);
2727  keyptr->name_len = name_len;
2728  }
2729  }
2730  if (!overhead) {
2731  overhead = calloc_drhook(ntids,sizeof(*overhead));
2732  }
2733  if (!curkeyptr) {
2734  curkeyptr = malloc_drhook(sizeof(**curkeyptr) * ntids);
2735  for (j=0; j<ntids; j++) {
2736  curkeyptr[j] = NULL;
2737  }
2738  }
2739  numthreads = ntids;
2740  if (!timeline) {
2741  if (opt_timeline_unitno >= 0 && opt_timeline_freq >= 1 &&
2742  (opt_timeline == myproc || opt_timeline == -1)) {
2743  timeline = calloc_drhook(ntids, sizeof(*timeline));
2744  }
2745  if (timeline) drhook_memtrace = 1;
2746  if (timeline) {
2747  /* The first timeline-call */
2748  const int ftnunitno = opt_timeline_unitno;
2749  const int master = 1;
2750  const int print_option = +7;
2751  int initlev = 0;
2752  c_drhook_print_(&ftnunitno, &master, &print_option, &initlev);
2753  }
2754  }
2755  init_hpm(1); /* First thread */
2756  }
2757 }
2758 
2759 /*-- overhead-macro --*/
2760 
2761 #define OVERHEAD(tid,walltime_in,cputime_in,delta,calc_delta) \
2762 if (overhead && tid >= 1 && tid <= numthreads) { \
2763  if (calc_delta) { \
2764  if (opt_walltime) delta = WALLTIME() - walltime_in; \
2765  else if (opt_cputime) delta = CPUTIME() - cputime_in; \
2766  else delta = 0; \
2767  } \
2768  overhead[tid-1] += delta; \
2769 }
2770 /*--- itself ---*/
2771 
2772 #define ITSELF_0 \
2773  double delta = 0; \
2774  drhook_key_t *keyptr_self = keyself ? itself(NULL,*thread_id,0,NULL,&walltime,&cputime) : NULL;
2775 
2776 #define ITSELF_1 \
2777  if (keyptr_self) { \
2778  (void) itself(keyptr_self,*thread_id,1,&delta,&walltime,&cputime); \
2779  if (opt_wallprof) u.keyptr->delta_wall_child += delta; \
2780  else u.keyptr->delta_cpu_child += delta; \
2781  OVERHEAD(*thread_id,walltime,cputime,delta,0); \
2782  } \
2783  else { \
2784  OVERHEAD(*thread_id,walltime,cputime,delta,1); \
2785  }
2786 
2787 static drhook_key_t *
2788 itself(drhook_key_t *keyptr_self,
2789  int tid, int opt, double *delta_time,
2790  const double *walltime, const double *cputime)
2791 {
2792  drhook_key_t *keyptr = NULL;
2793  if (keyself) {
2794  keyptr = keyptr_self ? keyptr_self : keyself[tid-1];
2795  if (opt == 0) {
2796  if (opt_wallprof) keyptr->wall_in = walltime ? *walltime : WALLTIME();
2797  else keyptr->cpu_in = cputime ? *cputime : CPUTIME();
2798  keyptr->calls++;
2799  }
2800  else if (opt == 1) {
2801  double delta = 0;
2802  if (opt_wallprof) {
2803  delta = walltime ? (*walltime - keyptr->wall_in) : (WALLTIME() - keyptr->wall_in);
2804  keyptr->delta_wall_all += delta;
2805  }
2806  else {
2807  delta = cputime ? (*cputime - keyptr->cpu_in) : (CPUTIME() - keyptr->cpu_in);
2808  keyptr->delta_cpu_all += delta;
2809  }
2810  if (delta_time) *delta_time = delta;
2811  }
2812  }
2813  return keyptr;
2814 }
2815 
2816 /*--- commie -routines : adds "," i.e. comma after each 3 digit, e.g.:
2817  1234567890 becomes more readable 1,234,567,890 */
2818 
2819 static void
2820 lld_commie(long long int n, char sd[])
2821 {
2822  const char comma = ',';
2823  char s[DRHOOK_STRBUF];
2824  char *p;
2825  int len, ncommas;
2826  sprintf(s,"%lld",n);
2827  len = strlen(s);
2828  ncommas = (len-1)/3;
2829  if (ncommas > 0) {
2830  char *pd = sd + len + ncommas;
2831  *pd-- = 0;
2832  p = s + len - 1;
2833  len = 0;
2834  while (p-s >= 0) {
2835  *pd-- = *p--;
2836  len++;
2837  if (p-s >= 0 && len%3 == 0) *pd-- = comma;
2838  }
2839  }
2840  else {
2841  strcpy(sd,s);
2842  }
2843 }
2844 
2845 static void
2846 dbl_commie(double n, char sd[])
2847 {
2848  const char comma = ',';
2849  char s[DRHOOK_STRBUF];
2850  char *p;
2851  int len, ncommas;
2852  sprintf(s,"%.0f",n);
2853  len = strlen(s);
2854  ncommas = (len-1)/3;
2855  if (ncommas > 0) {
2856  char *pd = sd + len + ncommas;
2857  *pd-- = 0;
2858  p = s + len - 1;
2859  len = 0;
2860  while (p-s >= 0) {
2861  *pd-- = *p--;
2862  len++;
2863  if (p-s >= 0 && len%3 == 0) *pd-- = comma;
2864  }
2865  }
2866  else {
2867  strcpy(sd,s);
2868  }
2869 }
2870 
2871 /*--- callpath as a "pathname" ---*/
2872 
2873 static void
2874 unroll_callpath(FILE *fp, int len,
2875  const equivalence_t *callpath, int callpath_len)
2876 {
2877  if (fp && callpath && callpath_len > 0) {
2878  int j;
2879  for (j=0; j<callpath_len; callpath++, j++) {
2880  if (callpath && callpath->keyptr && callpath->keyptr->name) {
2881  const char *name = callpath->keyptr->name;
2882  int name_len = callpath->keyptr->name_len;
2883  len -= callpath_indent;
2884  if (len < 0) len = 0;
2885  fprintf(fp,"\n%*s%.*s",len," ",name_len,name);
2886  }
2887 #ifdef DEBUG
2888  else {
2889  fprintf(fp,
2890  "\n????callpath=%p, callpath->keyptr=%p, callpath->keyptr->name='%s'",
2891  callpath, callpath ? callpath->keyptr : 0,
2892  (callpath && callpath->keyptr && callpath->keyptr->name) ?
2893  callpath->keyptr->name : NIL);
2894  }
2895 #endif
2896  }
2897  } /* if (fp) */
2898 }
2899 
2900 
2901 static equivalence_t *
2902 get_callpath(int tid, int *callpath_len)
2903 {
2904  int depth = 0;
2905  equivalence_t *callpath = NULL;
2906  if (tid >= 1 && tid <= numthreads) {
2907  const drhook_calltree_t *treeptr = thiscall[tid-1];
2908  while (treeptr && treeptr->active && depth < callpath_depth) {
2909  depth++;
2910  treeptr = treeptr->prev;
2911  }
2912  if (depth > 0) {
2913  int j = 0;
2914  callpath = malloc_drhook(sizeof(*callpath) * depth);
2915  treeptr = thiscall[tid-1];
2916  while (treeptr && treeptr->active && j < callpath_depth) {
2917  callpath[j].keyptr = treeptr->keyptr;
2918  j++;
2919  treeptr = treeptr->prev;
2920  }
2921  } /* if (depth > 0) */
2922  } /* if (tid >= 1 && tid <= numthreads) */
2923  if (callpath_len) *callpath_len = depth;
2924  return callpath;
2925 }
2926 
2927 /*--- profiler output ---*/
2928 
2929 static int do_prof_off = 0;
2930 
2931 static void
2933 {
2934 
2935  /* to avoid recursive signals while atexit() (e.g. SIGXCPU) */
2936  if (signal_handler_ignore_atexit) return;
2937 
2938  if (!do_prof_off && (opt_wallprof || opt_cpuprof)) {
2939  /* CPU, wall-clock and/or MFlop/s profiling */
2940  const int ftnunitno = 0;
2941  const int master = 1;
2942  const int print_option = 3;
2943  int initlev = 0;
2944  c_drhook_print_(&ftnunitno, &master, &print_option, &initlev);
2945  }
2946 
2947  if (!do_prof_off && opt_memprof) {
2948  /* Memory profiling */
2949  const int ftnunitno = 0;
2950  const int master = 1;
2951  const int print_option = 4;
2952  int initlev = 0;
2953  c_drhook_print_(&ftnunitno, &master, &print_option, &initlev);
2954  }
2955 
2956  if (!do_prof_off && timeline) {
2957  /* The last timeline-call */
2958  const int ftnunitno = opt_timeline_unitno;
2959  const int master = 1;
2960  const int print_option = -7;
2961  int initlev = 0;
2962  c_drhook_print_(&ftnunitno, &master, &print_option, &initlev);
2963  }
2964 }
2965 
2966 /*--- Check watch points ---*/
2967 
2968 typedef enum { /* See dr_hook_watch_mod.F90 */
2969  KEYNONE = 0,
2970  KEYLOG = 1,
2971  KEYCHAR = 2,
2972  KEY_I4 = 4,
2973  KEY_I8 = 8,
2974  KEY_R4 = 16,
2975  KEY_R8 = 32
2977 
2978 static void print_watch(int ftnunitno, int key, const void *ptr, int n)
2979 {
2980  if (ptr && key > KEYNONE && n > 0) {
2981  int nmax = n;
2982  if (key == KEYLOG) {
2983  dr_hook_prt_logical_(&ftnunitno, ptr, &nmax);
2984  }
2985  else if (key == KEYCHAR) {
2986  dr_hook_prt_char_(&ftnunitno, ptr, &nmax);
2987  }
2988  else if (key == KEY_I4) {
2989  dr_hook_prt_i4_(&ftnunitno, ptr, &nmax);
2990  }
2991  else if (key == KEY_I8) {
2992  dr_hook_prt_i8_(&ftnunitno, ptr, &nmax);
2993  }
2994  else if (key == KEY_R4) {
2995  dr_hook_prt_r4_(&ftnunitno, ptr, &nmax);
2996  }
2997  else if (key == KEY_R8) {
2998  dr_hook_prt_r8_(&ftnunitno, ptr, &nmax);
2999  }
3000  }
3001 }
3002 
3003 static void
3004 check_watch(const char *label,
3005  const char *name,
3006  int name_len,
3007  int allow_abort)
3008 {
3009  if (watch) {
3010  int print_traceback = 1;
3011  drhook_watch_t *p = watch;
3013  while (p) {
3014  if (p->active) {
3015  unsigned int crc32 = 0;
3016  int calc_crc = 0;
3017  const char *first_nbytes = p->ptr;
3018  int changed = memcmp(first_nbytes,p->ptr,p->watch_first_nbytes);
3019  if (!changed) {
3020  /* The first nbytes were still the same; checking if crc has changed ... */
3021  crc32_(p->ptr, &p->nbytes, &crc32);
3022  changed = (crc32 != p->crc32);
3023  calc_crc = 1;
3024  }
3025  if (changed) {
3026  int tid = get_thread_id_();
3027  char *pfx = PREFIX(tid);
3028  if (!calc_crc) crc32_(p->ptr, &p->nbytes, &crc32);
3029  fprintf(stderr,
3030  "%s %s [%s@%s:%d] ***%s: Changed watch point '%s' at %p (%d bytes [#%d values])"
3031  " -- %s %.*s : new crc32=%u\n",
3032  pfx,TIMESTR(tid),FFL,
3033  p->abort_if_changed ? "Error" : "Warning",
3034  p->name, p->ptr, p->nbytes, p->nvals,
3035  label, name_len, name, crc32);
3036  print_watch(0, p->printkey, p->ptr, p->nvals);
3037  if (print_traceback) {
3038  LinuxTraceBack(pfx,TIMESTR(tid),NULL);
3039  print_traceback = 0;
3040  }
3041  if (allow_abort && p->abort_if_changed) {
3042  coml_unset_lockid_(&DRHOOK_lock); /* An important unlocking on Linux; otherwise hangs (until time-out) */
3043  RAISE(SIGABRT);
3044  }
3045 #if 0
3046  p->active = 0; /* No more these messages for this array */
3047  watch_count--;
3048 #else
3049  p->crc32 = crc32;
3050 #endif
3051  }
3052  }
3053  p = p->next;
3054  } /* while (p) */
3056  }
3057 }
3058 
3059 void
3060 c_drhook_check_watch_(const char *where,
3061  const int *allow_abort
3062  /* Hidden length */
3063  , int where_len)
3064 {
3065  if (watch && watch_count > 0) check_watch("whilst at", where, where_len, *allow_abort);
3066 }
3067 
3068 /*** PUBLIC ***/
3069 
3070 #define TIMERS \
3071  double walltime = opt_walltime ? WALLTIME() : 0; \
3072  double cputime = opt_cputime ? CPUTIME() : 0; \
3073  long long int hwm = opt_gethwm ? gethwm_() : 0; \
3074  long long int stk = opt_getstk ? getstk_() : 0
3075 
3076 
3077 /*=== c_drhook_set_lhook_ ===*/
3078 
3079 void
3081 {
3082  if (lhook) drhook_lhook = *lhook;
3083 }
3084 
3085 /*=== c_drhook_getenv_ ===*/
3086 
3087 void
3088 c_drhook_getenv_(const char *s,
3089  char *value,
3090  /* Hidden arguments */
3091  int slen,
3092  const int valuelen)
3093 {
3094  char *env = NULL;
3095  char *p = malloc_drhook(slen+1);
3096  if (!p) {
3097  fprintf(stderr,"c_drhook_getenv_(): Unable to allocate %d bytes of memory\n", slen+1);
3098  RAISE(SIGABRT);
3099  }
3100  memcpy(p,s,slen);
3101  p[slen]='\0';
3102  memset(value, ' ', valuelen);
3103  env = getenv(p);
3104  if (env) {
3105  int len = strlen(env);
3106  if (valuelen < len) len = valuelen;
3107  memcpy(value,env,len);
3108  }
3109  free_drhook(p);
3110 }
3111 
3112 
3113 /*=== c_drhook_init_ ===*/
3114 
3115 void
3116 c_drhook_init_(const char *progname,
3117  const int *num_threads
3118  /* Hidden length */
3119  ,int progname_len)
3120 {
3121  init_drhook(*num_threads);
3122  max_threads = MAX(1,*num_threads);
3123  if (a_out) free_drhook(a_out);
3124  progname = trim(progname, &progname_len);
3125  if (progname_len > 0) {
3126  a_out = calloc_drhook(progname_len+1,sizeof(*progname));
3127  memcpy(a_out, progname, progname_len);
3128  }
3129  else {
3130  /* progname is a blank string;
3131  this is most likely due to a Fortran-call to getarg
3132  from program that has a C-main program, thus Fortran getarg
3133  may return a blank string */
3134 
3135  const char *arg0 = ec_GetArgs(0);
3136  if (arg0) {
3137  const char *pc = arg0;
3138  progname_len = strlen(pc);
3139  pc = trim(pc, &progname_len);
3140  a_out = strdup_drhook(pc);
3141  }
3142  }
3143  if (!a_out) {
3144  a_out = strdup_drhook("a.out"); /* Failed to obtain the name of the executing program */
3145  }
3146 }
3147 
3148 
3149 /*=== c_drhook_watch_ ===*/
3150 
3151 void
3152 c_drhook_watch_(const int *onoff,
3153  const char *array_name,
3154  const void *array_ptr,
3155  const int *nbytes,
3156  const int *abort_if_changed,
3157  const int *printkey,
3158  const int *nvals,
3159  const int *print_traceback_when_set
3160  /* Hidden length */
3161  ,int array_name_len)
3162 {
3163  int tid = get_thread_id_();
3164  drhook_watch_t *p = NULL;
3165  if (!drhook_lhook) return;
3166 
3168 
3169  /* check whether this array_ptr is already registered, but maybe inactive */
3170  p = watch;
3171  while (p) {
3172  if (p->ptr == array_ptr) {
3173  if (p->active) watch_count--;
3174  free_drhook(p->name);
3175  break;
3176  }
3177  p = p->next;
3178  }
3179 
3180  if (!p) {
3181  /* create new branch */
3182  p = calloc_drhook(1, sizeof(*p)); /* Implies p->next = NULL */
3183  if (!last_watch) {
3184  last_watch = watch = p;
3185  }
3186  else {
3187  last_watch->next = p;
3188  last_watch = p;
3189  }
3190  }
3191 
3192  p->name = strdup2_drhook(array_name,array_name_len);
3193  p->tid = tid;
3194  p->active = *onoff;
3195  if (p->active) watch_count++;
3196  p->abort_if_changed = *abort_if_changed;
3197  p->ptr = array_ptr;
3198  p->nbytes = *nbytes;
3199  p->watch_first_nbytes = MIN(p->nbytes, MAX_WATCH_FIRST_NBYTES);
3200  memcpy(p->first_nbytes,p->ptr,p->watch_first_nbytes);
3201  p->crc32 = 0;
3202  crc32_(p->ptr, &p->nbytes, &p->crc32);
3203  p->printkey = *printkey;
3204  p->nvals = *nvals;
3205  {
3206  char *pfx = PREFIX(p->tid);
3207  int ftnunitno = 0;
3208  int textlen = strlen(pfx) + strlen(p->name) + 256;
3209  char *text = malloc_drhook(textlen * sizeof(*text));
3210  snprintf(text,textlen,
3211  "%s ***Warning: Set watch point '%s' at %p (%d bytes [%d values]) : crc32=%u",
3212  pfx, p->name, p->ptr, p->nbytes, p->nvals, p->crc32);
3213  dr_hook_prt_(&ftnunitno, text, strlen(text));
3214  print_watch(ftnunitno, p->printkey, p->ptr, p->nvals);
3215  free_drhook(text);
3216  if (*print_traceback_when_set) LinuxTraceBack(pfx,TIMESTR(p->tid),NULL);
3217  }
3218 
3220 }
3221 
3222 /*=== c_drhook_start_ ===*/
3223 
3224 void
3225 c_drhook_start_(const char *name,
3226  const int *thread_id,
3227  double *key,
3228  const char *filename,
3229  const int *sizeinfo
3230  /* Hidden length */
3231  ,int name_len, int filename_len)
3232 {
3233  TIMERS;
3234  equivalence_t u;
3235  ITSELF_0;
3237  if (name_len > 0 && opt_funcenter == *thread_id) {
3238  fprintf(stdout,"<e> %d %d %.*s %lld %lld\n",myproc,*thread_id,name_len,name,hwm,stk);
3239  fflush(stdout);
3240  }
3241  if (watch && watch_count > 0) check_watch("when entering routine", name, name_len, 1);
3242  if (drhook_dump_hugepages) {
3243  int tid = *thread_id;
3244  char *pfx = PREFIX(tid);
3245  dump_hugepages(0,pfx,tid,0,-1);
3246  }
3247  /* if (opt_random_memstat > 0) random_memstat(*thread_id,0); */
3248  if (!opt_callpath) {
3249  u.keyptr = getkey(*thread_id, name, name_len,
3250  filename, filename_len,
3251  &walltime, &cputime,
3252  NULL, 0, NULL);
3253  }
3254  else { /* (Much) more overhead */
3255  int free_callpath = 1;
3256  int callpath_len = 0;
3257  equivalence_t *callpath = get_callpath(*thread_id, &callpath_len);
3258  u.keyptr = getkey(*thread_id, name, name_len,
3259  filename, filename_len,
3260  &walltime, &cputime,
3261  callpath, callpath_len, &free_callpath);
3262  if (free_callpath) free_drhook(callpath);
3263  }
3264  if (cstklen == 0) {
3265  /* Double precision */
3266  *key = u.d;
3267  }
3268  else {
3269  /* Single precision : The variable "*key" is treated like max 4-byte entity -- "an index" */
3270  (void) callstack(*thread_id, key, u.keyptr);
3271  }
3272  ITSELF_1;
3273  if (opt_calltrace) {
3275  {
3276  const int ftnunitno = 0; /* stderr */
3277  const int print_option = 2; /* calling tree */
3278  int level = 0;
3279  c_drhook_print_(&ftnunitno, thread_id, &print_option, &level);
3280  /* fprintf(stderr,"%d#%d> %*.*s [%llu]\n",myproc,*thread_id,name_len,name_len,name,u.ull); */
3281  }
3283  }
3284  if (timeline) {
3285  int tid = *thread_id;
3286  if (opt_timeline_thread <= 0 || tid <= opt_timeline_thread) {
3287  drhook_timeline_t *tl = &timeline[tid-1];
3288  int bigjump = 1;
3289  unsigned long long int mod = (tl->calls[0]++)%opt_timeline_freq;
3290  double rss = (double)(getrss_()/1048576.0); /* in MBytes */
3291  double curheap = (opt_timeline_thread == 1 && tid == 1) ?
3292  (double)(getcurheap_()/1048576.0) : (double)(getcurheap_thread_(&tid)/1048576.0); /* in MBytes */
3293  if (mod != 0) {
3294  double inc_MB;
3295  inc_MB = tl->last_rss_MB - rss;
3296  if (ABS(inc_MB) < opt_timeline_MB) {
3297  inc_MB = tl->last_curheap_MB - curheap;
3298  }
3299  if (ABS(inc_MB) < opt_timeline_MB) bigjump = 0;
3300  }
3301  if (mod == 0 || bigjump) {
3303  {
3304  int ftnunitno = opt_timeline_unitno;
3305  const int print_option = 5; /* calling "tree" with just the current entry */
3306  int level = 0;
3307  tl->last_rss_MB = rss;
3308  tl->last_curheap_MB = curheap;
3309  c_drhook_print_(&ftnunitno, &tid, &print_option, &level);
3310  }
3312  }
3313  } /* if (opt_timeline_thread <= 0 || tid <= opt_timeline_thread) */
3314  }
3315 }
3316 
3317 /*=== c_drhook_end_ ===*/
3318 
3319 void
3320 c_drhook_end_(const char *name,
3321  const int *thread_id,
3322  const double *key,
3323  const char *filename,
3324  const int *sizeinfo
3325  /* Hidden length */
3326  ,int name_len, int filename_len)
3327 {
3328  TIMERS;
3329  equivalence_t u;
3330  ITSELF_0;
3331  if (cstklen == 0) {
3332  /* Double precision */
3333  u.d = *key;
3334  }
3335  else {
3336  /* Single precision : The variable "*key" is treated like max 4-byte entity -- "an index" */
3337  u.keyptr = callstack(*thread_id, (void *)key, NULL);
3338  }
3339  /*
3340  if (opt_calltrace) {
3341  coml_set_lockid_(&DRHOOK_lock);
3342  fprintf(stderr,"%d#%d< %*.*s [%llu]\n",myproc,*thread_id,name_len,name_len,name,u.ull);
3343  coml_unset_lockid_(&DRHOOK_lock);
3344  }
3345  */
3346  if (name_len > 0 && opt_funcexit == *thread_id) {
3347  fprintf(stdout,"<x> %d %d %.*s %lld %lld\n",myproc,*thread_id,name_len,name,hwm,stk);
3348  fflush(stdout);
3349  }
3350  if (opt_random_memstat > 0) random_memstat(*thread_id,0);
3351  if (timeline) {
3352  int tid = *thread_id;
3353  if (opt_timeline_thread <= 0 || tid <= opt_timeline_thread) {
3354  drhook_timeline_t *tl = &timeline[tid-1];
3355  int bigjump = 1;
3356  unsigned long long int mod = (tl->calls[1]++)%opt_timeline_freq;
3357  double rss = (double)(getrss_()/1048576.0); /* in MBytes */
3358  double curheap = (opt_timeline_thread == 1 && tid == 1) ?
3359  (double)(getcurheap_()/1048576.0) : (double)(getcurheap_thread_(&tid)/1048576.0); /* in MBytes */
3360  if (mod != 0) {
3361  double inc_MB;
3362  inc_MB = tl->last_rss_MB - rss;
3363  if (ABS(inc_MB) < opt_timeline_MB) {
3364  inc_MB = tl->last_curheap_MB - curheap;
3365  }
3366  if (ABS(inc_MB) < opt_timeline_MB) bigjump = 0;
3367  }
3368  if (mod == 0 || bigjump) {
3370  {
3371  int ftnunitno = opt_timeline_unitno;
3372  const int print_option = -5; /* calling "tree" with just the current entry */
3373  int level = 0;
3374  tl->last_rss_MB = rss;
3375  tl->last_curheap_MB = curheap;
3376  c_drhook_print_(&ftnunitno, &tid, &print_option, &level);
3377  }
3379  }
3380  } /* if (opt_timeline_thread <= 0 || tid <= opt_timeline_thread) */
3381  }
3382  if (watch && watch_count > 0) check_watch("when leaving routine", name, name_len, 1);
3383  putkey(*thread_id, u.keyptr, name, name_len,
3384  *sizeinfo,
3385  &walltime, &cputime);
3386  ITSELF_1;
3387 }
3388 
3389 /*=== c_drhook_memcounter_ ===*/
3390 
3391 void
3392 c_drhook_memcounter_(const int *thread_id,
3393  const long long int *size,
3394  long long int *keyptr_addr)
3395 {
3396  int tid = (thread_id && (*thread_id >= 1) && (*thread_id <= numthreads))
3397  ? *thread_id : get_thread_id_();
3398  int has_timeline = (timeline && size) ? opt_timeline : 0;
3399  if (has_timeline) {
3400  if (opt_timeline_thread <= 1 || tid <= opt_timeline_thread) {
3401  double size_MB = (double)((*size)/1048576.0); /* In MBytes */
3402  if (ABS(size_MB) < opt_timeline_MB) has_timeline = 0; /* Do not report */
3403  }
3404  else {
3405  has_timeline = 0; /* Do not report */
3406  }
3407  } /* if (has_timeline) */
3408  if (opt_memprof) {
3409  if (size) {
3410  union {
3411  long long int keyptr_addr;
3412  drhook_key_t *keyptr;
3413  } u;
3414  long long int alldelta;
3415  if (*size > 0) { /* Memory is being allocated */
3416  if (curkeyptr[tid-1]) {
3417  drhook_key_t *keyptr = curkeyptr[tid-1];
3418  keyptr->mem_curdelta += *size;
3419  alldelta = keyptr->mem_curdelta + keyptr->mem_child;
3420  if (alldelta > keyptr->maxmem_alldelta) keyptr->maxmem_alldelta = alldelta;
3421  if (keyptr->mem_curdelta > keyptr->maxmem_selfdelta)
3422  keyptr->maxmem_selfdelta = keyptr->mem_curdelta;
3423  if (keyptr_addr) {
3424  u.keyptr = keyptr;
3425  *keyptr_addr = u.keyptr_addr;
3426  }
3427  keyptr->alloc_count++;
3428  }
3429  else {
3430  if (keyptr_addr) *keyptr_addr = 0;
3431  } /* if (curkeyptr[tid-1]) */
3432  /*
3433  fprintf(stderr,
3434  "memcounter: allocated %lld bytes ; *keyptr_addr = %lld\n",
3435  *size, *keyptr_addr);
3436  */
3437  }
3438  else { /* Memory is being freed */
3439  drhook_key_t *keyptr;
3440  if (keyptr_addr && (*keyptr_addr)) {
3441  u.keyptr_addr = *keyptr_addr;
3442  keyptr = u.keyptr;
3443  }
3444  else
3445  keyptr = curkeyptr[tid-1];
3446  /*
3447  fprintf(stderr,
3448  "memcounter: DE-allocated %lld bytes ; *keyptr_addr = %lld\n",
3449  *size, *keyptr_addr);
3450  */
3451  if (keyptr) {
3452  long long int prev_curdelta = keyptr->mem_curdelta;
3453  keyptr->mem_curdelta += *size;
3454  alldelta = prev_curdelta + keyptr->mem_child;
3455  if (alldelta > keyptr->maxmem_alldelta) keyptr->maxmem_alldelta = alldelta;
3456  if (*size < 0) keyptr->free_count++;
3457  } /* if (keyptr) */
3458  } /* if (*size > 0) ... else */
3459  } /* if (size) */
3460  } /* if (opt_memprof) */
3461  if (has_timeline) {
3462  double curheap = (opt_timeline_thread == 1 && tid == 1) ?
3463  (double)(getcurheap_()/1048576.0) : (double)(getcurheap_thread_(&tid)/1048576.0); /* in MBytes */
3464  double rss = (double)(getrss_()/1048576.0); /* in MBytes */
3466  {
3467  int ftnunitno = opt_timeline_unitno;
3468  double size_MB = (double)((*size)/1048576.0); /* In MBytes */
3469  int print_option = (size_MB > 0) ? 6 : -6; /* timeline upon c_drhook_memcounter_ & (big) ALLOCATE or DEALLOCATE */
3470  int level = 0;
3471  drhook_timeline_t *tl = &timeline[tid-1];
3472  tl->last_curheap_MB = curheap;
3473  tl->last_rss_MB = rss;
3474  c_drhook_print_(&ftnunitno, &tid, &print_option, &level);
3475  }
3477  } /* if (has_timeline) */
3478 }
3479 
3480 /*=== c_drhook_print_ ===*/
3481 
3482 #define PRINT_HWM() \
3483 if (opt_gethwm) { sprintf(s,",hwm=%lldK",keyptr->hwm/1024); s += strlen(s); }
3484 
3485 #define PRINT_RSS() \
3486 if (opt_getrss) { \
3487  sprintf(s,",rss/max=%lldK/%lldK",keyptr->rssnow/1024, keyptr->maxrss/1024); \
3488  s += strlen(s); \
3489 }
3490 
3491 #define PRINT_STK() \
3492 if (opt_getstk) { \
3493  sprintf(s,",stack/max=%lldK/%lldK",keyptr->stack/1024, keyptr->maxstack/1024); \
3494  s += strlen(s); \
3495 }
3496 
3497 #define PRINT_PAG() \
3498 if (opt_getpag) { \
3499  sprintf(s,",pag=%lld",keyptr->paging); \
3500  s += strlen(s); \
3501 }
3502 
3503 #define PRINT_WALL() \
3504 if (opt_walltime) { \
3505  double self = keyptr->delta_wall_all-keyptr->delta_wall_child; \
3506  if (self < 0) self = 0; \
3507  sprintf(s,",wall=%.3fs/%.3fs", \
3508  keyptr->delta_wall_all, self); \
3509  s += strlen(s); \
3510 }
3511 
3512 #define PRINT_CPU() \
3513 if (opt_cputime) { \
3514  double self = keyptr->delta_cpu_all-keyptr->delta_cpu_child; \
3515  if (self < 0) self = 0; \
3516  sprintf(s,",cpu=%.3fs/%.3fs", \
3517  keyptr->delta_cpu_all, self); \
3518  s += strlen(s); \
3519 }
3520 
3521 #define PRINT_CALLS() \
3522 if (opt_calls) { \
3523  sprintf(s,",#%llu,st=%d",keyptr->calls,keyptr->status); \
3524  s += strlen(s); \
3525 }
3526 
3527 static int
3528 prof_name_comp(const void *v1, const void *v2)
3529 {
3530  const drhook_prof_t *p1 = v1;
3531  const drhook_prof_t *p2 = v2;
3532  return strcmp(p1->name,p2->name);
3533 }
3534 
3535 static int
3536 memprof_name_comp(const void *v1, const void *v2)
3537 {
3538  const drhook_memprof_t *p1 = v1;
3539  const drhook_memprof_t *p2 = v2;
3540  return strcmp(p1->name,p2->name);
3541 }
3542 
3543 static int
3544 prof_pc_comp_desc(const void *v1, const void *v2)
3545 {
3546  const drhook_prof_t *p1 = v1;
3547  const drhook_prof_t *p2 = v2;
3548  if (p1->pc < p2->pc) return 1;
3549  else if (p1->pc > p2->pc) return -1;
3550  else return 0;
3551 }
3552 
3553 static int
3554 memprof_pc_comp_desc(const void *v1, const void *v2)
3555 {
3556  const drhook_memprof_t *p1 = v1;
3557  const drhook_memprof_t *p2 = v2;
3558  if (p1->pc < p2->pc) return 1;
3559  else if (p1->pc > p2->pc) return -1;
3560  else return 0;
3561 }
3562 
3563 static const char *
3564 trim_and_adjust_left(const char *p, int *name_len)
3565 {
3566  int len = strlen(p);
3567  if (len > 0) {
3568  const char *back = &p[len-1];
3569  while (len > 0 && *back-- == ' ') len--;
3570  while (len > 0 && *p == ' ') { p++; len--; }
3571  }
3572  if (name_len) *name_len = len;
3573  return p;
3574 }
3575 
3576 static void print_routine_name0(FILE * fp, const char * p_name, int p_tid, const char * p_filename, int p_cluster,
3577  const equivalence_t * p_callpath, int p_callpath_len, int len, int cluster_size)
3578 {
3579  int name_len = 0;
3580  const char *name = trim_and_adjust_left(p_name,&name_len);
3581 
3582  if (callpath_packed) {
3583 
3584  if (p_callpath && p_callpath_len > 0) {
3585  const equivalence_t * callpath = &p_callpath[p_callpath_len-1];
3586  int j;
3587  for (j=0; j<p_callpath_len; callpath--, j++)
3588  if (callpath && callpath->keyptr && callpath->keyptr->name) {
3589  const char *name = callpath->keyptr->name;
3590  int name_len = callpath->keyptr->name_len;
3591  fprintf(fp,"%.*s/",name_len,name);
3592  }
3593  }
3594  }
3595 
3596  fprintf(fp,"%.*s@%d%s%s",
3597  name_len, name,
3598  p_tid,
3599  p_filename ? ":" : "",
3600  p_filename ? p_filename : "");
3601 
3602  if (opt_clusterinfo) {
3603  fprintf(fp," [%d,%d]",
3604  p_cluster, ABS(cluster_size));
3605  }
3606 
3607  if (!callpath_packed)
3608  unroll_callpath(fp, len, p_callpath, p_callpath_len);
3609 
3610 
3611 }
3612 
3613 #define print_routine_name(fp, p, len, cluster_size) \
3614  if (fp && p) { \
3615  print_routine_name0(fp, p->name, p->tid, p->filename, p->cluster, \
3616  p->callpath, p->callpath_len, len, cluster_size);\
3617  } /* if (fp && p) */
3618 
3619 
3620 #ifdef NECSX
3621 /* We need this because NEC SX refuses to write no more than 132 character for Fortran unit = 0 */
3622 static void
3623 DrHookPrint(int ftnunitno, const char *line)
3624 {
3625  if (line) {
3626  FILE *fp = NULL;
3627  if (ftnunitno <= 0)
3628  fp = stderr;
3629  else if (ftnunitno == 6)
3630  fp = stdout;
3631  else
3632  dr_hook_prt_(&ftnunitno, line, strlen(line));
3633  OPTPRINT(fp,"%s\n",line);
3634  }
3635 }
3636 #endif
3637 
3638 void
3639 c_drhook_print_(const int *ftnunitno,
3640  const int *thread_id,
3641  const int *print_option, /*
3642  1=raw call counts
3643  2=calling tree
3644  3=profiling info
3645  4=memory profiling
3646  5=timeline upon entering the routine
3647  -5=timeline upon leaving the routine
3648  6=timeline upon c_drhook_memcounter_ & (big) ALLOCATE
3649  -6=timeline upon c_drhook_memcounter_ & (big) DEALLOCATE
3650  7=timeline : the very first call (upon setup or dr.hook)
3651  -7=timeline : the very last call (in atexit())
3652  */
3653  int *level
3654  )
3655 {
3656  static int first_time = 0;
3657  int tid = (thread_id && (*thread_id >= 1) && (*thread_id <= numthreads))
3658  ? *thread_id : get_thread_id_();
3659  int mytid = get_thread_id_();
3660  char *pfx = PREFIX(tid);
3661  if (ftnunitno && keydata && calltree) {
3662  char line[4096];
3663  int abs_print_option = ABS(*print_option);
3664  int j;
3665 
3666  /* Mod to call traceback and continue if called with level=99 */
3667  if(*level == 99) {
3668  *level=0;
3669  }
3670  else {
3671  if(*print_option == 2) {
3672  if(first_time == 1) return;
3673  first_time = 1;
3674  }
3675  }
3676  /* end of Mod */
3677 
3678  if (*print_option == 1) { /* raw call counts */
3679  for (j=0; j<hashsize; j++) {
3680  int nestlevel = 0;
3681  drhook_key_t *keyptr = &keydata[tid-1][j];
3682  while (keyptr) {
3683  if (keyptr->name) {
3684  char *s = line;
3685  sprintf(s,
3686  "%s %s [%s@%s:%d] [hash#%d,nest=%d] '%s'",
3687  pfx,TIMESTR(tid),FFL,
3688  j,nestlevel,keyptr->name);
3689  s += strlen(s);
3690  PRINT_CALLS();
3691  PRINT_HWM();
3692  PRINT_RSS();
3693  PRINT_STK();
3694  PRINT_PAG();
3695  PRINT_WALL();
3696  PRINT_CPU();
3697  *s = 0;
3698 #ifdef NECSX
3699  DrHookPrint(*ftnunitno, line);
3700 #else
3701  dr_hook_prt_(ftnunitno, line, strlen(line));
3702 #endif
3703  }
3704  keyptr = keyptr->next;
3705  nestlevel++;
3706  } /* while (keyptr) */
3707  } /* for (j=0; j<hashsize; j++) */
3708  }
3709 
3710  else if (*print_option == 2 ||
3711  abs_print_option == 5 ||
3712  abs_print_option == 6 ||
3713  abs_print_option == 7
3714  ) { /* the current calling tree */
3715  drhook_calltree_t *treeptr = calltree[tid-1];
3716 
3717  if (*print_option == 2) {
3718  long long int hwm = getmaxhwm_()/1048576;
3719  long long int rss = getmaxrss_()/1048576;
3720  long long int maxstack = getmaxstk_()/1048576;
3721  snprintf(line,sizeof(line),
3722  "%s %s [%s@%s:%d] %lld MB (maxheap), %lld MB (maxrss), %lld MB (maxstack)",
3723  pfx,TIMESTR(tid),FFL,
3724  hwm,rss,maxstack);
3725 #ifdef NECSX
3726  DrHookPrint(*ftnunitno, line);
3727 #else
3728  dr_hook_prt_(ftnunitno, line, strlen(line));
3729 #endif
3730  }
3731 
3732  if (tid > 1) {
3733  if (*print_option == 2) {
3734  /* I'm not a master thread, but my master has the beginning of the calltree */
3735  int initlev = 0;
3736  const int master = 1;
3737  first_time = 0;
3738  c_drhook_print_(ftnunitno, &master, print_option, &initlev);
3739  *level += initlev;
3740  }
3741  else if (tid > opt_timeline_thread) {
3742  return;
3743  }
3744  }
3745 
3746  if (abs_print_option == 7) {
3747  treeptr = NULL;
3748  }
3749  else if (abs_print_option == 5 || abs_print_option == 6) {
3750  treeptr = thiscall[tid-1];
3751  }
3752  else {
3753  treeptr = calltree[tid-1];
3754  }
3755 
3756  while (abs_print_option == 7 || (treeptr && treeptr->active)) {
3757  int do_print = (*print_option == 2 ||
3758  abs_print_option == 7 ||
3759  abs_print_option == 5 || abs_print_option == 6);
3760  if (do_print) {
3761  drhook_key_t *keyptr = (abs_print_option == 7) ? NULL : treeptr->keyptr;
3762  char *s = line;
3763  char is_timeline = 1, kind;
3764  switch (*print_option) {
3765  case -5: kind = '<'; break;
3766  case -6: kind = '-'; break;
3767  case -7: kind = 'E'; break;
3768  case 5: kind = '>'; break;
3769  case 6: kind = '+'; break;
3770  case 7: kind = 'B'; break;
3771  default:
3772  case 2: kind = ':'; is_timeline = 0; break;
3773  }
3774  if (*print_option == 2 ||
3775  (is_timeline && tid > 1 && tid <= opt_timeline_thread)) {
3776  sprintf(s,"%s %s [%s@%s:%d] %s%c ",
3777  pfx,TIMESTR(tid),FFL,
3778  is_timeline ? "tl:" : "",
3779  kind);
3780  }
3781  else if (is_timeline && opt_timeline_thread == 1 && tid == 1) {
3782  sprintf(s,"%s %s [%s@%s:%d] %s%c ",
3783  pfx,TIMESTR(tid),FFL,
3784  is_timeline ? "tl:" : "",
3785  kind);
3786  }
3787  s += strlen(s);
3788  (*level)++;
3789  for (j=0; j<(*level); j++) *s++ = ' ';
3790  if (*print_option == 2) {
3791  if(mytid != tid) { /* We are printing the master call tree as far as >OMP*/
3792  if(strncmp(">OMP",keyptr->name,4) == 0) {
3793  (*level)--;
3794  return;
3795  }
3796  }
3797  sprintf(s,"%s ",keyptr->name);
3798  s += strlen(s);
3799  }
3800  if (is_timeline) {
3801  double wall = WALLTIME();
3802  double rss, curheap;
3803  drhook_timeline_t *tl = &timeline[tid-1];
3804  if (abs_print_option == 5 || abs_print_option == 6) { /* when called via drhook_begin/_end or memcounter */
3805  curheap = tl->last_curheap_MB;
3806  rss = tl->last_rss_MB;
3807  }
3808  else {
3809  rss = (double)(getrss_()/1048576.0); /* in MBytes */
3810  curheap = (opt_timeline_thread == 1 && tid == 1) ?
3811  (double)(getcurheap_()/1048576.0) : (double)(getcurheap_thread_(&tid)/1048576.0); /* in MBytes */
3812  tl->last_curheap_MB = curheap;
3813  tl->last_rss_MB = rss;
3814  }
3815  if (opt_timeline_format == 1) {
3816  sprintf(s, "%.6f %.4g %.4g", wall, rss, curheap);
3817  }
3818  else {
3819  sprintf(s,
3820  "wall=%.6f cpu=%.4g hwm=%.4g rss=%.4g curheap=%.4g stack=%.4g pag=%lld",
3821  wall, CPUTIME(),
3822  (double)(gethwm_()/1048576.0), rss,
3823  curheap,
3824  (double)(getstk_()/1048576.0),
3825  getpag_());
3826  }
3827  s += strlen(s);
3828  *s++ = ' ';
3829  if (keyptr) {
3830  sprintf(s,"'%s'",keyptr->name);
3831  }
3832  else {
3833  sprintf(s,"'#PROGRAM %s'",(*print_option == 7) ? "BEGIN" : "END");
3834  }
3835  s += strlen(s);
3836  }
3837  else {
3838  PRINT_CALLS();
3839  PRINT_HWM();
3840  PRINT_RSS();
3841  PRINT_STK();
3842  PRINT_PAG();
3843  PRINT_WALL();
3844  PRINT_CPU();
3845  }
3846  *s = 0;
3847 #ifdef NECSX
3848  DrHookPrint(*ftnunitno, line);
3849 #else
3850  dr_hook_prt_(ftnunitno, line, strlen(line));
3851 #endif
3852  }
3853  if (abs_print_option == 7 || abs_print_option == 5 || abs_print_option == 6) break;
3854  if (treeptr) treeptr = treeptr->next;
3855  } /* while (abs_print_option == 7 || (treeptr && treeptr->active)) */
3856  }
3857 
3858  else if (*print_option == 3) { /* profiling (CPU, wall-clock and/or MFlop/s) */
3859  int len;
3860  int t;
3861  double cumul;
3862  double tottime = 0, max_overhead_pc = 0;
3863  double *tot = NULL;
3864  int nprof = 0;
3865  drhook_prof_t *prof = NULL;
3866  drhook_prof_t *p;
3867  double flop_tot = 0, instr_tot = 0;
3868  double *flop = NULL, *instr = NULL;
3869 
3870  if (!opt_wallprof && !opt_cpuprof) return; /* no profiling info available */
3871  if (tid > 1) return; /* just master thread allowed ; takes care of siblings, too */
3872  if (numthreads<=0) return;
3873  if (do_prof_off) return;
3874  do_prof_off = 1;
3875 
3876  /* Insert "$drhook" */
3877  if (keyself && opt_self > 1) {
3878  for (t=0; t<numthreads; t++) (void) insertkey(t+1,keyself[t]);
3879  }
3880 
3881  flop = calloc_drhook(numthreads, sizeof(*flop));
3882  instr = calloc_drhook(numthreads, sizeof(*instr));
3883  tot = calloc_drhook(numthreads, sizeof(*tot));
3884 
3885  for (t=0; t<numthreads; t++) {
3886  for (j=0; j<hashsize; j++) {
3887  drhook_key_t *keyptr = &keydata[t][j];
3888  while (keyptr) {
3889  if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) {
3890  double self;
3891  if (opt_wallprof) {
3892  self = keyptr->delta_wall_all - keyptr->delta_wall_child;
3893  }
3894  else {
3895  self = keyptr->delta_cpu_all - keyptr->delta_cpu_child;
3896  }
3897  /* if (self < 0) self = 0; */
3898  tot[t] += self;
3899 #ifdef HPM
3900  flop[t] += keyptr->avg_mflops * self; /* mflop_count(keyptr); */
3901  instr[t] += keyptr->avg_mipsrate * self; /* mip_count(keyptr); */
3902 #endif
3903  nprof++;
3904  }
3905  keyptr = keyptr->next;
3906  } /* while (keyptr && keyptr->status == 0) */
3907  } /* for (t=0; t<numthreads; t++) */
3908  } /* for (j=0; j<hashsize; j++) */
3909 
3910  if (opt_wallprof) { /* a bit unreliable; had not taken max. value of threads wall yet; will be recalculated */
3911  tottime = tot[0] + ((keyself && opt_self > 1) ? keyself[0]->delta_wall_all : 0);
3912  for (t=1; t<numthreads; t++) {
3913  double tmp = tot[t] + ((keyself && opt_self > 1) ? keyself[t]->delta_wall_all : 0);
3914  tottime = MAX(tottime,tmp);
3915  }
3916  }
3917  else { /* ok & reliable (for cpuprof) */
3918  tottime = 0;
3919  for (t=0; t<numthreads; t++) tottime += (tot[t] + ((keyself && opt_self > 1) ? keyself[t]->delta_cpu_all : 0));
3920  }
3921 
3922  if (tottime <= 0) tottime = 1e-10;
3923 
3924  p = prof = calloc_drhook(nprof + 1, sizeof(*prof)); /* Make sure there is at least one entry */
3925 
3926  for (t=0; t<numthreads; t++) {
3927  for (j=0; j<hashsize; j++) {
3928  drhook_key_t *keyptr = &keydata[t][j];
3929  while (keyptr) {
3930  if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) {
3931  p->self = opt_wallprof ?
3932  keyptr->delta_wall_all - keyptr->delta_wall_child :
3933  keyptr->delta_cpu_all - keyptr->delta_cpu_child;
3934  p->total = opt_wallprof ?
3935  keyptr->delta_wall_all :
3936  keyptr->delta_cpu_all;
3937  p->calls = keyptr->calls;
3938  p->name = keyptr->name;
3939  p->pc = (p->self/tottime) * 100.0;
3940  if (p->calls > 0) {
3941  p->percall_ms_self = (p->self/p->calls) * 1000.0;
3942  p->percall_ms_total = (p->total/p->calls) * 1000.0;
3943  }
3944  p->tid = t+1;
3945  p->index = p - prof;
3946 #ifdef HPM
3947  if (opt_hpmprof) {
3948  p->mflops = keyptr->avg_mflops; /* mflops_hpm(keyptr); */
3949  p->mipsrate = keyptr->avg_mipsrate; /* mips_hpm(keyptr); */
3950  p->divpc = divpc_hpm(keyptr);
3951  }
3952 #endif
3953  p->filename = keyptr->filename;
3954  p->sizeinfo = keyptr->sizeinfo;
3955  p->min_sizeinfo = keyptr->min_sizeinfo;
3956  p->max_sizeinfo = keyptr->max_sizeinfo;
3957  p->sizespeed = (p->self > 0 && p->sizeinfo > 0) ? p->sizeinfo/p->self : 0;
3958  p->sizeavg = (p->calls > 0 && p->sizeinfo > 0) ? p->sizeinfo/p->calls : 0;
3959  p->callpath = keyptr->callpath;
3960  p->callpath_len = keyptr->callpath_len;
3961  p++;
3962  }
3963  keyptr = keyptr->next;
3964  } /* while (keyptr && keyptr->status == 0) */
3965  } /* for (j=0; j<hashsize; j++) */
3966  } /* for (t=0; t<numthreads; t++) */
3967 
3968  do {
3969  double mflop_rate = 0;
3970  double mip_rate = 0;
3971  int numroutines = 0;
3972  int cluster;
3973  double *maxval = calloc_drhook(nprof+1, sizeof(*maxval)); /* make sure at least 1 element */
3974  int *clusize = calloc_drhook(nprof+1, sizeof(*clusize)); /* make sure at least 1 element */
3975  char *prevname = NULL;
3976  const char *fmt1 = "%5d %8.2f %12.3f %12.3f %12.3f %14llu %11.2f %11.2f %s";
3977  const char *fmt2 = "%5d %8.2f %12.3f %12.3f %12.3f %14llu %7.0f %7.0f %7.1f %s";
3978  const char *fmt = opt_hpmprof ? fmt2 : fmt1;
3979  char *filename = get_mon_out(myproc);
3980  FILE *fp = NULL;
3981 
3982  if (!filename) break;
3983 
3984  if ((myproc == 1 && mon_out_procs == -1) || mon_out_procs == myproc) {
3985  fprintf(stderr,
3986  "%s %s [%s@%s:%d] Writing profiling information of proc#%d into file '%s'\n",
3987  pfx,TIMESTR(tid),FFL,
3988  myproc,filename);
3989  }
3990 
3991  fp = fopen(filename,"w");
3992  if (!fp) goto finish_3;
3993 
3994  /* alphanumerical sorting to find out clusters of the same routine but on different threads */
3995  /* also find out total wall clock time */
3996  /* calculate percentage values */
3997 
3998  p = prof;
3999  qsort(p, nprof, sizeof(*p), prof_name_comp);
4000 
4001  cluster = 0;
4002  maxval[cluster] = p->self;
4003  p->maxval = &maxval[cluster];
4004  clusize[cluster] = 1;
4005  prevname = p->name;
4006  p++;
4007  for (j=1; j<nprof; j++) {
4008  if (!strequ(prevname,p->name)) {
4009  (p-1)->cluster = cluster;
4010  (p-1)->maxval = &maxval[cluster];
4011  prevname = p->name;
4012  cluster++;
4013  }
4014  if (p->self > maxval[cluster]) maxval[cluster] = p->self;
4015  p->cluster = cluster;
4016  p->maxval = &maxval[cluster];
4017  clusize[cluster]++;
4018  p++;
4019  } /* for (j=1; j<nprof; j++) */
4020 
4021  numroutines = (nprof > 0) ? (cluster + 1) : 0; /* Active no. of routines */
4022 
4023  if (opt_wallprof) tottime = 0;
4024  p = prof;
4025  for (j=0; j<nprof; j++) {
4026  int use_this = 0;
4027  cluster = p->cluster;
4028  if (clusize[cluster] > 1) { /* multiple threads <= numthreads indeed called this routine */
4029  p->is_max = (p->self == *p->maxval);
4030  if (p->is_max) { /* first max found will be used for total time */
4031  clusize[cluster] = -clusize[cluster]; /* ensures that max has been found for this cluster */
4032  use_this = opt_wallprof;
4033  }
4034  }
4035  else if (clusize[cluster] == 1) {
4036  use_this = opt_wallprof;
4037  }
4038  if (use_this && opt_wallprof) tottime += p->self;
4039  p++;
4040  }
4041 
4042  if (tottime <= 0) tottime = 1e-10;
4043 
4044  if (opt_wallprof) { /* use re-calculated tottime to define percentages */
4045  p = prof;
4046  for (j=0; j<nprof; j++) {
4047  p->pc = (p->self/tottime) * 100.0;
4048  p++;
4049  }
4050  }
4051 
4052  /* sorting with respect to percentage value */
4053 
4054  p = prof;
4055  qsort(p, nprof, sizeof(*p), prof_pc_comp_desc);
4056 
4057  flop_tot = 0;
4058  instr_tot = 0;
4059  max_overhead_pc = 0;
4060  for (t=0; t<numthreads; t++) {
4061  flop_tot += flop[t];
4062  instr_tot += instr[t];
4063  if (overhead) {
4064  max_overhead_pc = MAX(max_overhead_pc,overhead[t]);
4065 #ifdef DEBUG
4066  fprintf(fp,"tid#%d: overhead = %.15g s\n",t+1,overhead[t]);
4067 #endif
4068  }
4069  }
4070 #ifdef DEBUG
4071  fprintf(fp,"max overhead = %.15g s, tottime = %.15g s\n",
4072  max_overhead_pc, tottime);
4073 #endif
4074  if (tottime - max_overhead_pc > 0) {
4075  max_overhead_pc = 100.0*(max_overhead_pc/(tottime - max_overhead_pc));
4076  }
4077  else {
4078  max_overhead_pc = 100;
4079  }
4080 
4081  fprintf(fp,
4082  "Profiling information for program='%s', proc#%d:\n",a_out, myproc);
4083  fprintf(fp,"\tNo. of instrumented routines called : %d\n", numroutines);
4084  fprintf(fp,"\tInstrumentation started : %s\n",start_stamp ? start_stamp : "N/A");
4085  end_stamp = timestamp();
4086  fprintf(fp,"\tInstrumentation ended : %s\n",end_stamp ? end_stamp : "N/A");
4087  fprintf(fp,"\tInstrumentation overhead: %.2f%%\n",max_overhead_pc);
4088  {
4089  long long int hwm = getmaxhwm_()/1048576;
4090  long long int rss = getmaxrss_()/1048576;
4091  long long int maxstack = getmaxstk_()/1048576;
4092  long long int pag = getpag_();
4093  fprintf(fp,
4094  "\tMemory usage : %lld MBytes (heap), %lld MBytes (rss), %lld MBytes (stack), %lld (paging)\n",
4095  hwm,rss,maxstack,pag);
4096  }
4097  if (opt_hpmprof) {
4098  mflop_rate = flop_tot / tottime;
4099  mip_rate = instr_tot / tottime;
4100  fprintf(fp,
4101  "\t%s-time is %.2f sec on proc#%d, %.0f MFlops (ops#%.0f*10^6), %.0f MIPS (ops#%.0f*10^6) (%d procs, %d threads)\n",
4102  opt_wallprof ? "Wall" : "Total CPU", tottime, myproc,
4103  mflop_rate, flop_tot, mip_rate, instr_tot,
4104  nproc, numthreads);
4105  }
4106  else {
4107  fprintf(fp,
4108  "\t%s-time is %.2f sec on proc#%d (%d procs, %d threads)\n",
4109  opt_wallprof ? "Wall" : "Total CPU", tottime, myproc,
4110  nproc, numthreads);
4111  }
4112 
4113  if (myproc == 1) {
4114  fprintf(stderr,
4115  "Profiling information for program='%s', proc#%d:\n",a_out, myproc);
4116  fprintf(stderr,"\tNo. of instrumented routines called : %d\n", numroutines);
4117  fprintf(stderr,"\tInstrumentation started : %s\n",start_stamp ? start_stamp : "N/A");
4118  fprintf(stderr,"\tInstrumentation ended : %s\n",end_stamp ? end_stamp : "N/A");
4119  fprintf(stderr,"\tInstrumentation overhead: %.2f%%\n",max_overhead_pc);
4120  if (opt_hpmprof) {
4121  fprintf(stderr,
4122  "\t%s-time is %.2f sec on proc#%d, %.0f MFlops (ops#%.0f*10^6), %.0f MIPS (ops#%.0f*10^6) (%d procs, %d threads)\n",
4123  opt_wallprof ? "Wall" : "Total CPU", tottime, myproc,
4124  mflop_rate, flop_tot, mip_rate, instr_tot,
4125  nproc, numthreads);
4126  }
4127  else {
4128  fprintf(stderr,
4129  "\t%s-time is %.2f sec on proc#%d (%d procs, %d threads)\n",
4130  opt_wallprof ? "Wall" : "Total CPU", tottime, myproc,
4131  nproc, numthreads);
4132  }
4133  } /* if (myproc == 1) */
4134 
4135  free_drhook(end_stamp);
4136 
4137  for (t=0; t<numthreads; t++) {
4138  double tmp = 100.0*(tot[t]/tottime);
4139  if (opt_hpmprof && tot[t] > 0) {
4140  mflop_rate = flop[t]/tot[t];
4141  mip_rate = instr[t]/tot[t];
4142  }
4143  else {
4144  mflop_rate = 0;
4145  mip_rate = 0;
4146  }
4147  fprintf( fp,"\tThread#%d: %11.2f sec (%.2f%%)",t+1,tot[t],tmp);
4148  if (opt_hpmprof) fprintf( fp,", %.0f MFlops (ops#%.0f*10^6), %.0f MIPS (ops#%.0f*10^6)", mflop_rate, flop[t], mip_rate, instr[t]);
4149  fprintf( fp,"\n");
4150  if (myproc == 1) {
4151  fprintf(stderr,"\tThread#%d: %11.2f sec (%.2f%%)",t+1,tot[t],tmp);
4152  if (opt_hpmprof) fprintf(stderr,", %.0f MFlops (ops#%.0f*10^6), %.0f MIPS (ops#%.0f*10^6)", mflop_rate, flop[t], mip_rate, instr[t]);
4153  fprintf(stderr,"\n");
4154  }
4155  }
4156 
4157  fprintf(fp,"\n");
4158  if (opt_hpmprof) {
4159  len =
4160  fprintf(fp," # %% Time Cumul Self Total # of calls MIPS MFlops Div-%% ");
4161  }
4162  else {
4163  len =
4164  fprintf(fp," # %% Time Cumul Self Total # of calls Self Total ");
4165  }
4166  fprintf(fp,"Routine@<thread-id>");
4167  if (opt_clusterinfo) fprintf(fp," [Cluster:(id,size)]");
4168  fprintf(fp,"\n");
4169  if (opt_sizeinfo) fprintf(fp,"%*s %s\n",len-20," ","(Size; Size/sec; Size/call; MinSize; MaxSize)");
4170  if (opt_hpmprof) {
4171  fprintf(fp, " (self) (sec) (sec) (sec) \n");
4172  }
4173  else {
4174  fprintf(fp, " (self) (sec) (sec) (sec) ms/call ms/call\n");
4175  }
4176  fprintf(fp,"\n");
4177 
4178  cumul = 0;
4179  for (j=0; j<nprof; ) {
4180  int cluster_size = clusize[p->cluster];
4181  if (p->pc < percent_limit) break;
4182  if (opt_cputime) {
4183  cumul += p->self;
4184  }
4185  else {
4186  if (p->is_max || cluster_size == 1) cumul += p->self;
4187  }
4188  if (opt_hpmprof) {
4189  fprintf(fp, fmt,
4190  ++j, p->pc, cumul, p->self, p->total, p->calls,
4191  p->mipsrate, p->mflops, p->divpc,
4192  p->is_max ? "*" : " ");
4193  }
4194  else {
4195  fprintf(fp, fmt,
4196  ++j, p->pc, cumul, p->self, p->total, p->calls,
4197  p->percall_ms_self, p->percall_ms_total,
4198  p->is_max ? "*" : " ");
4199  }
4200 
4201  print_routine_name(fp, p, len, cluster_size);
4202 
4203  if (opt_sizeinfo && p->sizeinfo > 0) {
4204  char s1[DRHOOK_STRBUF], s2[DRHOOK_STRBUF], s3[DRHOOK_STRBUF];
4205  char s4[DRHOOK_STRBUF], s5[DRHOOK_STRBUF];
4206  lld_commie(p->sizeinfo,s1);
4207  dbl_commie(p->sizespeed,s2);
4208  dbl_commie(p->sizeavg,s3);
4209  lld_commie(p->min_sizeinfo,s4);
4210  lld_commie(p->max_sizeinfo,s5);
4211  fprintf(fp,"\n%*s (%s; %s; %s; %s; %s)",len-20," ",s1,s2,s3,s4,s5);
4212  }
4213  fprintf(fp,"\n");
4214  p++;
4215  } /* for (j=0; j<nprof; ) */
4216 
4217  fclose(fp);
4218  finish_3:
4219  free_drhook(filename);
4220  free_drhook(maxval);
4221  free_drhook(clusize);
4222  } while (0);
4223 
4224  free_drhook(instr);
4225  free_drhook(flop);
4226  free_drhook(tot);
4227  free_drhook(prof);
4228  do_prof_off = 0;
4229  }
4230 
4231  else if (*print_option == 4) { /* Memory profiling */
4232  int t, len;
4233  int nprof = 0;
4234  drhook_memprof_t *prof = NULL;
4235  drhook_memprof_t *p;
4236  long long int *tot;
4237  long long int *maxseen_tot;
4238  double totmaxmem_delta;
4239 
4240  if (!opt_memprof) return; /* no profiling info available */
4241  if (tid > 1) return; /* just master thread allowed ; takes care of siblings, too */
4242  if (numthreads<=0) return;
4243  if (do_prof_off) return;
4244  do_prof_off = 1;
4245 
4246  tot = calloc_drhook(numthreads, sizeof(*tot));
4247  maxseen_tot = calloc_drhook(numthreads, sizeof(*maxseen_tot));
4248 
4249  for (t=0; t<numthreads; t++) {
4250  for (j=0; j<hashsize; j++) {
4251  drhook_key_t *keyptr = &keydata[t][j];
4252  while (keyptr) {
4253  if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) {
4254 
4255  long long int self;
4256  self = keyptr->maxmem_selfdelta;
4257  if (self < 0) self = 0;
4258  tot[t] += self;
4259  maxseen_tot[t] = MAX(maxseen_tot[t], keyptr->mem_seenmax);
4260  nprof++;
4261  }
4262  keyptr = keyptr->next;
4263  } /* while (keyptr && keyptr->status == 0) */
4264  } /* for (t=0; t<numthreads; t++) */
4265  } /* for (j=0; j<hashsize; j++) */
4266 
4267  totmaxmem_delta = tot[0];
4268  for (t=1; t<numthreads; t++) {
4269  long long int tmp = tot[t];
4270  totmaxmem_delta = MAX(totmaxmem_delta,tmp);
4271  }
4272 
4273  if (totmaxmem_delta <= 0) totmaxmem_delta = 1e-10; /* To avoid divide-by-zero */
4274 
4275  p = prof = calloc_drhook(nprof + 1, sizeof(*prof)); /* Make sure there is at least one entry */
4276 
4277  for (t=0; t<numthreads; t++) {
4278  for (j=0; j<hashsize; j++) {
4279  drhook_key_t *keyptr = &keydata[t][j];
4280  while (keyptr) {
4281  if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) {
4282  p->self = keyptr->maxmem_selfdelta;
4283  p->children = keyptr->mem_child;
4284  p->hwm = keyptr->mem_maxhwm;
4285  p->rss = keyptr->mem_maxrss;
4286  p->stk = keyptr->mem_maxstk;
4287  p->pag = keyptr->mem_maxpagdelta;
4288  p->leaked = keyptr->mem_curdelta;
4289  p->calls = keyptr->calls;
4290  p->alloc_count += keyptr->alloc_count;
4291  p->free_count += keyptr->free_count;
4292  p->name = keyptr->name;
4293  p->pc = (p->self/totmaxmem_delta) * 100.0;
4294  p->tid = t+1;
4295  p->index = p - prof;
4296  p->filename = keyptr->filename;
4297  p->callpath = keyptr->callpath;
4298  p->callpath_len = keyptr->callpath_len;
4299  p++;
4300  }
4301  keyptr = keyptr->next;
4302  } /* while (keyptr && keyptr->status == 0) */
4303  } /* for (t=0; t<numthreads; t++) */
4304  } /* for (j=0; j<hashsize; j++) */
4305 
4306  do {
4307  int numroutines = 0;
4308  int cluster;
4309  long long int *maxval = calloc_drhook(nprof+1, sizeof(*maxval)); /* make sure at least 1 element */
4310  int *clusize = calloc_drhook(nprof+1, sizeof(*clusize)); /* make sure at least 1 element */
4311  char *prevname = NULL;
4312  const char *fmt1 = "%5d %9.2f %14lld %14lld %14lld %14lld %14lld %10lld %10llu %10llu%s%10llu %s";
4313  const char *fmt = fmt1;
4314  char *filename = get_memmon_out(myproc);
4315  FILE *fp = NULL;
4316 
4317  if (!filename) break;
4318 
4319  if ((myproc == 1 && mon_out_procs == -1) || mon_out_procs == myproc) {
4320  fprintf(stderr,"Writing memory-profiling information of proc#%d into file '%s'\n",myproc,filename);
4321  }
4322 
4323  fp = fopen(filename,"w");
4324  if (!fp) goto finish_4;
4325 
4326  /* alphanumerical sorting to find out clusters of the same routine but on different threads */
4327 
4328  p = prof;
4329  qsort(p, nprof, sizeof(*p), memprof_name_comp);
4330 
4331  cluster = 0;
4332  maxval[cluster] = p->self;
4333  p->maxval = &maxval[cluster];
4334  clusize[cluster] = 1;
4335  prevname = p->name;
4336  p++;
4337  for (j=1; j<nprof; j++) {
4338  if (!strequ(prevname,p->name)) {
4339  (p-1)->cluster = cluster;
4340  (p-1)->maxval = &maxval[cluster];
4341  prevname = p->name;
4342  cluster++;
4343  }
4344  if (p->self > maxval[cluster]) maxval[cluster] = p->self;
4345  p->cluster = cluster;
4346  p->maxval = &maxval[cluster];
4347  clusize[cluster]++;
4348  p++;
4349  } /* for (j=1; j<nprof; j++) */
4350 
4351  numroutines = (nprof > 0) ? (cluster + 1) : 0; /* Active no. of routines */
4352 
4353  totmaxmem_delta = 0;
4354  p = prof;
4355  for (j=0; j<nprof; j++) {
4356  int use_this = 0;
4357  cluster = p->cluster;
4358  if (clusize[cluster] > 1) { /* multiple threads <= numthreads indeed called this routine */
4359  p->is_max = (p->self == *p->maxval);
4360  if (p->is_max) { /* first max found will be used for total time */
4361  clusize[cluster] = -clusize[cluster]; /* ensures that max has been found for this cluster */
4362  use_this = 1;
4363  }
4364  }
4365  else if (clusize[cluster] == 1) {
4366  use_this = 1;
4367  }
4368  if (use_this) totmaxmem_delta += p->self;
4369  p++;
4370  }
4371 
4372  if (totmaxmem_delta <= 0) totmaxmem_delta = 1e-10; /* To avoid divide-by-zero */
4373 
4374  /* use re-calculated totmaxmem_delta to define percentages */
4375  p = prof;
4376  for (j=0; j<nprof; j++) {
4377  p->pc = (p->self/totmaxmem_delta) * 100.0;
4378  p++;
4379  }
4380 
4381  /* sorting with respect to percentage value */
4382 
4383  p = prof;
4384  qsort(p, nprof, sizeof(*p), memprof_pc_comp_desc);
4385 
4386  fprintf(fp,
4387  "Memory-profiling information for program='%s', proc#%d:\n",a_out, myproc);
4388  fprintf(fp,"\tNo. of instrumented routines called : %d\n", numroutines);
4389  fprintf(fp,"\tInstrumentation started : %s\n",start_stamp ? start_stamp : "N/A");
4390  end_stamp = timestamp();
4391  fprintf(fp,"\tInstrumentation ended : %s\n",end_stamp ? end_stamp : "N/A");
4392  {
4393  long long int hwm = gethwm_()/1048576;
4394  long long int rss = getrss_()/1048576;
4395  long long int maxstack = getmaxstk_()/1048576;
4396  long long int pag = getpag_();
4397  long long int maxseen = 0;
4398  long long int leaked = 0;
4399  p = prof;
4400  for (j=0; j<nprof; j++) {
4401  if (p->leaked > 0) leaked += p->leaked;
4402  p++;
4403  }
4404  for (t=0; t<numthreads; t++) {
4405  maxseen += maxseen_tot[t];
4406  }
4407  maxseen /= 1048576;
4408  leaked /= 1048576;
4409  fprintf(fp,
4410  "\tMemory usage : %lld MBytes (max.seen), %lld MBytes (leaked), %lld MBytes (heap), %lld MBytes (max.rss), %lld MBytes (max.stack), %lld (paging)\n",
4411  maxseen,leaked,hwm,rss,maxstack,pag);
4412  fprintf(fp,"\tNo. of procs/threads: %d procs, %d threads\n",nproc,numthreads);
4413  }
4414 
4415  if (myproc == 1) {
4416  fprintf(stderr,
4417  "Memory-profiling information for program='%s', proc#%d:\n",a_out, myproc);
4418  fprintf(stderr,"\tNo. of instrumented routines called : %d\n", numroutines);
4419  fprintf(stderr,"\tInstrumentation started : %s\n",start_stamp ? start_stamp : "N/A");
4420  fprintf(stderr,"\tInstrumentation ended : %s\n",end_stamp ? end_stamp : "N/A");
4421  } /* if (myproc == 1) */
4422 
4423  free_drhook(end_stamp);
4424 
4425  fprintf(fp,"\n");
4426  len =
4427  fprintf(fp," # Memory-%% Self-alloc + Children Self-Leaked Heap Max.Stack Paging #Calls #Allocs #Frees ");
4428  /*"12345-1234567899-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-123456789012-123456789012"*/
4429  fprintf(fp,"Routine@<thread-id>");
4430  if (opt_clusterinfo) fprintf(fp," [Cluster:(id,size)]");
4431  fprintf(fp,"\n");
4432  fprintf(fp, " (self) (bytes) (bytes) (bytes) (bytes) (bytes) (delta)");
4433  /*"12345-1234567899-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-12345678901234-123456789012-123456789012"*/
4434  fprintf(fp,"\n");
4435 
4436  p = prof;
4437  for (j=0; j<nprof; ) {
4438  int cluster_size = clusize[p->cluster];
4439  if (p->pc < percent_limit) break;
4440  t = p->tid - 1;
4441  if (p->children > maxseen_tot[t]) p->children = maxseen_tot[t]; /* adjust */
4442  fprintf(fp, fmt,
4443  ++j, p->pc,
4444  p->self, p->children, p->leaked,
4445  p->hwm, p->stk, p->pag,
4446  p->calls, p->alloc_count,
4447  (p->alloc_count - p->free_count != 0) ? "*" : " ", p->free_count,
4448  p->is_max ? "*" : " ");
4449 
4450  print_routine_name(fp, p, len, cluster_size);
4451 
4452  fprintf(fp,"\n");
4453  p++;
4454  } /* for (j=0; j<nprof; ) */
4455 
4456  fclose(fp);
4457  finish_4:
4458  free_drhook(filename);
4459  free_drhook(maxval);
4460  free_drhook(clusize);
4461  } while (0);
4462 
4463  free_drhook(tot);
4464  free_drhook(maxseen_tot);
4465  free_drhook(prof);
4466  do_prof_off = 0;
4467  }
4468  }
4469 }
4470 
4471 /*=== c_drhook_init_signals_ ===*/
4472 
4473 void
4474 c_drhook_init_signals_(const int *enforce)
4475 {
4476  signal_drhook_init(*enforce);
4477 }
4478 
4479 /*=== c_drhook_raise_ ===*/
4480 
4481 /*
4482  Just a convenience function for Fortran90 which may not have raise()-signal function
4483  CALL c_drhook_raise(10) ! Raise signal#10
4484 */
4485 
4486 void
4487 c_drhook_raise_(const int *sig)
4488 {
4489  fflush(NULL);
4490  raise(*sig);
4491 }
4492 
4493 /**** C-interface to Dr.Hook ****/
4494 
4495 void
4496 Dr_Hook(const char *name, int option, double *handle,
4497  const char *filename, int sizeinfo,
4498  int name_len, int filename_len)
4499 {
4500  static int first_time = 1;
4501  static int value = 1; /* ON by default */
4502  if (first_time) { /* Not thread safe */
4503  extern void *cdrhookinit_(int *value); /* from ifsaux/support/cdrhookinit.F90 */
4504  cdrhookinit_(&value);
4505  first_time = 0;
4506  }
4507  if (value == 0) return; /* Immediate return if OFF */
4508  if (value != 0) {
4509  int tid = get_thread_id_();
4510  if (option == 0) {
4511  c_drhook_start_(name, &tid, handle,
4512  filename, &sizeinfo,
4513  name_len > 0 ? name_len : strlen(name),
4514  filename_len > 0 ? filename_len : strlen(filename));
4515  }
4516  else if (option == 1) {
4517  c_drhook_end_(name, &tid, handle,
4518  filename, &sizeinfo,
4519  name_len > 0 ? name_len : strlen(name),
4520  filename_len > 0 ? filename_len : strlen(filename));
4521  }
4522  }
4523 }
4524 
4525 
4526 /**** Interface to HPM ****/
4527 
4528 /*<<< experimental >>>*/
4529 
4530 #ifdef HPM
4531 
4532 #ifdef RS6K
4533 /**** Interface to HPM (RS6K) ****/
4534 
4535 #include <pmapi.h>
4536 
4537 static pthread_mutex_t hpm_lock = PTHREAD_MUTEX_INITIALIZER;
4538 
4539 static int *hpm_tid_init = NULL;
4540 static double cycles = 1300000000.0; /* 1.3GHz ; changed via pm_cycles() in init_hpm() */
4541 
4542 #define MCYCLES (cycles * 1e-6)
4543 
4544 #define TEST_PM_ERROR(name, rc) \
4545  if (rc != 0) { \
4546  fprintf(stderr,"PM_ERROR(tid#%d, pthread_self()=%d): rc=%d at %s(), line=%d, file=%s\n",\
4547  tid,pthread_self(),rc,name,__LINE__,__FILE__); \
4548  pm_error((char *)name, rc); \
4549  spin(tid); \
4550  RAISE(SIGABRT); \
4551  }
4552 
4553 static void
4554 init_hpm(int tid)
4555 {
4556  const char *name = "init_hpm";
4557  int rc;
4558 
4559  if (!hpm_tid_init) {
4561  cycles = pm_cycles();
4562  }
4563 
4564  if (!hpm_tid_init[tid-1]) {
4565 #ifdef PMAPI_POST_P4
4566  pm_info2_t pminfo;
4567 #else
4568  pm_info_t pminfo;
4569 #endif
4570  pm_groups_info_t pmgroupsinfo;
4571 
4572  /*------------------------------------*/
4573  /* initialize the performance monitor */
4574  /*------------------------------------*/
4575 #ifdef PMAPI_POST_P4
4576  rc = pm_initialize(PM_VERIFIED | PM_UNVERIFIED | PM_CAVEAT | PM_GET_GROUPS,
4577  &pminfo, &pmgroupsinfo, PM_CURRENT);
4578 #else
4579  rc = pm_init(PM_VERIFIED | PM_UNVERIFIED | PM_CAVEAT | PM_GET_GROUPS,
4580  &pminfo, &pmgroupsinfo);
4581 #endif
4582  TEST_PM_ERROR((char *)name, rc);
4583 
4584  if (myproc <= 1) fprintf(stderr,
4585  ">>>pm_init() for ECMWF/OpenMP-tid#%d, pthread_self()=%d\n",
4586  tid,pthread_self());
4587  }
4588 
4589  if (!hpm_tid_init[tid-1]) {
4590 #if defined(PMAPI_P7)
4591  char *env = getenv("HPM_GROUP");
4592  hpm_grp = atoi(env);
4593  int group;
4594  fprintf(stderr,"hpm_group = %d\n",hpm_grp);
4595  if (hpm_grp == 150) group = 150;
4596  if (hpm_grp == 141) group = 141;
4597  /*-- counters --
4598  case 150:
4599  strcpy(group_label, "pm_vsu23, VSU Execution");
4600  strcpy(label[0], "four flops operation (fdiv,fsqrt) Scalar Instructions only (PM_VSU_FSQRT_FDIV)");
4601  strcpy(label[1], "VSU0 Finished an instruction (PM_VSU_FIN)");
4602  strcpy(label[2], "two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only (PM_VSU_FMA)");
4603  strcpy(label[3], "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished (PM_VSU_1FLOP)");
4604  strcpy(label[4], "Run instructions completed(PM_RUN_INST_CMPL)");
4605  strcpy(label[5], "Run cycles (PM_RUN_CYC)");
4606  strcpy(label[6], "Nothing");
4607  strcpy(label[7], "Nothing");
4608  */
4609  /*-- counters --
4610  case 141:
4611  strcpy(group_label, "pm_vsu14, VSU Execution");
4612  strcpy(label[0], "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished (PM_VSU_1FLOP)");
4613  strcpy(label[1], "four flops operation (scalar fdiv, fsqrt; DP vector version of fmadd, fnmadd, fmsub, SP vector versions of single flop instructions) (PM_VSU_4FLOP)");
4614  strcpy(label[2], "eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub) (PM_VSU_8FLOP)");
4615  strcpy(label[3], "two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions) (PM_VSU_2FLOP)");
4616  strcpy(label[4], "Run instructions completed(PM_RUN_INST_CMPL)");
4617  strcpy(label[5], "Run cycles (PM_RUN_CYC)");
4618  strcpy(label[6], "Nothing");
4619  strcpy(label[7], "Nothing");
4620  */
4621 #elif defined(PMAPI_P6)
4622  const int group = 186; /* pm_hpm1 */
4623  /*-- counters --
4624  case 186:
4625  strcpy(group_label, "HPM group");
4626  strcpy(label[0], "FPU executed one flop instruction (PM_FPU_1FLOP)");
4627  strcpy(label[1], "FPU executed multiply-add instruction (PM_FPU_FMA)");
4628  strcpy(label[2], "FPU executed FSQRT or FDIV instruction (PM_FPU_SQRT_FDIV)");
4629  strcpy(label[3], "Processor Cycles (PM_CYC [shared chip])");
4630  strcpy(label[4], "Run instructions completed(PM_RUN_INST_CMPL)");
4631  strcpy(label[5], "Run cycles (PM_RUN_CYC)");
4632  strcpy(label[6], "Nothing");
4633  strcpy(label[7], "Nothing");
4634  */
4635 #elif defined(PMAPI_P5_PLUS)
4636  /* IBM Power 5+ specific */
4637  const int group = 150; /* pm_hpmcount2 */
4638  /*-- counters -- (from John Hague, IBM/UK, 22-Aug-2006 : Thanx!!)
4639  case 150:
4640  strcpy(group_label, "pm_flop, Floating point operations");
4641  strcpy(label[0], "FPU executed FDIV instruction (PM_FPU_FDIV)");
4642  strcpy(label[1], "FPU executed multiply-add instruction (PM_FPU_FMA)");
4643  strcpy(label[2], "FPU executed FSQRT instruction (PM_FPU_SQRT)");
4644  strcpy(label[3], "FPU executed one flop instruction (PM_FPU_1FLOP)");
4645  strcpy(label[4], "Run instructions completed(PM_RUN_INST_CMPL)");
4646  strcpy(label[5], "Run cycles (PM_RUN_CYC)");
4647  strcpy(label[6], "Nothing");
4648  strcpy(label[7], "Nothing");
4649  */
4650 #else
4651  const int group = 60; /* pm_hpmcount2 */
4652  /*-- counters --
4653  case 60:
4654  strcpy(group_label, "pm_hpmcount2, Hpmcount group for computation intensity analysis");
4655  strcpy(label[0], "FPU executed FDIV instruction (PM_FPU_FDIV)");
4656  strcpy(label[1], "FPU executed multiply-add instruction (PM_FPU_FMA)");
4657  strcpy(label[2], "FPU0 produced a result (PM_FPU0_FIN)");
4658  strcpy(label[3], "FPU1 produced a result (PM_FPU1_FIN)");
4659  strcpy(label[4], "Processor cycles (PM_CYC)");
4660  strcpy(label[5], "FPU executed store instruction (PM_FPU_STF)");
4661  strcpy(label[6], "Instructions completed (PM_INST_CMPL)");
4662  strcpy(label[7], "LSU executed Floating Point load instruction (PM_LSU_LDF)");
4663  */
4664 #endif
4665 
4666  if (myproc <= 1) fprintf(stderr,"group = %d\n",group);
4667 
4668  pm_prog_t pmprog;
4669  pm_data_t pmdata;
4670  int i;
4671 
4672  /*---------------------*/
4673  /* set a default group */
4674  /*---------------------*/
4675  for (i=0; i<MAX_COUNTERS; i++) {
4676  pmprog.events[i] = COUNT_NOTHING;
4677  }
4678  pmprog.events[0] = group;
4679 
4680  /*-------------------------------------------------------------*/
4681  /* set the mode for user (not kernel) and thread (not process) */
4682  /*-------------------------------------------------------------*/
4683  pmprog.mode.w = 0;
4684  pmprog.mode.b.user = 1;
4685  pmprog.mode.b.process = 0;
4686  /* pmprog.mode.b.process = 1; */
4687 
4688  /*------------------------------------------*/
4689  /* for power-4 you have to use event groups */
4690  /*------------------------------------------*/
4691  pmprog.mode.b.is_group = 1;
4692 
4693  /*---------------------------------------------------*/
4694  /* set the mode to not to start counting immediately */
4695  /*---------------------------------------------------*/
4696  /* pmprog.mode.b.count = 1; */
4697  pmprog.mode.b.count = 0;
4698 
4699  /*-----------------------------------------*/
4700  /* initialize the group and start counting */
4701  /*-----------------------------------------*/
4702  hpm_tid_init[tid-1] = pthread_self(); /* Always > 0 */
4703 
4704  rc = pm_set_program_mythread(&pmprog);
4705  TEST_PM_ERROR((char *)name, rc);
4706 
4707  rc = pm_start_mythread();
4708  TEST_PM_ERROR((char *)name, rc);
4709  }
4710 }
4711 
4712 static void
4713 stop_only_hpm(int tid, drhook_key_t *pstop)
4714 {
4715  const char *name = "stop_only_hpm";
4716  pm_data_t pmdata;
4717  int i, rc;
4718 
4719  /* if (numthreads > 1) pthread_mutex_lock(&hpm_lock); */
4720 
4721  if (!hpm_tid_init || !hpm_tid_init[tid-1]) init_hpm(tid);
4722 
4723  /*
4724  rc = pm_stop_mythread();
4725  TEST_PM_ERROR((char *)name, rc);
4726  */
4727 
4728  if (pstop && !pstop->counter_stopped) {
4729  rc = pm_get_data_mythread(&pmdata);
4730  TEST_PM_ERROR((char *)name, rc);
4731 
4732  if (pstop && pstop->counter_in && !pstop->counter_stopped) {
4733  for (i=0; i<MAX_COUNTERS; i++) {
4734  pstop->counter_sum[i] += (pmdata.accu[i] - pstop->counter_in[i]);
4735  }
4736  pstop->counter_stopped = 1;
4737  }
4738  }
4739 
4740  /*
4741  rc = pm_start_mythread();
4742  TEST_PM_ERROR((char *)name, rc);
4743  */
4744 
4745  /* if (numthreads > 1) pthread_mutex_unlock(&hpm_lock); */
4746 }
4747 
4748 static void
4749 stopstart_hpm(int tid, drhook_key_t *pstop, drhook_key_t *pstart)
4750 {
4751  const char *name = "stopstart_hpm";
4752  pm_data_t pmdata;
4753  int i, rc;
4754 
4755  /* if (numthreads > 1) pthread_mutex_lock(&hpm_lock); */
4756 
4757  if (!hpm_tid_init || !hpm_tid_init[tid-1]) init_hpm(tid);
4758 
4759  /*
4760  rc = pm_stop_mythread();
4761  TEST_PM_ERROR((char *)name, rc);
4762  */
4763 
4764  rc = pm_get_data_mythread(&pmdata);
4765  TEST_PM_ERROR((char *)name, rc);
4766 
4767  if (pstop && pstop->counter_in && !pstop->counter_stopped) {
4768  for (i=0; i<MAX_COUNTERS; i++) {
4769  pstop->counter_sum[i] += (pmdata.accu[i] - pstop->counter_in[i]);
4770  }
4771  pstop->counter_stopped = 1;
4772  }
4773 
4774  if (pstart) {
4775  if (!pstart->counter_in ) pstart->counter_in = calloc_drhook(MAX_COUNTERS, sizeof(*pstart->counter_in ));
4776  if (!pstart->counter_sum) pstart->counter_sum = calloc_drhook(MAX_COUNTERS, sizeof(*pstart->counter_sum));
4777  for (i=0; i<MAX_COUNTERS; i++) {
4778  pstart->counter_in[i] = pmdata.accu[i];
4779  }
4780  pstart->counter_stopped = 0;
4781  }
4782 
4783  /*
4784  rc = pm_start_mythread();
4785  TEST_PM_ERROR((char *)name, rc);
4786  */
4787 
4788  /* if (numthreads > 1) pthread_mutex_unlock(&hpm_lock); */
4789 }
4790 
4791 #else
4792 
4793 /**** Interface to HPM (CRAY SV2, XD1 and XT3) ****/
4794 
4795 static int *hpm_tid_init = NULL;
4796 static double cycles = 0;
4797 
4798 #define MCYCLES (cycles * 1e-6)
4799 
4800 #define TEST_PM_ERROR(name, rc) \
4801  if (rc != 0) { \
4802  fprintf(stderr,"PM_ERROR(tid#%d, pthread_self()=%d): rc=%d at %s(), line=%d, file=%s\n",\
4803  tid,pthread_self(),rc,name,__LINE__,__FILE__); \
4804  pm_error((char *)name, rc); \
4805  spin(tid); \
4806  RAISE(SIGABRT); \
4807  }
4808 
4809 static void
4810 init_hpm(int tid)
4811 {
4812  const char *name = "init_hpm";
4813  int rc;
4814 
4815  cycles = irtc_rate_();
4816 }
4817 
4818 static void
4819 stop_only_hpm(int tid, drhook_key_t *pstop)
4820 {
4821  const char *name = "stop_only_hpm";
4822  int i, rc;
4823 
4824  if (!hpm_tid_init || !hpm_tid_init[tid-1]) init_hpm(tid);
4825 
4826  if (pstop && !pstop->counter_stopped) {
4827 
4828  if (pstop && pstop->counter_in && !pstop->counter_stopped) {
4829 #if defined(DT_FLOP)
4830  pstop->counter_sum[0] += ((long long int) flop_() - pstop->counter_in[0]);
4831 #if defined(SV2)
4832  pstop->counter_sum[ENTRY_4] += (_rtc() - pstop->counter_in[ENTRY_4]);
4833 #else
4834  pstop->counter_sum[ENTRY_4] += (irtc_() - pstop->counter_in[ENTRY_4]);
4835 #endif
4836 #endif
4837  pstop->counter_stopped = 1;
4838  }
4839  }
4840 }
4841 
4842 
4843 static void
4844 stopstart_hpm(int tid, drhook_key_t *pstop, drhook_key_t *pstart)
4845 {
4846  const char *name = "stopstart_hpm";
4847  int i, rc;
4848 
4849  if (!hpm_tid_init || !hpm_tid_init[tid-1]) init_hpm(tid);
4850 
4851  if (pstop && pstop->counter_in && !pstop->counter_stopped) {
4852 #if defined(DT_FLOP)
4853  pstop->counter_sum[0] += ((long long int) flop_() - pstop->counter_in[0]);
4854 #if defined(SV2)
4855  pstop->counter_sum[ENTRY_4] += (_rtc() - pstop->counter_in[ENTRY_4]);
4856 #else
4857  pstop->counter_sum[ENTRY_4] += (irtc_() - pstop->counter_in[ENTRY_4]);
4858 #endif
4859 #endif
4860  pstop->counter_stopped = 1;
4861  }
4862 
4863  if (pstart) {
4864  if (!pstart->counter_in ) pstart->counter_in = calloc_drhook(MAX_COUNTERS, sizeof(*pstart->counter_in ));
4865  if (!pstart->counter_sum) pstart->counter_sum = calloc_drhook(MAX_COUNTERS, sizeof(*pstart->counter_sum));
4866 #if defined(DT_FLOP)
4867  pstart->counter_in[0] = (long long int) flop_();
4868 #if defined(SV2)
4869  pstart->counter_in[ENTRY_4] = _rtc();
4870 #else
4871  pstart->counter_in[ENTRY_4] = irtc_();
4872 #endif
4873 #endif
4874  pstart->counter_stopped = 0;
4875  }
4876 }
4877 
4878 #endif /*Interface to RS6K and SV2, XD1, XT3 */
4879 
4880 static double
4881 mflops_hpm(const drhook_key_t *keyptr)
4882 {
4883  double mflops = 0;
4884  if (keyptr && keyptr->counter_sum && keyptr->counter_sum[ENTRY_4] > 0) {
4885  long long int sum = 0;
4886 #if defined(DT_FLOP)
4887  sum = keyptr->counter_sum[0];
4888 #elif defined(PMAPI_P7)
4889  /* IBM Power 7 specific */
4890  if(hpm_grp == 150) {
4891  sum = 2 * keyptr->counter_sum[2] + keyptr->counter_sum[3];
4892  }
4893  if(hpm_grp == 141) {
4894  sum = 2 * keyptr->counter_sum[0] + 4 * keyptr->counter_sum[1] + 2 * keyptr->counter_sum[3];
4895  }
4896 #elif defined(PMAPI_P6)
4897  /* IBM Power 6 specific */
4898  sum = keyptr->counter_sum[0] + 2 * keyptr->counter_sum[1];
4899 #elif defined(PMAPI_P5_PLUS)
4900  /* IBM Power 5+ specific */
4901  sum = 2 * keyptr->counter_sum[1] + keyptr->counter_sum[3];
4902 #else
4903  sum = keyptr->counter_sum[1] + keyptr->counter_sum[2] + keyptr->counter_sum[3] - keyptr->counter_sum[5];
4904 #endif
4905  if (sum > 0)
4906  mflops = (sum * MCYCLES)/keyptr->counter_sum[ENTRY_4];
4907  }
4908  return mflops;
4909 }
4910 
4911 static double
4912 mips_hpm(const drhook_key_t *keyptr)
4913 {
4914  double mipsrate = 0;
4915 #if defined(DT_FLOP)
4916  mipsrate = 0;
4917 #else
4918  if (keyptr && keyptr->counter_sum && keyptr->counter_sum[ENTRY_4] > 0) {
4919  mipsrate = (keyptr->counter_sum[ENTRY_6] * MCYCLES)/keyptr->counter_sum[ENTRY_4];
4920  }
4921 #endif
4922  return mipsrate;
4923 }
4924 
4925 static double
4926 divpc_hpm(const drhook_key_t *keyptr)
4927 {
4928  double divpc = 0;
4929 #if defined(DT_FLOP)
4930  divpc = 0;
4931 #else
4932  if (keyptr && keyptr->counter_sum) {
4933  long long int sum = 0;
4934 #if defined(PMAPI_P7)
4935  /* IBM Power 7 specific */
4936  if(hpm_grp == 150) {
4937  sum = 2 * keyptr->counter_sum[2] + keyptr->counter_sum[3];
4938  if (sum > 0) divpc = (keyptr->counter_sum[0]*100.0)/sum;
4939  }
4940  if(hpm_grp == 141) {
4941  sum = 2 * keyptr->counter_sum[0] + 4 * keyptr->counter_sum[1] + 2 * keyptr->counter_sum[3];
4942  if (sum > 0) divpc = (keyptr->counter_sum[1]*100.0)/sum;
4943  }
4944 #elif defined(PMAPI_P6)
4945  /* IBM Power 6 specific */
4946  sum = keyptr->counter_sum[0] + 2 * keyptr->counter_sum[1];
4947  if (sum > 0) divpc = (keyptr->counter_sum[2]*100.0)/sum;
4948 #elif defined(PMAPI_P5_PLUS)
4949  /* IBM Power 5+ specific */
4950  sum = 2 * keyptr->counter_sum[1] + keyptr->counter_sum[3];
4951  if (sum > 0) divpc = (keyptr->counter_sum[0]*100.0)/sum;
4952 #else
4953  sum = keyptr->counter_sum[1] + keyptr->counter_sum[2] + keyptr->counter_sum[3] - keyptr->counter_sum[5];
4954  if (sum > 0) divpc = (keyptr->counter_sum[0]*100.0)/sum;
4955 #endif
4956  }
4957 #endif
4958  return divpc;
4959 }
4960 
4961 static double
4963 {
4964  double sum = 0;
4965  if (keyptr && keyptr->counter_sum && keyptr->counter_sum[ENTRY_4] > 0) {
4966 #if defined(DT_FLOP)
4967  sum = (keyptr->counter_sum[0]) * 1e-6;
4968 #elif defined(PMAPI_P7)
4969  /* IBM Power 7 specific */
4970  if(hpm_grp == 150) {
4971  sum = (2 * keyptr->counter_sum[2] + keyptr->counter_sum[3]) * 1e-6;
4972  }
4973  if(hpm_grp == 141) {
4974  sum = (2 * keyptr->counter_sum[0] + 4 * keyptr->counter_sum[1] + 2 * keyptr->counter_sum[3]) * 1e-6;
4975  }
4976 #elif defined(PMAPI_P6)
4977  /* IBM Power 6 specific */
4978  sum = (keyptr->counter_sum[0] + 2 * keyptr->counter_sum[1]) * 1e-6;
4979 #elif defined(PMAPI_P5_PLUS)
4980  /* IBM Power 5+ specific */
4981  sum = (2 * keyptr->counter_sum[1] + keyptr->counter_sum[3]) * 1e-6;
4982 #else
4983  sum = (keyptr->counter_sum[1] + keyptr->counter_sum[2] + keyptr->counter_sum[3] - keyptr->counter_sum[5]) * 1e-6;
4984 #endif
4985  if (sum < 0) sum = 0;
4986  }
4987  return sum;
4988 }
4989 
4990 static double
4991 mip_count(const drhook_key_t *keyptr)
4992 {
4993  double sum = 0;
4994 #if defined(DT_FLOP)
4995  sum = 0;
4996 #else
4997  if (keyptr && keyptr->counter_sum && keyptr->counter_sum[ENTRY_4] > 0) {
4998  sum = keyptr->counter_sum[ENTRY_6] * 1e-6;
4999  }
5000 #endif
5001  return sum;
5002 }
5003 
5004 #endif /* HPM */
5005 
5006 
5007 /*
5008  this is result of moving some code from libodb.a
5009  (odb/aux/util_ccode.c) for use by libifsaux.a
5010  directly ; simplifies linking sequences.
5011 */
5012 
5013 #include <stdio.h>
5014 #include <string.h>
5015 /* #include <malloc.h> */
5016 #include <stdlib.h>
5017 #include <signal.h>
5018 
5019 #define FORTRAN_CALL
5020 
5021 #if defined(CRAY) && !defined(SV2)
5022 #define util_cputime_ UTIL_CPUTIME
5023 #define util_walltime_ UTIL_WALLTIME
5024 #endif
5025 
5026 /* Portable CPU-timer (User + Sys) ; also WALL CLOCK-timer */
5027 
5028 #include <unistd.h>
5029 #include <sys/types.h>
5030 #include <sys/times.h>
5031 #undef MIN
5032 #undef MAX
5033 #include <sys/param.h>
5034 
5035 #include <sys/time.h>
5036 
5037 #if !defined(VPP)
5038 
5039 FORTRAN_CALL
5041 {
5042  static double time_init = -1;
5043  double time_in_secs;
5044 #if !defined(CRAYXT)
5045  struct timeval tbuf;
5046  if (gettimeofday(&tbuf,NULL) == -1) perror("UTIL_WALLTIME");
5047 
5048  if (time_init == -1) time_init =
5049  (double) tbuf.tv_sec + (tbuf.tv_usec / 1000000.0);
5050 
5051  time_in_secs =
5052  (double) tbuf.tv_sec + (tbuf.tv_usec / 1000000.0) - time_init;
5053 #else
5054  if (time_init == -1) time_init = dclock();
5055  time_in_secs = dclock() - time_init;
5056 #endif
5057  return time_in_secs;
5058 }
5059 
5060 #if defined(CRAYXT)
5061 /* Cray XT3/XT4 with catamount microkernel */
5062 
5063 FORTRAN_CALL
5065 {
5066  return util_walltime_(); /* In absence of anything better */
5067 }
5068 
5069 #else
5070 
5071 extern clock_t times (struct tms *buffer);
5072 
5073 FORTRAN_CALL
5074 double util_cputime_()
5075 {
5076  struct tms tbuf;
5077  static int first_time = 1;
5078  static double clock_ticks = 0;
5079 
5080  (void) times(&tbuf);
5081 
5082  if (first_time) {
5083  clock_ticks = (double) sysconf(_SC_CLK_TCK);
5084  first_time = 0;
5085  }
5086 
5087  return (tbuf.tms_utime + tbuf.tms_stime +
5088  tbuf.tms_cutime + tbuf.tms_cstime) / clock_ticks;
5089 }
5090 #endif
5091 
5092 #else
5093 /* VPP */
5094 FORTRAN_CALL
5095 double util_walltime_()
5096 {
5097  double w, time_in_secs;
5098  static double wallref = 0;
5099  extern FORTRAN_CALL gettod_(double *);
5100  if (wallref == 0) gettod_(&wallref);
5101  gettod_(&w);
5102  time_in_secs = (w - wallref) * 0.000001;
5103  return time_in_secs;
5104 }
5105 #endif
5106 
5107 #ifdef VPP
5108 
5109 #include <sys/types.h>
5110 #include <sys/param.h>
5111 #include <sys/signal.h>
5112 #include <sys/fault.h>
5113 #include <sys/syscall.h>
5114 #include <sys/procfs.h>
5115 #include <sys/proc.h>
5116 #include <fcntl.h>
5117 
5118 static int fujitsu_getrusage(int who, struct rusage *rusage)
5119 {
5120  int rc = -1;
5121 
5122  if (rusage) rusage->ru_maxrss = 0;
5123 
5124  if (who == RUSAGE_SELF && rusage) {
5125  static int maxrss = 0;
5126  static int oldpid = -1;
5127  static char procfile[20] = "";
5128  static char *pf = NULL;
5129  /* static prpsinfo_t ps; */
5130  static proc_t proc;
5131  int pid = getpid();
5132  static int fildes = -1;
5133  unsigned int size;
5134 
5135  if (oldpid != pid) {
5136  oldpid = pid;
5137  maxrss = 0;
5138  pf = NULL;
5139  }
5140 
5141  if (!pf) {
5142  sprintf(procfile,"/proc/%d",pid);
5143  pf = procfile;
5144  fildes = open(procfile, O_RDONLY);
5145  }
5146 
5147  if (fildes == -1) return rc;
5148 
5149  /*
5150  if (ioctl(fildes, PIOCPSINFO, &ps) == -1) {
5151  perror("ioctl@fujitsu_getrusage(PIOCPSINFO)");
5152  return rc;
5153  }
5154  */
5155 
5156  if (ioctl(fildes, PIOCGETPR, &proc) == -1) {
5157  perror("ioctl@fujitsu_getrusage(PIOCGETPR)");
5158  return rc;
5159  }
5160 
5161  size = /* ps.pr_usevpmem + */ proc.p_brksize + proc.p_stksize;
5162  if (size > maxrss) maxrss = size;
5163  rusage->ru_maxrss = maxrss;
5164 
5165  /* close(fildes); */
5166  rc = 0;
5167  }
5168  return rc;
5169 }
5170 #endif /* VPP */
5171 
5172 FORTRAN_CALL
5173 int util_ihpstat_(int *option)
5174 {
5175  int ret_value = 0;
5176 
5177 #if defined(SGI) || defined(VPP)
5178  if (*option == 1) {
5179  struct rusage rusage;
5180 #ifdef SGI
5181  int pagesize = 1024;
5182  getrusage(0, &rusage);
5183 #endif
5184 #ifdef VPP
5185  int pagesize = 1; /* getpagesize() */
5186  fujitsu_getrusage(0, &rusage);
5187 #endif
5188 #if defined(SV2)
5189  int pagesize = getpagesize();
5190  getrusage(0, &rusage);
5191 #endif
5192 #if defined(XT3)
5193  int pagesize = getpagesize();
5194  getrusage(0, &rusage);
5195 #endif
5196 #if defined(XD1)
5197  int pagesize = getpagesize();
5198  getrusage(0, &rusage);
5199 #endif
5200  ret_value = (rusage.ru_maxrss * pagesize + 7) / 8; /* In 8 byte words */
5201  }
5202 #endif /* SGI or VPP */
5203 
5204  return ret_value;
5205 }
5206 
5207 #ifndef DARWIN
5208 
5209 #define SECS(x) ((int)(x))
5210 #define NSECS(x) ((int)(1000000000 * ((x) - SECS(x))))
5211 
5212 static void set_timed_kill()
5213 {
5214  if (drhook_timed_kill) {
5215  const char delim[] = ", \t/";
5216  char *p, *s = strdup_drhook(drhook_timed_kill);
5217  p = strtok(s,delim);
5218  while (p) {
5219  int target_myproc, target_omptid, target_sig;
5220  double start_time;
5221  int nelems = sscanf(p,"%d:%d:%d:%lf",
5222  &target_myproc, &target_omptid, &target_sig, &start_time);
5223  int ntids = 1;
5224 #ifdef _OPENMP
5225  ntids = omp_get_max_threads();
5226 #endif
5227  if (nelems == 4 &&
5228  (target_myproc == myproc || target_myproc == -1) &&
5229  (target_omptid == -1 || (target_omptid >= 1 && target_omptid <= ntids)) &&
5230  (target_sig >= 1 && target_sig <= NSIG) &&
5231  start_time > 0) {
5232 #pragma omp parallel num_threads(ntids)
5233  {
5234  int tid = get_thread_id_();
5235  if (target_omptid == -1 || target_omptid == tid) {
5236  char *pfx = PREFIX(tid);
5237  timer_t timerid = { 0 };
5238  struct itimerspec its = { 0 } ;
5239  struct sigevent sev = { 0 } ;
5240  sev.sigev_notify = SIGEV_THREAD_ID | SIGEV_SIGNAL;
5241  sev.sigev_signo = target_sig;
5242  /* sev.sigev_notify_thread_id = gettid(); */
5243  sev._sigev_un._tid = gettid();
5244  sev.sigev_value.sival_ptr = &timerid;
5245 
5246  its.it_value.tv_sec = SECS(start_time);
5247  its.it_value.tv_nsec = NSECS(start_time);
5248 
5249  its.it_interval.tv_sec = 0;
5250  its.it_interval.tv_nsec = 0;
5251 
5252  timer_create(CLOCK_MONOTONIC, &sev, &timerid);
5253  /* timer_create(CLOCK_REALTIME, &sev, &timerid); */
5254  timer_settime(timerid, 0, &its, NULL);
5255 
5256 #pragma omp critical (TimedKill)
5257  {
5258  fprintf(stderr,
5259  "%s %s [%s@%s:%d] Developer timer (%s) expires"
5260  " after %.3fs through signal#%d (ntids=%d)\n",
5261  pfx,TIMESTR(tid),FFL,
5262  p,
5263  start_time, target_sig, ntids);
5264  fflush(NULL);
5265  }
5266  } /* if (target_omptid == -1 || target_omptid == tid) */
5267  }
5268  }
5269  p = strtok(NULL,delim);
5270  }
5271  free_drhook(s);
5272  }
5273 }
5274 
5275 #endif
static int callpath_depth
Definition: drhook.c:174
void dr_hook_prt_(const int *ftnunitno, const char *s, int s_len)
const char * ec_GetArgs(int argno)
Definition: cargs.c:138
static int set_default_handler(int sig, int unlimited_corefile, int verbose)
Definition: drhook.c:590
void c_drhook_print_(const int *ftnunitno, const int *thread_id, const int *print_option, int *level)
Definition: drhook.c:3639
static long size
Definition: bytes_io.c:262
void feenableexcept()
Definition: drhook.c:113
static volatile sig_atomic_t signal_handler_called
Definition: drhook.c:443
static int timestr_len
Definition: drhook.c:460
static const char * trim(const char *name, int *n)
Definition: drhook.c:2383
void c_drhook_getenv_(const char *s, char *value, int slen, const int valuelen)
Definition: drhook.c:3088
static drhook_timeline_t * timeline
Definition: drhook.c:212
static int drhook_trapfpe
Definition: drhook.c:144
static void dump_file(const char *pfx, int tid, int sig, int nsigs, const char filename[])
Definition: drhook.c:534
static int opt_getstk
Definition: drhook.c:153
static drhook_key_t * itself(drhook_key_t *keyptr_self, int tid, int opt, double *delta_time, const double *walltime, const double *cputime)
Definition: drhook.c:2788
static int set_unlimited_corefile(unsigned long long int *hardlimit)
Definition: drhook.c:1347
ERROR in a
Definition: ecsort_shared.h:90
struct drhook_prefix_t drhook_prefix_t
static void process_options()
Definition: drhook.c:1974
static int nproc
Definition: drhook.c:436
static volatile sig_atomic_t opt_gencore
Definition: drhook.c:191
static int opt_memprof
Definition: drhook.c:161
struct drhook_key_t drhook_key_t
static char * TimeStr(char *s, int slen)
Definition: drhook.c:775
char pad[CACHELINESIZE - 2 *WORDLEN]
Definition: getcurheap.c:125
struct callstack_t callstack_t
static int prof_pc_comp_desc(const void *v1, const void *v2)
Definition: drhook.c:3544
static drhook_key_t * insertkey(int tid, const drhook_key_t *keyptr_in)
Definition: drhook.c:2412
static void putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, int sizeinfo, double *walltime, double *cputime)
Definition: drhook.c:2531
static void trapfpe(void)
Definition: drhook.c:117
void c_drhook_check_watch_(const char *where, const int *allow_abort, int where_len)
Definition: drhook.c:3060
static int opt_propagate_signals
Definition: drhook.c:167
static void signal_harakiri(int sig SIG_EXTRA_ARGS)
Definition: drhook.c:1424
static drhook_calltree_t ** thiscall
Definition: drhook.c:441
static int opt_funcexit
Definition: drhook.c:179
static void print_routine_name0(FILE *fp, const char *p_name, int p_tid, const char *p_filename, int p_cluster, const equivalence_t *p_callpath, int p_callpath_len, int len, int cluster_size)
Definition: drhook.c:3576
double util_walltime_()
Definition: drhook.c:5040
static int drhook_dump_hugepages
Definition: drhook.c:201
static void catch_signals(int silent)
Definition: drhook.c:1082
static double dclock_start
Definition: drhook.c:274
static double mflops_hpm(const drhook_key_t *keyptr)
Definition: drhook.c:4881
static int callpath_indent
Definition: drhook.c:172
int drhook_memtrace
Definition: drhook.c:79
struct drhook_timeline_t drhook_timeline_t
unsigned int callpath_hashfunc(unsigned int inithash, const equivalence_t *callpath, int callpath_len, unsigned int *fullhash)
Definition: drhook.c:811
static int opt_timeline
Definition: drhook.c:181
long long int getstk_()
Definition: getstk.c:63
static void do_prof()
Definition: drhook.c:2932
static double * overhead
Definition: drhook.c:454
static volatile unsigned long long int saved_corefile_hardlimit
Definition: drhook.c:446
long long int getmaxrss_()
Definition: getmaxrss.c:5
static int spin(int secs)
Definition: drhook.c:525
static void restore_default_signals(int silent)
Definition: drhook.c:1149
static int opt_calltrace
Definition: drhook.c:177
static int myproc
Definition: drhook.c:435
static drhook_sig_t siglist[1+NSIG]
Definition: drhook.c:448
long long int o_lock_t
Definition: drhook.h:50
static int opt_gencore_signal
Definition: drhook.c:192
static volatile sig_atomic_t signal_handler_ignore_atexit
Definition: drhook.c:444
static char * strdup2_drhook(const char *s, int s_len)
Definition: drhook.c:750
static void signal_drhook_init(int enforce)
Definition: drhook.c:1766
static char * get_mon_out(int me)
Definition: drhook.c:1917
static int hpm_grp
Definition: drhook.c:194
static void set_timed_kill()
Definition: drhook.c:5212
static int opt_sizeinfo
Definition: drhook.c:168
quick &counting sorts only inumt inumt name
static int opt_hpmprof
Definition: drhook.c:160
struct drhook_sig_t drhook_sig_t
long long int getmaxcurheap_()
static double mips_hpm(const drhook_key_t *keyptr)
Definition: drhook.c:4912
FORTRAN_CALL int util_ihpstat_(int *option)
Definition: drhook.c:5173
clock_t times(struct tms *buffer)
double util_cputime_()
Definition: drhook.c:5064
static int cstklen
Definition: drhook.c:486
static drhook_watch_t * watch
Definition: drhook.c:456
static void stopstart_hpm(int tid, drhook_key_t *pstop, drhook_key_t *pstart)
Definition: drhook.c:4749
static int opt_random_memstat
Definition: drhook.c:195
void necsx_trbk_(const char *msg, int msglen)
static double cycles
Definition: drhook.c:4540
int get_thread_id_()
static int opt_gethwm
Definition: drhook.c:152
static drhook_key_t * callstack(int tid, void *key, drhook_key_t *keyptr)
Definition: drhook.c:662
static char * start_stamp
Definition: drhook.c:93
static int opt_wallprof
Definition: drhook.c:158
struct drhook_watch_t drhook_watch_t
static unsigned int hashmask
Definition: drhook.c:495
static void * calloc_drhook(size_t nmemb, size_t size)
Definition: drhook.c:638
long long int irtc_rate_()
static void signal_gencore(int sig SIG_EXTRA_ARGS)
Definition: drhook.c:1374
static int opt_walltime
Definition: drhook.c:156
static o_lock_t DRHOOK_lock
Definition: drhook.c:433
static int drhook_harakiri_timeout
Definition: drhook.c:143
static int opt_clusterinfo
Definition: drhook.c:169
static drhook_prefix_t * ec_drhook
Definition: drhook.c:459
FILE * fp
Definition: opfla_perfmon.c:24
static drhook_key_t * getkey(int tid, const char *name, int name_len, const char *filename, int filename_len, const double *walltime, const double *cputime, const equivalence_t *callpath, int callpath_len, int *free_callpath)
Definition: drhook.c:2439
static int opt_cpuprof
Definition: drhook.c:159
static pthread_mutex_t hpm_lock
Definition: drhook.c:4537
static int any_memstat
Definition: drhook.c:151
static double opt_hpmstop_mflops
Definition: drhook.c:217
static unsigned int hashsize
Definition: drhook.c:494
void c_drhook_watch_(const int *onoff, const char *array_name, const void *array_ptr, const int *nbytes, const int *abort_if_changed, const int *printkey, const int *nvals, const int *print_traceback_when_set, int array_name_len)
Definition: drhook.c:3152
long long int gethwm_()
static int fujitsu_getrusage(int who, struct rusage *rusage)
Definition: drhook.c:5118
static void trapfpe_treatment(int sig, int silent)
Definition: drhook.c:1118
static long long int opt_timeline_freq
Definition: drhook.c:188
PrintWatchKeys_t
Definition: drhook.c:2968
integer, dimension(180), parameter nmax
Definition: modd_splines.F90:7
static void random_memstat(int tid, int enforce)
Definition: drhook.c:1947
static drhook_key_t ** curkeyptr
Definition: drhook.c:455
static ll_t maxstack
Definition: getstk.c:11
void fedisableexcept()
Definition: drhook.c:114
static void insert_calltree(int tid, drhook_key_t *keyptr)
Definition: drhook.c:827
void c_drhook_raise_(const int *sig)
Definition: drhook.c:4487
void c_drhook_init_signals_(const int *enforce)
Definition: drhook.c:4474
static int drhook_dump_smaps
Definition: drhook.c:199
static void ignore_signals(int silent)
Definition: drhook.c:1198
long long int getmaxstk_()
Definition: getstk.c:111
static void lld_commie(long long int n, char sd[])
Definition: drhook.c:2820
static size_t pagesize
Definition: gethwm.c:71
static int opt_funcenter
Definition: drhook.c:178
static long long int irtc_start
Definition: drhook.c:267
static double percent_limit
Definition: drhook.c:452
static volatile sig_atomic_t unlimited_corefile_retcode
Definition: drhook.c:445
static void DrHookPrint(int ftnunitno, const char *line)
Definition: drhook.c:3623
static long long int opt_hpmstop_threshold
Definition: drhook.c:216
static int memprof_pc_comp_desc(const void *v1, const void *v2)
Definition: drhook.c:3554
static int opt_self
Definition: drhook.c:164
static int do_prof_off
Definition: drhook.c:2929
static double my_inv_irtc_rate
Definition: drhook.c:288
void crc32_(const void *vbuf, const int *pnbuf, unsigned int *pnCRC)
Definition: crc.c:130
static void * malloc_drhook(size_t size)
Definition: drhook.c:622
static void dump_hugepages(int enforce, const char *pfx, int tid, int sig, int nsigs)
Definition: drhook.c:562
static void remove_calltree(int tid, drhook_key_t *keyptr, const double *delta_wall, const double *delta_cpu)
Definition: drhook.c:870
int drhook_lhook
Definition: drhook.h:38
struct drhook_prof_t drhook_prof_t
static int callpath_packed
Definition: drhook.c:175
void coml_set_lockid_(o_lock_t *mylock)
ERROR in n
Definition: ecsort_shared.h:90
unsigned int hashfunc(const char *s, int s_len)
Definition: drhook.c:790
static void untrapfpe(void)
Definition: drhook.c:123
void c_drhook_set_mpi_()
Definition: drhook.c:1748
static int opt_timeline_thread
Definition: drhook.c:182
static double my_irtc_rate
Definition: drhook.c:287
static char * a_out
Definition: drhook.c:449
static pid_t gettid()
Definition: drhook.c:471
static int opt_timeline_format
Definition: drhook.c:186
static void print_watch(int ftnunitno, int key, const void *ptr, int n)
Definition: drhook.c:2978
static void dbl_commie(double n, char sd[])
Definition: drhook.c:2846
static int opt_calls
Definition: drhook.c:163
static char * timestamp()
Definition: drhook.c:762
void dr_hook_procinfo_(int *myproc, int *nproc)
static int opt_getpag
Definition: drhook.c:155
static void memstat(drhook_key_t *keyptr, const int *thread_id, int in_getkey)
Definition: drhook.c:968
static int numthreads
Definition: drhook.c:434
static equivalence_t * get_callpath(int tid, int *callpath_len)
Definition: drhook.c:2902
static int watch_count
Definition: drhook.c:458
static asection * text
Definition: linuxtrbk.c:492
static void init_drhook(int ntids)
Definition: drhook.c:2639
static double opt_timeline_MB
Definition: drhook.c:189
void LinuxTraceBack(const char *prefix, const char *timestr, void *sigcontextptr)
Definition: linuxtrbk.c:385
long long int getpag_()
intent(out) overrides sub arrays one Sort by the least significant key first sum(iindex(1:n))
static int opt_getrss
Definition: drhook.c:154
logical lhook
Definition: yomhook.F90:15
static int opt_callpath
Definition: drhook.c:170
static drhook_key_t ** keyself
Definition: drhook.c:453
long long int irtc()
static char * safe_llitoa(long long int i, char b[], int blen)
Definition: drhook.c:1400
static int atp_ignore_sigterm
Definition: drhook.c:149
static char * get_memmon_out(int me)
Definition: drhook.c:1932
int snprintf(char *str, size_t size, const char *format,...)
Definition: endian.c:108
static int drhook_dump_buddyinfo
Definition: drhook.c:200
void ec_set_umask_()
Definition: endian.c:76
void c_drhook_init_(const char *progname, const int *num_threads, int progname_len)
Definition: drhook.c:3116
static int * hpm_tid_init
Definition: drhook.c:4539
static char * strdup_drhook(const char *s)
Definition: drhook.c:738
static double mflop_count(const drhook_key_t *keyptr)
Definition: drhook.c:4962
static void gdb__sigdump(int sig SIG_EXTRA_ARGS)
Definition: drhook.c:1250
static char * end_stamp
Definition: drhook.c:94
void Dr_Hook(const char *name, int option, double *handle, const char *filename, int sizeinfo, int name_len, int filename_len)
Definition: drhook.c:4496
static int signals_set
Definition: drhook.c:442
long long int irtc_()
static void init_hpm(int tid)
Definition: drhook.c:4554
static int atp_max_analysis_time
Definition: drhook.c:148
static drhook_calltree_t ** calltree
Definition: drhook.c:440
long long int getcurheap_thread_(const int *tidnum)
void c_drhook_end_(const char *name, const int *thread_id, const double *key, const char *filename, const int *sizeinfo, int name_len, int filename_len)
Definition: drhook.c:3320
long long int getmaxhwm_()
Definition: gethwm.c:120
static int atp_enabled
Definition: drhook.c:146
static int prof_name_comp(const void *v1, const void *v2)
Definition: drhook.c:3528
struct drhook_memprof_t drhook_memprof_t
static int atp_max_cores
Definition: drhook.c:147
static double divpc_hpm(const drhook_key_t *keyptr)
Definition: drhook.c:4926
long long int getmaxcurheap_thread_(const int *tidnum)
static void unroll_callpath(FILE *fp, int len, const equivalence_t *callpath, int callpath_len)
Definition: drhook.c:2874
void c_drhook_start_(const char *name, const int *thread_id, double *key, const char *filename, const int *sizeinfo, int name_len, int filename_len)
Definition: drhook.c:3225
void c_drhook_set_lhook_(const int *lhook)
Definition: drhook.c:3080
static char * mon_out
Definition: drhook.c:450
long long int getcurheap_()
static callstack_t ** cstk
Definition: drhook.c:660
subroutine toupper(CDS)
Definition: distio_mix.F90:575
static void stop_only_hpm(int tid, drhook_key_t *pstop)
Definition: drhook.c:4713
static int memprof_name_comp(const void *v1, const void *v2)
Definition: drhook.c:3536
static double drhook_dump_hugepages_freq
Definition: drhook.c:202
void coml_set_debug_(const int *konoff, int *kret)
static int opt_timeline_unitno
Definition: drhook.c:187
static double mip_count(const drhook_key_t *keyptr)
Definition: drhook.c:4991
static int opt_cputime
Definition: drhook.c:157
static void flptrap(int sig)
Definition: drhook.c:1019
static int opt_trim
Definition: drhook.c:162
static const char * trim_and_adjust_left(const char *p, int *name_len)
Definition: drhook.c:3564
struct drhook_calltree_t drhook_calltree_t
double flop_()
static char * drhook_timed_kill
Definition: drhook.c:198
static int allow_coredump
Definition: drhook.c:447
static int mon_out_procs
Definition: drhook.c:451
ERROR in index
Definition: ecsort_shared.h:90
static void signal_drhook(int sig SIG_EXTRA_ARGS)
Definition: drhook.c:1462
static int max_threads
Definition: drhook.c:437
void c_drhook_not_mpi_()
Definition: drhook.c:1754
static void check_watch(const char *label, const char *name, int name_len, int allow_abort)
Definition: drhook.c:3004
subroutine t(CDPREF, CDSUFF, KCODPA, LDNIVA, PMULTI)
Definition: faicor.F90:567
static pid_t pid
Definition: drhook.c:438
long long int getrss_()
static drhook_watch_t * last_watch
Definition: drhook.c:457
void coml_unset_lockid_(o_lock_t *mylock)
static int nhash
Definition: drhook.c:493
void c_drhook_memcounter_(const int *thread_id, const long long int *size, long long int *keyptr_addr)
Definition: drhook.c:3392
void coml_test_lockid_(int *is_set, o_lock_t *mylock)
static drhook_key_t ** keydata
Definition: drhook.c:439
void c_drhook_process_options_(const int *lhook, const int *Myproc, const int *Nproc)
Definition: drhook.c:1963