SURFEX v8.1
General documentation of Surfex
rsort32.c
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <limits.h>
4 #include <signal.h>
5 #include "intercept_alloc.h"
6 #include "raise.h"
7 
8 /* rsort32_() : 32-bit Fortran-callable RADIX-sort */
9 
10 /*
11  by Sami Saarinen, ECMWF, 3/2/1998
12  - " - 1/2/2000 : BIG_ENDIAN & LITTLE_ENDIAN labels renamed to *_INDIAN
13  since they may conflict with the ones in <sys/endian.h>
14  - " - 3/1/2001 : reference to valloc() removed; ALLOC() modified
15  - " - 25/1/2001 : BIG_INDIAN removed (as label)
16  LITTLE_INDIAN called as LITTLE
17  - " - ??/9?/2001 : Speedup in rsort32
18  - " - 14/3/2002 : rsort32_func implemeted to enable to run alternative sorting
19  routine than rsort32
20  - " - 18/02/2005 : Handle 64-bit (signed) ints
21  IBM malloc() may call __malloc()/__free() [see getcurheap.c]
22  - " - 21/02/2005 : Some optimization & endian detection on-the-fly
23  - " - 07/07/2005 : Mods in index_adj & bitsum
24  Added support for 64-bit unsigned ints
25  - " - 07/02/2007 : Intercepting alloc (IBM & NEC SX) + NEC SX vectorization
26 
27  Thanks to Mike Fisher, ECMWF
28  and Cray SCILIB ORDERS()-function developers
29 */
30 
31 /*
32  Methods:
33 
34  0 : Unsigned 32-bit ints
35  1 : Signed 32-bit ints
36  2 : 64-bit doubles (IEEE) : signbit + 11-bit exp + 52-bits mantissa
37  3 : 32-bit floats (IEEE) : signbit + 8-bit exp + 23-bits mantissa
38  4 : Signed 64-bit ints
39  5 : Unsigned 64-bit ints
40 
41 */
42 
43 typedef unsigned int Uint32;
44 typedef unsigned char Uchar;
45 
46 #ifdef __uxppx__
47 #ifndef VPP
48 #define VPP
49 #endif
50 #endif
51 
52 #ifdef VPP
53 #pragma global noalias
54 #pragma global novrec
55 #elif defined(NECSX)
56 #pragma cdir options -pvctl,nodep
57 #endif
58 
59 #if defined(VPP) || defined(NECSX)
60 /* .. or any vector machine */
61 static int SpeedUp = 0;
62 #else
63 /* scalar prozezzorz */
64 static int SpeedUp = 1;
65 #endif
66 
67 #define SORT_UINT 0
68 #define SORT_INT 1
69 #define SORT_R64 2
70 #define SORT_R32 3
71 #define SORT_I64 4
72 #define SORT_U64 5
73 
74 typedef long long int ll_t;
75 
76 #define ALLOC(x,size) \
77  { ll_t bytes = (ll_t)sizeof(*x) * (size); \
78  bytes = (bytes < 1) ? 1 : bytes; \
79  x = THEmalloc(bytes); \
80  if (!x) { fprintf(stderr, \
81  "malloc() of %s (%lld bytes) failed in file=%s, line=%d\n", \
82  #x, bytes, __FILE__, __LINE__); RAISE(SIGABRT); } }
83 
84 #define FREE(x) if (x) { THEfree(x); x = NULL; }
85 
86 #define BITSUM(x) bitsum[x] += ((item >> x) & 1U)
87 
88 #define SIGNBIT32 0x80000000
89 #define MASKALL32 0xFFFFFFFF
90 #define ZEROALL32 0x00000000
91 
92 #define CVMGM(a,b,c) ( ((c) & SIGNBIT32) ? (a) : (b) )
93 
94 #define N32BITS 32
95 
96 void
97 rsort32_(const int *Mode,
98  const int *N,
99  const int *Inc,
100  const int *Start_addr,
101  Uint32 Data[],
102  int index[],
103  const int *Index_adj,
104  int *retc)
105 {
106  int mode = *Mode;
107  int method = mode%10;
108  int n = *N;
109  int rc = n;
110  int inc = *Inc;
111  int index_adj = *Index_adj;
112  int addr = (*Start_addr) - 1; /* Fortran to C */
113  int i, j, jj;
114  Uchar xorit = 0;
115  Uchar copytmp = 0;
116  Uchar alloc_data = 0;
117  Uint32 *data = NULL;
118  int *tmp = NULL;
119  Uint32 bitsum[N32BITS];
120  int lsw, msw;
121 
122  if (method != SORT_UINT &&
123  method != SORT_INT &&
124  method != SORT_R64 &&
125  method != SORT_R32 &&
126  method != SORT_I64 &&
127  method != SORT_U64 ) {
128  rc = -1;
129  goto finish;
130  }
131 
132  if (n <= 0) {
133  if (n < 0) rc = -2;
134  goto finish;
135  }
136 
137  if (inc < 1) {
138  rc = -3;
139  goto finish;
140  }
141 
142  { /* Little/big-endian selection */
143  extern int ec_is_little_endian();
144  int i_am_little = ec_is_little_endian();
145 
146  if (i_am_little) {
147  /* We are on little-endian machine */
148  lsw = 0;
149  msw = 1;
150  }
151  else {
152  /* We are on big-endian machine */
153  lsw = 1;
154  msw = 0;
155  }
156  }
157 
158  if (method == SORT_R64 ||
159  method == SORT_I64 ||
160  method == SORT_U64) {
161  inc *= 2;
162  addr *= 2;
163  }
164 
165  if (mode < 10) {
166  /* index[] needs to be initialized */
167  for (i=0; i<n; i++) index[i] = i + index_adj;
168  }
169 
170  alloc_data = ((inc > 1)
171  || (method == SORT_R32)
172  || (method == SORT_R64)
173  || (method == SORT_I64)
174  || (method == SORT_U64)
175  );
176  if (alloc_data) ALLOC(data, n);
177 
178  if (method == SORT_R32) {
179  j = addr;
180 #ifdef NECSX
181 #pragma cdir nodep
182 #endif
183  for (i=0; i<n; i++) {
184  Uint32 mask = CVMGM(MASKALL32, SIGNBIT32, Data[j]);
185  data[i] = Data[j] ^ mask;
186  j += inc;
187  }
188 
189  method = SORT_UINT;
190  }
191  else if (method == SORT_R64) {
192  int Method = 10 + SORT_UINT;
193  const int aStart_addr = 1;
194  int aN = n;
195  const int aInc = 1;
196  int aIndex_adj = index_adj;
197 
198  /* Least significant word */
199  j = addr + lsw;
200  jj = addr + msw;
201 #ifdef NECSX
202 #pragma cdir nodep
203 #endif
204  for (i=0; i<n; i++) {
205  Uint32 mask = CVMGM(MASKALL32, ZEROALL32, Data[jj]);
206  data[i] = Data[j] ^ mask;
207  j += inc;
208  jj += inc;
209  }
210 
211  rsort32_(&Method, &aN, &aInc, &aStart_addr, data, index, &aIndex_adj, &rc);
212 
213  if (rc != n) goto finish;
214 
215  /* Most significant word */
216  jj = addr + msw;
217 #ifdef NECSX
218 #pragma cdir nodep
219 #endif
220  for (i=0; i<n; i++) {
221  Uint32 mask = CVMGM(MASKALL32, SIGNBIT32, Data[jj]);
222  data[i] = Data[jj] ^ mask;
223  jj += inc;
224  }
225 
226  method = SORT_UINT;
227  }
228  else if (method == SORT_I64 || method == SORT_U64) {
229  int Method = 10 + SORT_UINT;
230  const int aStart_addr = 1;
231  int aN = n;
232  const int aInc = 1;
233  int aIndex_adj = index_adj;
234 
235  /* Least significant word */
236  jj = addr + lsw;
237  for (i=0; i<n; i++) {
238  data[i] = Data[jj];
239  jj += inc;
240  }
241 
242  rsort32_(&Method, &aN, &aInc, &aStart_addr, data, index, &aIndex_adj, &rc);
243 
244  if (rc != n) goto finish;
245 
246  /* Most significant word */
247  if (method == SORT_I64) {
248  jj = addr + msw;
249  for (i=0; i<n; i++) {
250  data[i] = Data[jj] ^ SIGNBIT32;
251  jj += inc;
252  }
253  }
254  else { /* unsigned 64-bit ints i.e. method == SORT_U64 */
255  jj = addr + msw;
256  for (i=0; i<n; i++) {
257  data[i] = Data[jj];
258  jj += inc;
259  }
260  }
261 
262  method = SORT_UINT;
263  }
264  else if (inc > 1) {
265  j = addr;
266 #ifdef NECSX
267 #pragma cdir nodep
268 #endif
269  for (i=0; i<n; i++) {
270  data[i] = Data[j];
271  j += inc;
272  }
273  }
274  else {
275  data = &Data[addr];
276  }
277 
278  xorit = (method == SORT_INT);
279 
280  /* Check whether particular "bit-columns" are all zero or one */
281 
282  for (j=0; j<N32BITS; j++) bitsum[j] = 0;
283 
284  for (i=0; i<n; i++) {
285  Uint32 item;
286  if (xorit) data[i] ^= SIGNBIT32;
287  item = data[i];
288  /* Unrolled, full vector */
289  BITSUM(0) ; BITSUM(1) ; BITSUM(2) ; BITSUM(3) ;
290  BITSUM(4) ; BITSUM(5) ; BITSUM(6) ; BITSUM(7) ;
291  BITSUM(8) ; BITSUM(9) ; BITSUM(10); BITSUM(11);
292  BITSUM(12); BITSUM(13); BITSUM(14); BITSUM(15);
293  BITSUM(16); BITSUM(17); BITSUM(18); BITSUM(19);
294  BITSUM(20); BITSUM(21); BITSUM(22); BITSUM(23);
295  BITSUM(24); BITSUM(25); BITSUM(26); BITSUM(27);
296  BITSUM(28); BITSUM(29); BITSUM(30); BITSUM(31);
297  }
298 
299  ALLOC(tmp, n);
300 
301  jj = 0;
302  for (j=0; j<N32BITS; j++) {
303  int sum = bitsum[j];
304  if (sum > 0 && sum < n) { /* if 0 or n, then the whole column of bits#j 0's or 1's */
305  Uint32 mask = (1U << j);
306  int *i1, *i2;
307 
308  if (jj%2 == 0) {
309  i1 = index;
310  i2 = tmp;
311  copytmp = 1;
312  }
313  else {
314  i1 = tmp;
315  i2 = index;
316  copytmp = 0;
317  }
318 
319  if (SpeedUp == 0) {
320  int k = 0;
321 #ifdef NECSX
322 #pragma cdir nodep
323 #endif
324  for (i=0; i<n; i++) /* Gather zero bits */
325  if ( (data[i1[i]-index_adj] & mask) == 0 ) i2[k++] = i1[i];
326 
327 #ifdef NECSX
328 #pragma cdir nodep
329 #endif
330  for (i=0; i<n; i++) /* Gather one bits */
331  if ( (data[i1[i]-index_adj] & mask) == mask ) i2[k++] = i1[i];
332  }
333  else
334  {
335  int k1 = 0, k2 = n-sum;
336 #ifdef NECSX
337 #pragma cdir nodep
338 #endif
339  for (i=0; i<n; i++) { /* Gather zero & one bits in a single sweep */
340  Uint32 value = data[i1[i]-index_adj] & mask;
341  i2[value == 0 ? k1++ : k2++] = i1[i];
342  } /* for (i=0; i<n; i++) */
343  if (k1 + sum != n || k2 != n) {
344  fprintf(stderr,
345  "***Programming error in rsort32_(): k1 + sum != n || k2 != n; k1=%d,k2=%d,sum=%d,n=%d\n",
346  k1,k2,sum,n);
347  RAISE(SIGABRT);
348  }
349  }
350 
351  jj++;
352  }
353  }
354 
355  if (copytmp) {
356 #ifdef NECSX
357 #pragma cdir nodep
358 #endif
359  for (i=0; i<n; i++) index[i] = tmp[i];
360  }
361 
362  FREE(tmp);
363 
364  if (!alloc_data && xorit && inc == 1) {
365  /* 32-bit signed ints : backward */
366  for (i=0; i<n; i++) data[i] ^= SIGNBIT32;
367  }
368 
369  if (alloc_data) FREE(data);
370 
371  finish:
372 
373  *retc = rc;
374 }
375 
376 
377 void
378 rsort32_ibm_(const int *Mode,
379  const int *N,
380  const int *Inc,
381  const int *Start_addr,
382  Uint32 Data[],
383  int index[],
384  const int *Index_adj,
385  int *retc)
386 {
387 #ifdef RS6K
388  int mode = *Mode;
389  int method = mode%10;
390  int index_adj = *Index_adj;
391 
392  if (method == SORT_INT && index_adj == 1) { /* 32-bit ints ; Fortran-arrays */
393  jisort_( N, Inc, Start_addr, Data, index ); /* from jsort.F in ifsaux */
394  *retc = *N;
395  }
396  else if (method == SORT_R64 && index_adj == 1) { /* 64-bit reals ; Fortran-arrays */
397  jdsort_( N, Inc, Start_addr, Data, index ); /* from jsort.F in ifsaux */
398  *retc = *N;
399  }
400  else { /* Any other type => revert to the generic rsort32 */
401  rsort32_(Mode, N, Inc, Start_addr, Data, index, Index_adj, retc);
402  }
403 #else
404  /* Any other machine than IBM/RS6000 => revert to the generic rsort32 */
405  rsort32_(Mode, N, Inc, Start_addr, Data, index, Index_adj, retc);
406 #endif
407 }
408 
409 
410 static
411 void (*default_rsort32_func)(const int *Mode,
412  const int *N,
413  const int *Inc,
414  const int *Start_addr,
415  Uint32 Data[],
416  int index[],
417  const int *Index_adj,
418  int *retc) =
419 #ifdef RS6K
421 #else
422  rsort32_
423 #endif
424 ;
425 
426 void
427 rsort32_func_(const int *Mode,
428  const int *N,
429  const int *Inc,
430  const int *Start_addr,
431  Uint32 Data[],
432  int index[],
433  const int *Index_adj,
434  int *retc)
435 {
437  N,
438  Inc,
439  Start_addr,
440  Data,
441  index,
442  Index_adj,
443  retc);
444 }
445 
446 void
447 rsort32_setup_(void (*func)(const int *Mode,
448  const int *N,
449  const int *Inc,
450  const int *Start_addr,
451  Uint32 Data[],
452  int index[],
453  const int *Index_adj,
454  int *retc),
455  int *speedup)
456 {
457  if (func) default_rsort32_func = func;
458  if (speedup) SpeedUp = *speedup;
459 }
void rsort32_ibm_(const int *Mode, const int *N, const int *Inc, const int *Start_addr, Uint32 Data[], int index[], const int *Index_adj, int *retc)
Definition: rsort32.c:378
ERROR in method
Definition: ecsort_shared.h:90
void rsort32_setup_(void(*func)(const int *Mode, const int *N, const int *Inc, const int *Start_addr, Uint32 Data[], int index[], const int *Index_adj, int *retc), int *speedup)
Definition: rsort32.c:447
static void(* default_rsort32_func)(const int *Mode, const int *N, const int *Inc, const int *Start_addr, Uint32 Data[], int index[], const int *Index_adj, int *retc)
Definition: rsort32.c:411
unsigned int Uint32
Definition: rsort32.c:43
unsigned char Uchar
Definition: rsort32.c:44
void rsort32_(const int *Mode, const int *N, const int *Inc, const int *Start_addr, Uint32 Data[], int index[], const int *Index_adj, int *retc)
Definition: rsort32.c:97
long long int ll_t
Definition: rsort32.c:74
int ec_is_little_endian()
Definition: endian.c:19
ERROR in n
Definition: ecsort_shared.h:90
static int SpeedUp
Definition: rsort32.c:61
unsigned int Uint32
Definition: countingsort.c:43
intent(out) overrides sub arrays one Sort by the least significant key first sum(iindex(1:n))
static int mask
Definition: ifssig.c:38
ERROR in index
Definition: ecsort_shared.h:90
void rsort32_func_(const int *Mode, const int *N, const int *Inc, const int *Start_addr, Uint32 Data[], int index[], const int *Index_adj, int *retc)
Definition: rsort32.c:427