LLVM OpenMP* Runtime Library
kmp_alloc.cpp
1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_io.h"
18 #include "kmp_wrapper_malloc.h"
19 
20 // Disable bget when it is not used
21 #if KMP_USE_BGET
22 
23 /* Thread private buffer management code */
24 
25 typedef int (*bget_compact_t)(size_t, int);
26 typedef void *(*bget_acquire_t)(size_t);
27 typedef void (*bget_release_t)(void *);
28 
29 /* NOTE: bufsize must be a signed datatype */
30 
31 #if KMP_OS_WINDOWS
32 #if KMP_ARCH_X86 || KMP_ARCH_ARM
33 typedef kmp_int32 bufsize;
34 #else
35 typedef kmp_int64 bufsize;
36 #endif
37 #else
38 typedef ssize_t bufsize;
39 #endif
40 
41 /* The three modes of operation are fifo search, lifo search, and best-fit */
42 
43 typedef enum bget_mode {
44  bget_mode_fifo = 0,
45  bget_mode_lifo = 1,
46  bget_mode_best = 2
47 } bget_mode_t;
48 
49 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
50 static void *bget(kmp_info_t *th, bufsize size);
51 static void *bgetz(kmp_info_t *th, bufsize size);
52 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
53 static void brel(kmp_info_t *th, void *buf);
54 static void bectl(kmp_info_t *th, bget_compact_t compact,
55  bget_acquire_t acquire, bget_release_t release,
56  bufsize pool_incr);
57 
58 #ifdef KMP_DEBUG
59 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
60  bufsize *maxfree, long *nget, long *nrel);
61 static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
62  long *npget, long *nprel, long *ndget, long *ndrel);
63 static void bufdump(kmp_info_t *th, void *buf);
64 static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
65 static int bpoolv(kmp_info_t *th, void *pool);
66 #endif
67 
68 /* BGET CONFIGURATION */
69 /* Buffer allocation size quantum: all buffers allocated are a
70  multiple of this size. This MUST be a power of two. */
71 
72 /* On IA-32 architecture with Linux* OS, malloc() does not
73  ensure 16-byte alignment */
74 
75 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
76 
77 #define SizeQuant 8
78 #define AlignType double
79 
80 #else
81 
82 #define SizeQuant 16
83 #define AlignType _Quad
84 
85 #endif
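
/* For illustration: because SizeQuant is a power of two, bget() can round
   every request up to the quantum with a simple mask, as it does below:
       size = (size + (SizeQuant - 1)) & ~(SizeQuant - 1);
   e.g. with SizeQuant == 8 a 13-byte request is rounded up to 16 bytes. */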
86 
87 // Define this symbol to enable the bstats() function which calculates the
88 // total free space in the buffer pool, the largest available buffer, and the
89 // total space currently allocated.
90 #define BufStats 1
91 
92 #ifdef KMP_DEBUG
93 
94 // Define this symbol to enable the bpoold() function which dumps the buffers
95 // in a buffer pool.
96 #define BufDump 1
97 
98 // Define this symbol to enable the bpoolv() function for validating a buffer
99 // pool.
100 #define BufValid 1
101 
102 // Define this symbol to enable the bufdump() function which allows dumping the
103 // contents of an allocated or free buffer.
104 #define DumpData 1
105 
106 #ifdef NOT_USED_NOW
107 
108 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
109 // who attempt to use pointers into released buffers.
110 #define FreeWipe 1
111 
112 // Use a best fit algorithm when searching for space for an allocation request.
113 // This uses memory more efficiently, but allocation will be much slower.
114 #define BestFit 1
115 
116 #endif /* NOT_USED_NOW */
117 #endif /* KMP_DEBUG */
118 
119 static bufsize bget_bin_size[] = {
120  0,
121  // 1 << 6, /* .5 Cache line */
122  1 << 7, /* 1 Cache line, new */
123  1 << 8, /* 2 Cache lines */
124  1 << 9, /* 4 Cache lines, new */
125  1 << 10, /* 8 Cache lines */
126  1 << 11, /* 16 Cache lines, new */
127  1 << 12, 1 << 13, /* new */
128  1 << 14, 1 << 15, /* new */
129  1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
130  1 << 21, /* 2MB */
131  1 << 22, /* 4MB */
132  1 << 23, /* 8MB */
133  1 << 24, /* 16MB */
134  1 << 25, /* 32MB */
135 };
136 
137 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
138 
139 struct bfhead;
140 
141 // Declare the interface, including the requested buffer size type, bufsize.
142 
143 /* Queue links */
144 typedef struct qlinks {
145  struct bfhead *flink; /* Forward link */
146  struct bfhead *blink; /* Backward link */
147 } qlinks_t;
148 
149 /* Header in allocated and free buffers */
150 typedef struct bhead2 {
151  kmp_info_t *bthr; /* The thread which owns the buffer pool */
152  bufsize prevfree; /* Relative link back to previous free buffer in memory or
153  0 if previous buffer is allocated. */
154  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
155 } bhead2_t;
156 
157 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
158 typedef union bhead {
159  KMP_ALIGN(SizeQuant)
160  AlignType b_align;
161  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
162  bhead2_t bb;
163 } bhead_t;
164 #define BH(p) ((bhead_t *)(p))
165 
166 /* Header in directly allocated buffers (by acqfcn) */
167 typedef struct bdhead {
168  bufsize tsize; /* Total size, including overhead */
169  bhead_t bh; /* Common header */
170 } bdhead_t;
171 #define BDH(p) ((bdhead_t *)(p))
172 
173 /* Header in free buffers */
174 typedef struct bfhead {
175  bhead_t bh; /* Common allocated/free header */
176  qlinks_t ql; /* Links on free list */
177 } bfhead_t;
178 #define BFH(p) ((bfhead_t *)(p))
179 
180 typedef struct thr_data {
181  bfhead_t freelist[MAX_BGET_BINS];
182 #if BufStats
183  size_t totalloc; /* Total space currently allocated */
184  long numget, numrel; /* Number of bget() and brel() calls */
185  long numpblk; /* Number of pool blocks */
186  long numpget, numprel; /* Number of block gets and rels */
187  long numdget, numdrel; /* Number of direct gets and rels */
188 #endif /* BufStats */
189 
190  /* Automatic expansion block management functions */
191  bget_compact_t compfcn;
192  bget_acquire_t acqfcn;
193  bget_release_t relfcn;
194 
195  bget_mode_t mode; /* what allocation mode to use? */
196 
197  bufsize exp_incr; /* Expansion block size */
198  bufsize pool_len; /* 0: no bpool calls have been made
199  -1: not all pool blocks are the same size
200  >0: (common) block size for all bpool calls made so far
201  */
202  bfhead_t *last_pool; /* Last pool owned by this thread (delayed deallocation) */
203 } thr_data_t;
204 
205 /* Minimum allocation quantum: */
206 #define QLSize (sizeof(qlinks_t))
207 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
208 #define MaxSize \
209  (bufsize)( \
210  ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
211 // Maximum for the requested size.
212 
213 /* End sentinel: value placed in bsize field of dummy block delimiting
214  end of pool block. The most negative number which will fit in a
215  bufsize, defined in a way that the compiler will accept. */
216 
217 #define ESent \
218  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
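
/* Worked example (assuming a 64-bit bufsize and SizeQuant == 8; illustrative
   only): MaxSize = ~(0x8000000000000000 | 0x7) = 0x7FFFFFFFFFFFFFF8, i.e. the
   largest positive bufsize rounded down to a SizeQuant multiple, and
   ESent = -((2^62 - 1) * 2) - 2 = -(2^63 - 2) - 2 = -2^63, i.e. the most
   negative bufsize, spelled so the compiler never sees an overflowing
   positive literal. */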
219 
220 /* Thread Data management routines */
221 static int bget_get_bin(bufsize size) {
222  // binary chop bins
223  int lo = 0, hi = MAX_BGET_BINS - 1;
224 
225  KMP_DEBUG_ASSERT(size > 0);
226 
227  while ((hi - lo) > 1) {
228  int mid = (lo + hi) >> 1;
229  if (size < bget_bin_size[mid])
230  hi = mid - 1;
231  else
232  lo = mid;
233  }
234 
235  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
236 
237  return lo;
238 }
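
/* For example, a 200-byte request maps to bin 1 (the 1 << 7 == 128-byte bin)
   and a 5000-byte request maps to bin 6 (the 1 << 12 == 4096-byte bin);
   bget() then scans the free lists from the returned bin upwards. */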
239 
240 static void set_thr_data(kmp_info_t *th) {
241  int i;
242  thr_data_t *data;
243 
244  data = (thr_data_t *)((!th->th.th_local.bget_data)
245  ? __kmp_allocate(sizeof(*data))
246  : th->th.th_local.bget_data);
247 
248  memset(data, '\0', sizeof(*data));
249 
250  for (i = 0; i < MAX_BGET_BINS; ++i) {
251  data->freelist[i].ql.flink = &data->freelist[i];
252  data->freelist[i].ql.blink = &data->freelist[i];
253  }
254 
255  th->th.th_local.bget_data = data;
256  th->th.th_local.bget_list = 0;
257 #if !USE_CMP_XCHG_FOR_BGET
258 #ifdef USE_QUEUING_LOCK_FOR_BGET
259  __kmp_init_lock(&th->th.th_local.bget_lock);
260 #else
261  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
262 #endif /* USE_LOCK_FOR_BGET */
263 #endif /* ! USE_CMP_XCHG_FOR_BGET */
264 }
265 
266 static thr_data_t *get_thr_data(kmp_info_t *th) {
267  thr_data_t *data;
268 
269  data = (thr_data_t *)th->th.th_local.bget_data;
270 
271  KMP_DEBUG_ASSERT(data != 0);
272 
273  return data;
274 }
275 
276 #ifdef KMP_DEBUG
277 
278 static void __kmp_bget_validate_queue(kmp_info_t *th) {
279  /* NOTE: assume that the global_lock is held */
280 
281  void *p = (void *)th->th.th_local.bget_list;
282 
283  while (p != 0) {
284  bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
285 
286  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
287  p = (void *)b->ql.flink;
288  }
289 }
290 
291 #endif
292 
293 /* Walk the list of enqueued buffers and release each of them */
294 static void __kmp_bget_dequeue(kmp_info_t *th) {
295  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
296 
297  if (p != 0) {
298 #if USE_CMP_XCHG_FOR_BGET
299  {
300  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
301  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
302  CCAST(void *, old_value), nullptr)) {
303  KMP_CPU_PAUSE();
304  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
305  }
306  p = CCAST(void *, old_value);
307  }
308 #else /* ! USE_CMP_XCHG_FOR_BGET */
309 #ifdef USE_QUEUING_LOCK_FOR_BGET
310  __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
311 #else
312  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
313 #endif /* USE_QUEUING_LOCK_FOR_BGET */
314 
315  p = (void *)th->th.th_local.bget_list;
316  th->th.th_local.bget_list = 0;
317 
318 #ifdef USE_QUEUING_LOCK_FOR_BGET
319  __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
320 #else
321  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
322 #endif
323 #endif /* USE_CMP_XCHG_FOR_BGET */
324 
325  /* Check again to make sure the list is not empty */
326  while (p != 0) {
327  void *buf = p;
328  bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
329 
330  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
331  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
332  (kmp_uintptr_t)th); // clear possible mark
333  KMP_DEBUG_ASSERT(b->ql.blink == 0);
334 
335  p = (void *)b->ql.flink;
336 
337  brel(th, buf);
338  }
339  }
340 }
341 
342 /* Chain together the free buffers by using the thread owner field */
343 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
344 #ifdef USE_QUEUING_LOCK_FOR_BGET
345  ,
346  kmp_int32 rel_gtid
347 #endif
348  ) {
349  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
350 
351  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
352  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
353  (kmp_uintptr_t)th); // clear possible mark
354 
355  b->ql.blink = 0;
356 
357  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
358  __kmp_gtid_from_thread(th)));
359 
360 #if USE_CMP_XCHG_FOR_BGET
361  {
362  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
363  /* the next pointer must be set before setting bget_list to buf to avoid
364  exposing a broken list to other threads, even for an instant. */
365  b->ql.flink = BFH(CCAST(void *, old_value));
366 
367  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
368  CCAST(void *, old_value), buf)) {
369  KMP_CPU_PAUSE();
370  old_value = TCR_PTR(th->th.th_local.bget_list);
371  /* the next pointer must be set before setting bget_list to buf to avoid
372  exposing a broken list to other threads, even for an instant. */
373  b->ql.flink = BFH(CCAST(void *, old_value));
374  }
375  }
376 #else /* ! USE_CMP_XCHG_FOR_BGET */
377 #ifdef USE_QUEUING_LOCK_FOR_BGET
378  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
379 #else
380  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
381 #endif
382 
383  b->ql.flink = BFH(th->th.th_local.bget_list);
384  th->th.th_local.bget_list = (void *)buf;
385 
386 #ifdef USE_QUEUING_LOCK_FOR_BGET
387  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
388 #else
389  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
390 #endif
391 #endif /* USE_CMP_XCHG_FOR_BGET */
392 }
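
/* The enqueue/dequeue pair above is a lock-free LIFO: a non-owner thread
   pushes a freed buffer onto the owner's bget_list with a CAS, and the owner
   later detaches the whole chain and releases each buffer with brel(). A
   minimal sketch of the same protocol using std::atomic (illustrative only;
   the runtime uses the TCR/TCW/KMP_COMPARE_AND_STORE_PTR machinery above): */
#if 0
#include <atomic>

struct example_node { example_node *next; };

// What __kmp_bget_enqueue() does: link the node to the current head before
// publishing it, retrying the CAS until the head is swung to the new node.
static void example_push(std::atomic<example_node *> &head, example_node *n) {
  example_node *old_head = head.load();
  do {
    n->next = old_head;
  } while (!head.compare_exchange_weak(old_head, n));
}

// What __kmp_bget_dequeue() does (it uses a CAS loop rather than exchange):
// detach the whole chain at once, then walk it and release each buffer.
static example_node *example_pop_all(std::atomic<example_node *> &head) {
  return head.exchange(nullptr);
}
#endif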
393 
394 /* insert buffer back onto a new freelist */
395 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
396  int bin;
397 
398  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
399  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
400 
401  bin = bget_get_bin(b->bh.bb.bsize);
402 
403  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
404  &thr->freelist[bin]);
405  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
406  &thr->freelist[bin]);
407 
408  b->ql.flink = &thr->freelist[bin];
409  b->ql.blink = thr->freelist[bin].ql.blink;
410 
411  thr->freelist[bin].ql.blink = b;
412  b->ql.blink->ql.flink = b;
413 }
414 
415 /* unlink the buffer from the old freelist */
416 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
417  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
418  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
419 
420  b->ql.blink->ql.flink = b->ql.flink;
421  b->ql.flink->ql.blink = b->ql.blink;
422 }
423 
424 /* GET STATS -- check info on free list */
425 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
426  thr_data_t *thr = get_thr_data(th);
427  int bin;
428 
429  *total_free = *max_free = 0;
430 
431  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
432  bfhead_t *b, *best;
433 
434  best = &thr->freelist[bin];
435  b = best->ql.flink;
436 
437  while (b != &thr->freelist[bin]) {
438  *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
439  if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
440  best = b;
441 
442  /* Link to next buffer */
443  b = b->ql.flink;
444  }
445 
446  if (*max_free < best->bh.bb.bsize)
447  *max_free = best->bh.bb.bsize;
448  }
449 
450  if (*max_free > (bufsize)sizeof(bhead_t))
451  *max_free -= sizeof(bhead_t);
452 }
453 
454 /* BGET -- Allocate a buffer. */
455 static void *bget(kmp_info_t *th, bufsize requested_size) {
456  thr_data_t *thr = get_thr_data(th);
457  bufsize size = requested_size;
458  bfhead_t *b;
459  void *buf;
460  int compactseq = 0;
461  int use_blink = 0;
462  /* For BestFit */
463  bfhead_t *best;
464 
465  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
466  return NULL;
467  }; // if
468 
469  __kmp_bget_dequeue(th); /* Release any queued buffers */
470 
471  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
472  size = SizeQ;
473  }
474 #if defined(SizeQuant) && (SizeQuant > 1)
475  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
476 #endif
477 
478  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
479  KMP_DEBUG_ASSERT(size >= 0);
480  KMP_DEBUG_ASSERT(size % SizeQuant == 0);
481 
482  use_blink = (thr->mode == bget_mode_lifo);
483 
484  /* If a compact function was provided in the call to bectl(), wrap
485  a loop around the allocation process to allow compaction to
486  intervene in case we don't find a suitable buffer in the chain. */
487 
488  for (;;) {
489  int bin;
490 
491  for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
492  /* Link to next buffer */
493  b = (use_blink ? thr->freelist[bin].ql.blink
494  : thr->freelist[bin].ql.flink);
495 
496  if (thr->mode == bget_mode_best) {
497  best = &thr->freelist[bin];
498 
499  /* Scan the free list searching for the first buffer big enough
500  to hold the requested size buffer. */
501  while (b != &thr->freelist[bin]) {
502  if (b->bh.bb.bsize >= (bufsize)size) {
503  if ((best == &thr->freelist[bin]) ||
504  (b->bh.bb.bsize < best->bh.bb.bsize)) {
505  best = b;
506  }
507  }
508 
509  /* Link to next buffer */
510  b = (use_blink ? b->ql.blink : b->ql.flink);
511  }
512  b = best;
513  }
514 
515  while (b != &thr->freelist[bin]) {
516  if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
517 
518  // Buffer is big enough to satisfy the request. Allocate it to the
519  // caller. We must decide whether the buffer is large enough to split
520  // into the part given to the caller and a free buffer that remains
521  // on the free list, or whether the entire buffer should be removed
522  // from the free list and given to the caller in its entirety. We
523  // only split the buffer if enough room remains for a header plus the
524  // minimum quantum of allocation.
525  if ((b->bh.bb.bsize - (bufsize)size) >
526  (bufsize)(SizeQ + (sizeof(bhead_t)))) {
527  bhead_t *ba, *bn;
528 
529  ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
530  bn = BH(((char *)ba) + size);
531 
532  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
533 
534  /* Subtract size from length of free block. */
535  b->bh.bb.bsize -= (bufsize)size;
536 
537  /* Link allocated buffer to the previous free buffer. */
538  ba->bb.prevfree = b->bh.bb.bsize;
539 
540  /* Plug negative size into user buffer. */
541  ba->bb.bsize = -size;
542 
543  /* Mark this buffer as owned by this thread. */
544  TCW_PTR(ba->bb.bthr,
545  th); // not an allocated address (do not mark it)
546  /* Mark buffer after this one not preceded by free block. */
547  bn->bb.prevfree = 0;
548 
549  // unlink buffer from old freelist, and reinsert into new freelist
550  __kmp_bget_remove_from_freelist(b);
551  __kmp_bget_insert_into_freelist(thr, b);
552 #if BufStats
553  thr->totalloc += (size_t)size;
554  thr->numget++; /* Increment number of bget() calls */
555 #endif
556  buf = (void *)((((char *)ba) + sizeof(bhead_t)));
557  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
558  return buf;
559  } else {
560  bhead_t *ba;
561 
562  ba = BH(((char *)b) + b->bh.bb.bsize);
563 
564  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
565 
566  /* The buffer isn't big enough to split. Give the whole
567  shebang to the caller and remove it from the free list. */
568 
569  __kmp_bget_remove_from_freelist(b);
570 #if BufStats
571  thr->totalloc += (size_t)b->bh.bb.bsize;
572  thr->numget++; /* Increment number of bget() calls */
573 #endif
574  /* Negate size to mark buffer allocated. */
575  b->bh.bb.bsize = -(b->bh.bb.bsize);
576 
577  /* Mark this buffer as owned by this thread. */
578  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
579  /* Zero the back pointer in the next buffer in memory
580  to indicate that this buffer is allocated. */
581  ba->bb.prevfree = 0;
582 
583  /* Give user buffer starting at queue links. */
584  buf = (void *)&(b->ql);
585  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
586  return buf;
587  }
588  }
589 
590  /* Link to next buffer */
591  b = (use_blink ? b->ql.blink : b->ql.flink);
592  }
593  }
594 
595  /* We failed to find a buffer. If there's a compact function defined,
596  notify it of the size requested. If it returns TRUE, try the allocation
597  again. */
598 
599  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
600  break;
601  }
602  }
603 
604  /* No buffer available with requested size free. */
605 
606  /* Don't give up yet -- look in the reserve supply. */
607  if (thr->acqfcn != 0) {
608  if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
609  /* Request is too large to fit in a single expansion block.
610  Try to satisfy it by a direct buffer acquisition. */
611  bdhead_t *bdh;
612 
613  size += sizeof(bdhead_t) - sizeof(bhead_t);
614 
615  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
616 
617  /* richryan */
618  bdh = BDH((*thr->acqfcn)((bufsize)size));
619  if (bdh != NULL) {
620 
621  // Mark the buffer special by setting size field of its header to zero.
622  bdh->bh.bb.bsize = 0;
623 
624  /* Mark this buffer as owned by this thread. */
625  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
626  // because direct buffer never goes to free list
627  bdh->bh.bb.prevfree = 0;
628  bdh->tsize = size;
629 #if BufStats
630  thr->totalloc += (size_t)size;
631  thr->numget++; /* Increment number of bget() calls */
632  thr->numdget++; /* Direct bget() call count */
633 #endif
634  buf = (void *)(bdh + 1);
635  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
636  return buf;
637  }
638 
639  } else {
640 
641  /* Try to obtain a new expansion block */
642  void *newpool;
643 
644  KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
645 
646  /* richryan */
647  newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
648  KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
649  if (newpool != NULL) {
650  bpool(th, newpool, thr->exp_incr);
651  buf = bget(
652  th, requested_size); /* This can't, I say, can't get into a loop. */
653  return buf;
654  }
655  }
656  }
657 
658  /* Still no buffer available */
659 
660  return NULL;
661 }
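
/* Worked example of the split rule above (illustrative sizes, SizeQuant == 8):
   a 100-byte request is rounded up to 104 and then grows by sizeof(bhead_t).
   If the chosen free block still has more than SizeQ + sizeof(bhead_t) bytes
   left after carving that amount off its high end, it is split and the low
   part stays free (possibly moving to a smaller bin); otherwise the whole
   block is handed to the caller by simply negating its bsize. */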
662 
663 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
664  the entire contents of the buffer to zero, not just the
665  region requested by the caller. */
666 
667 static void *bgetz(kmp_info_t *th, bufsize size) {
668  char *buf = (char *)bget(th, size);
669 
670  if (buf != NULL) {
671  bhead_t *b;
672  bufsize rsize;
673 
674  b = BH(buf - sizeof(bhead_t));
675  rsize = -(b->bb.bsize);
676  if (rsize == 0) {
677  bdhead_t *bd;
678 
679  bd = BDH(buf - sizeof(bdhead_t));
680  rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
681  } else {
682  rsize -= sizeof(bhead_t);
683  }
684 
685  KMP_DEBUG_ASSERT(rsize >= size);
686 
687  (void)memset(buf, 0, (bufsize)rsize);
688  }
689  return ((void *)buf);
690 }
691 
692 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
693  simply in terms of brel() and bget(). It could be
694  enhanced to allow the buffer to grow into adjacent free
695  blocks and to avoid moving data unnecessarily. */
696 
697 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
698  void *nbuf;
699  bufsize osize; /* Old size of buffer */
700  bhead_t *b;
701 
702  nbuf = bget(th, size);
703  if (nbuf == NULL) { /* Acquire new buffer */
704  return NULL;
705  }
706  if (buf == NULL) {
707  return nbuf;
708  }
709  b = BH(((char *)buf) - sizeof(bhead_t));
710  osize = -b->bb.bsize;
711  if (osize == 0) {
712  /* Buffer acquired directly through acqfcn. */
713  bdhead_t *bd;
714 
715  bd = BDH(((char *)buf) - sizeof(bdhead_t));
716  osize = bd->tsize - (bufsize)sizeof(bdhead_t);
717  } else {
718  osize -= sizeof(bhead_t);
719  };
720 
721  KMP_DEBUG_ASSERT(osize > 0);
722 
723  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
724  (size_t)((size < osize) ? size : osize));
725  brel(th, buf);
726 
727  return nbuf;
728 }
729 
730 /* BREL -- Release a buffer. */
731 static void brel(kmp_info_t *th, void *buf) {
732  thr_data_t *thr = get_thr_data(th);
733  bfhead_t *b, *bn;
734  kmp_info_t *bth;
735 
736  KMP_DEBUG_ASSERT(buf != NULL);
737  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
738 
739  b = BFH(((char *)buf) - sizeof(bhead_t));
740 
741  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
742  bdhead_t *bdh;
743 
744  bdh = BDH(((char *)buf) - sizeof(bdhead_t));
745  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
746 #if BufStats
747  thr->totalloc -= (size_t)bdh->tsize;
748  thr->numdrel++; /* Number of direct releases */
749  thr->numrel++; /* Increment number of brel() calls */
750 #endif /* BufStats */
751 #ifdef FreeWipe
752  (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
753 #endif /* FreeWipe */
754 
755  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
756 
757  KMP_DEBUG_ASSERT(thr->relfcn != 0);
758  (*thr->relfcn)((void *)bdh); /* Release it directly. */
759  return;
760  }
761 
762  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
763  ~1); // clear possible mark before comparison
764  if (bth != th) {
765  /* Add this buffer to be released by the owning thread later */
766  __kmp_bget_enqueue(bth, buf
767 #ifdef USE_QUEUING_LOCK_FOR_BGET
768  ,
769  __kmp_gtid_from_thread(th)
770 #endif
771  );
772  return;
773  }
774 
775  /* Buffer size must be negative, indicating that the buffer is allocated. */
776  if (b->bh.bb.bsize >= 0) {
777  bn = NULL;
778  }
779  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
780 
781  /* Back pointer in next buffer must be zero, indicating the same thing: */
782 
783  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
784 
785 #if BufStats
786  thr->numrel++; /* Increment number of brel() calls */
787  thr->totalloc += (size_t)b->bh.bb.bsize;
788 #endif
789 
790  /* If the back link is nonzero, the previous buffer is free. */
791 
792  if (b->bh.bb.prevfree != 0) {
793  /* The previous buffer is free. Consolidate this buffer with it by adding
794  the length of this buffer to the previous free buffer. Note that we
795  subtract the size in the buffer being released, since it's negative to
796  indicate that the buffer is allocated. */
797  bufsize size = b->bh.bb.bsize;
798 
799  /* Make the previous buffer the one we're working on. */
800  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
801  b->bh.bb.prevfree);
802  b = BFH(((char *)b) - b->bh.bb.prevfree);
803  b->bh.bb.bsize -= size;
804 
805  /* unlink the buffer from the old freelist */
806  __kmp_bget_remove_from_freelist(b);
807  } else {
808  /* The previous buffer is allocated. Mark this buffer size as positive
809  (i.e. free) and fall through to place the buffer on the free list as an
810  isolated free block. */
811  b->bh.bb.bsize = -b->bh.bb.bsize;
812  }
813 
814  /* insert buffer back onto a new freelist */
815  __kmp_bget_insert_into_freelist(thr, b);
816 
817  /* Now we look at the next buffer in memory, located by advancing from
818  the start of this buffer by its size, to see if that buffer is
819  free. If it is, we combine this buffer with the next one in
820  memory, dechaining the second buffer from the free list. */
821  bn = BFH(((char *)b) + b->bh.bb.bsize);
822  if (bn->bh.bb.bsize > 0) {
823 
824  /* The buffer is free. Remove it from the free list and add
825  its size to that of our buffer. */
826  KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
827  bn->bh.bb.bsize);
828 
829  __kmp_bget_remove_from_freelist(bn);
830 
831  b->bh.bb.bsize += bn->bh.bb.bsize;
832 
833  /* unlink the buffer from the old freelist, and reinsert it into the new
834  * freelist */
835  __kmp_bget_remove_from_freelist(b);
836  __kmp_bget_insert_into_freelist(thr, b);
837 
838  /* Finally, advance to the buffer that follows the newly
839  consolidated free block. We must set its backpointer to the
840  head of the consolidated free block. We know the next block
841  must be an allocated block because the process of recombination
842  guarantees that two free blocks will never be contiguous in
843  memory. */
844  bn = BFH(((char *)b) + b->bh.bb.bsize);
845  }
846 #ifdef FreeWipe
847  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
848  (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
849 #endif
850  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
851 
852  /* The next buffer is allocated. Set the backpointer in it to point
853  to this buffer, the previous free buffer in memory. */
854 
855  bn->bh.bb.prevfree = b->bh.bb.bsize;
856 
857  /* If a block-release function is defined, and this free buffer
858  constitutes the entire block, release it. Note that pool_len
859  is defined in such a way that the test will fail unless all
860  pool blocks are the same size. */
861  if (thr->relfcn != 0 &&
862  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
863 #if BufStats
864  if (thr->numpblk !=
865  1) { /* Do not release the last buffer until finalization time */
866 #endif
867 
868  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
869  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
870  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
871  b->bh.bb.bsize);
872 
873  /* Unlink the buffer from the free list */
874  __kmp_bget_remove_from_freelist(b);
875 
876  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
877 
878  (*thr->relfcn)(b);
879 #if BufStats
880  thr->numprel++; /* Nr of expansion block releases */
881  thr->numpblk--; /* Total number of blocks */
882  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
883 
884  // avoid leaving a stale last_pool pointer around if it is being deallocated
885  if (thr->last_pool == b)
886  thr->last_pool = 0;
887  } else {
888  thr->last_pool = b;
889  }
890 #endif /* BufStats */
891  }
892 }
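
/* Invariants asserted throughout brel() and bget(): a free buffer has
   bsize > 0 and the header that follows it in memory carries
   prevfree == bsize, while an allocated buffer has bsize < 0 and the
   following header carries prevfree == 0. Coalescing therefore only has to
   look one header backwards (via prevfree) and one header forwards (via the
   buffer's own size). */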
893 
894 /* BECTL -- Establish automatic pool expansion control */
895 static void bectl(kmp_info_t *th, bget_compact_t compact,
896  bget_acquire_t acquire, bget_release_t release,
897  bufsize pool_incr) {
898  thr_data_t *thr = get_thr_data(th);
899 
900  thr->compfcn = compact;
901  thr->acqfcn = acquire;
902  thr->relfcn = release;
903  thr->exp_incr = pool_incr;
904 }
905 
906 /* BPOOL -- Add a region of memory to the buffer pool. */
907 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
908  /* int bin = 0; */
909  thr_data_t *thr = get_thr_data(th);
910  bfhead_t *b = BFH(buf);
911  bhead_t *bn;
912 
913  __kmp_bget_dequeue(th); /* Release any queued buffers */
914 
915 #ifdef SizeQuant
916  len &= ~(SizeQuant - 1);
917 #endif
918  if (thr->pool_len == 0) {
919  thr->pool_len = len;
920  } else if (len != thr->pool_len) {
921  thr->pool_len = -1;
922  }
923 #if BufStats
924  thr->numpget++; /* Number of block acquisitions */
925  thr->numpblk++; /* Number of blocks total */
926  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
927 #endif /* BufStats */
928 
929  /* Since the block is initially occupied by a single free buffer,
930  it had better not be (much) larger than the largest buffer
931  whose size we can store in bhead.bb.bsize. */
932  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
933 
934  /* Clear the backpointer at the start of the block to indicate that
935  there is no free block prior to this one. That blocks
936  recombination when the first block in memory is released. */
937  b->bh.bb.prevfree = 0;
938 
939  /* Create a dummy allocated buffer at the end of the pool. This dummy
940  buffer is seen when a buffer at the end of the pool is released and
941  blocks recombination of the last buffer with the dummy buffer at
942  the end. The length in the dummy buffer is set to the largest
943  negative number to denote the end of the pool for diagnostic
944  routines (this specific value is not counted on by the actual
945  allocation and release functions). */
946  len -= sizeof(bhead_t);
947  b->bh.bb.bsize = (bufsize)len;
948  /* Set the owner of this buffer */
949  TCW_PTR(b->bh.bb.bthr,
950  (kmp_info_t *)((kmp_uintptr_t)th |
951  1)); // mark the buffer as allocated address
952 
953  /* Chain the new block to the free list. */
954  __kmp_bget_insert_into_freelist(thr, b);
955 
956 #ifdef FreeWipe
957  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
958  (size_t)(len - sizeof(bfhead_t)));
959 #endif
960  bn = BH(((char *)b) + len);
961  bn->bb.prevfree = (bufsize)len;
962  /* Definition of ESent assumes two's complement! */
963  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
964 
965  bn->bb.bsize = ESent;
966 }
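
/* Resulting layout of a pool block after bpool() (len already rounded down to
   a SizeQuant multiple; sketch only):

   buf                                   buf + len - sizeof(bhead_t)
   |                                     |
   v                                     v
   +-------------------------------------+------------------------------+
   | free buffer                         | dummy "allocated" header     |
   |   bsize    = len - sizeof(bhead_t)  | (end sentinel)               |
   |   prevfree = 0                      |   bsize    = ESent           |
   |                                     |   prevfree = len - sizeof(   |
   |                                     |                    bhead_t)  |
   +-------------------------------------+------------------------------+

   The sentinel header stops coalescing from running past the end of the
   block and marks the end of the pool for the diagnostic routines. */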
967 
968 /* BFREED -- Dump the free lists for this thread. */
969 static void bfreed(kmp_info_t *th) {
970  int bin = 0, count = 0;
971  int gtid = __kmp_gtid_from_thread(th);
972  thr_data_t *thr = get_thr_data(th);
973 
974 #if BufStats
975  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
976  " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
977  " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
978  " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
979  " drel=%" KMP_INT64_SPEC "\n",
980  gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
981  (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
982  (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
983  (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
984 #endif
985 
986  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
987  bfhead_t *b;
988 
989  for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
990  b = b->ql.flink) {
991  bufsize bs = b->bh.bb.bsize;
992 
993  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
994  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
995  KMP_DEBUG_ASSERT(bs > 0);
996 
997  count += 1;
998 
999  __kmp_printf_no_lock(
1000  "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
1001  (long)bs);
1002 #ifdef FreeWipe
1003  {
1004  char *lerr = ((char *)b) + sizeof(bfhead_t);
1005  if ((bs > sizeof(bfhead_t)) &&
1006  ((*lerr != 0x55) ||
1007  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1008  0))) {
1009  __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
1010  "free block have been overstored.)\n",
1011  gtid);
1012  }
1013  }
1014 #endif
1015  }
1016  }
1017 
1018  if (count == 0)
1019  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1020 }
1021 
1022 #ifdef KMP_DEBUG
1023 
1024 #if BufStats
1025 
1026 /* BSTATS -- Return buffer allocation free space statistics. */
1027 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
1028  bufsize *maxfree, long *nget, long *nrel) {
1029  int bin = 0;
1030  thr_data_t *thr = get_thr_data(th);
1031 
1032  *nget = thr->numget;
1033  *nrel = thr->numrel;
1034  *curalloc = (bufsize)thr->totalloc;
1035  *totfree = 0;
1036  *maxfree = -1;
1037 
1038  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1039  bfhead_t *b = thr->freelist[bin].ql.flink;
1040 
1041  while (b != &thr->freelist[bin]) {
1042  KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1043  *totfree += b->bh.bb.bsize;
1044  if (b->bh.bb.bsize > *maxfree) {
1045  *maxfree = b->bh.bb.bsize;
1046  }
1047  b = b->ql.flink; /* Link to next buffer */
1048  }
1049  }
1050 }
1051 
1052 /* BSTATSE -- Return extended statistics */
1053 static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
1054  long *npget, long *nprel, long *ndget, long *ndrel) {
1055  thr_data_t *thr = get_thr_data(th);
1056 
1057  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1058  *npool = thr->numpblk;
1059  *npget = thr->numpget;
1060  *nprel = thr->numprel;
1061  *ndget = thr->numdget;
1062  *ndrel = thr->numdrel;
1063 }
1064 
1065 #endif /* BufStats */
1066 
1067 /* BUFDUMP -- Dump the data in a buffer. This is called with the user
1068  data pointer, and backs up to the buffer header. It will
1069  dump either a free block or an allocated one. */
1070 static void bufdump(kmp_info_t *th, void *buf) {
1071  bfhead_t *b;
1072  unsigned char *bdump;
1073  bufsize bdlen;
1074 
1075  b = BFH(((char *)buf) - sizeof(bhead_t));
1076  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1077  if (b->bh.bb.bsize < 0) {
1078  bdump = (unsigned char *)buf;
1079  bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
1080  } else {
1081  bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
1082  bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
1083  }
1084 
1085  while (bdlen > 0) {
1086  int i, dupes = 0;
1087  bufsize l = bdlen;
1088  char bhex[50], bascii[20];
1089 
1090  if (l > 16) {
1091  l = 16;
1092  }
1093 
1094  for (i = 0; i < l; i++) {
1095  (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
1096  if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1097  bascii[i] = bdump[i];
1098  else
1099  bascii[i] = ' ';
1100  }
1101  bascii[i] = 0;
1102  (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1103  bdump += l;
1104  bdlen -= l;
1105  while ((bdlen > 16) &&
1106  (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
1107  dupes++;
1108  bdump += 16;
1109  bdlen -= 16;
1110  }
1111  if (dupes > 1) {
1112  (void)__kmp_printf_no_lock(
1113  " (%d lines [%d bytes] identical to above line skipped)\n", dupes,
1114  dupes * 16);
1115  } else if (dupes == 1) {
1116  bdump -= 16;
1117  bdlen += 16;
1118  }
1119  }
1120 }
1121 
1122 /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1123  If DUMPALLOC is nonzero, the contents of allocated buffers
1124  are dumped. If DUMPFREE is nonzero, free blocks are
1125  dumped as well. If FreeWipe checking is enabled, free
1126  blocks which have been clobbered will always be dumped. */
1127 static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
1128  bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));
1129 
1130  while (b->bh.bb.bsize != ESent) {
1131  bufsize bs = b->bh.bb.bsize;
1132 
1133  if (bs < 0) {
1134  bs = -bs;
1135  (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
1136  (long)bs);
1137  if (dumpalloc) {
1138  bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1139  }
1140  } else {
1141  const char *lerr = "";
1142 
1143  KMP_DEBUG_ASSERT(bs > 0);
1144  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1145  lerr = " (Bad free list links)";
1146  }
1147  (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1148  (long)bs, lerr);
1149 #ifdef FreeWipe
1150  lerr = ((char *)b) + sizeof(bfhead_t);
1151  if ((bs > sizeof(bfhead_t)) &&
1152  ((*lerr != 0x55) ||
1153  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1154  0))) {
1155  (void)__kmp_printf_no_lock(
1156  "(Contents of above free block have been overstored.)\n");
1157  bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1158  } else
1159 #endif
1160  if (dumpfree) {
1161  bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1162  }
1163  }
1164  b = BFH(((char *)b) + bs);
1165  }
1166 }
1167 
1168 /* BPOOLV -- Validate a buffer pool. */
1169 static int bpoolv(kmp_info_t *th, void *buf) {
1170  bfhead_t *b = BFH(buf);
1171 
1172  while (b->bh.bb.bsize != ESent) {
1173  bufsize bs = b->bh.bb.bsize;
1174 
1175  if (bs < 0) {
1176  bs = -bs;
1177  } else {
1178 #ifdef FreeWipe
1179  char *lerr = "";
1180 #endif
1181 
1182  KMP_DEBUG_ASSERT(bs > 0);
1183  if (bs <= 0) {
1184  return 0;
1185  }
1186  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1187  (void)__kmp_printf_no_lock(
1188  "Free block: size %6ld bytes. (Bad free list links)\n", (long)bs);
1189  KMP_DEBUG_ASSERT(0);
1190  return 0;
1191  }
1192 #ifdef FreeWipe
1193  lerr = ((char *)b) + sizeof(bfhead_t);
1194  if ((bs > sizeof(bfhead_t)) &&
1195  ((*lerr != 0x55) ||
1196  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
1197  0))) {
1198  (void)__kmp_printf_no_lock(
1199  "(Contents of above free block have been overstored.)\n");
1200  bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
1201  KMP_DEBUG_ASSERT(0);
1202  return 0;
1203  }
1204 #endif /* FreeWipe */
1205  }
1206  b = BFH(((char *)b) + bs);
1207  }
1208  return 1;
1209 }
1210 
1211 #endif /* KMP_DEBUG */
1212 
1213 void __kmp_initialize_bget(kmp_info_t *th) {
1214  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1215 
1216  set_thr_data(th);
1217 
1218  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1219  (bufsize)__kmp_malloc_pool_incr);
1220 }
1221 
1222 void __kmp_finalize_bget(kmp_info_t *th) {
1223  thr_data_t *thr;
1224  bfhead_t *b;
1225 
1226  KMP_DEBUG_ASSERT(th != 0);
1227 
1228 #if BufStats
1229  thr = (thr_data_t *)th->th.th_local.bget_data;
1230  KMP_DEBUG_ASSERT(thr != NULL);
1231  b = thr->last_pool;
1232 
1233  /* If a block-release function is defined, and this free buffer constitutes
1234  the entire block, release it. Note that pool_len is defined in such a way
1235  that the test will fail unless all pool blocks are the same size. */
1236 
1237  // Deallocate the last pool if one exists because we no longer do it in brel()
1238  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1239  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1240  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1241  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1242  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1243  b->bh.bb.bsize);
1244 
1245  /* Unlink the buffer from the free list */
1246  __kmp_bget_remove_from_freelist(b);
1247 
1248  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1249 
1250  (*thr->relfcn)(b);
1251  thr->numprel++; /* Nr of expansion block releases */
1252  thr->numpblk--; /* Total number of blocks */
1253  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1254  }
1255 #endif /* BufStats */
1256 
1257  /* Deallocate bget_data */
1258  if (th->th.th_local.bget_data != NULL) {
1259  __kmp_free(th->th.th_local.bget_data);
1260  th->th.th_local.bget_data = NULL;
1261  }; // if
1262 }
1263 
1264 void kmpc_set_poolsize(size_t size) {
1265  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1266  (bget_release_t)free, (bufsize)size);
1267 }
1268 
1269 size_t kmpc_get_poolsize(void) {
1270  thr_data_t *p;
1271 
1272  p = get_thr_data(__kmp_get_thread());
1273 
1274  return p->exp_incr;
1275 }
1276 
1277 void kmpc_set_poolmode(int mode) {
1278  thr_data_t *p;
1279 
1280  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1281  mode == bget_mode_best) {
1282  p = get_thr_data(__kmp_get_thread());
1283  p->mode = (bget_mode_t)mode;
1284  }
1285 }
1286 
1287 int kmpc_get_poolmode(void) {
1288  thr_data_t *p;
1289 
1290  p = get_thr_data(__kmp_get_thread());
1291 
1292  return p->mode;
1293 }
1294 
1295 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1296  kmp_info_t *th = __kmp_get_thread();
1297  bufsize a, b;
1298 
1299  __kmp_bget_dequeue(th); /* Release any queued buffers */
1300 
1301  bcheck(th, &a, &b);
1302 
1303  *maxmem = a;
1304  *allmem = b;
1305 }
1306 
1307 void kmpc_poolprint(void) {
1308  kmp_info_t *th = __kmp_get_thread();
1309 
1310  __kmp_bget_dequeue(th); /* Release any queued buffers */
1311 
1312  bfreed(th);
1313 }
1314 
1315 #endif // #if KMP_USE_BGET
1316 
1317 void *kmpc_malloc(size_t size) {
1318  void *ptr;
1319  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1320  if (ptr != NULL) {
1321  // save allocated pointer just before one returned to user
1322  *(void **)ptr = ptr;
1323  ptr = (void **)ptr + 1;
1324  }
1325  return ptr;
1326 }
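
/* kmpc_malloc() asks bget() for sizeof(void *) extra bytes, stores the
   pointer it got back in those bytes, and returns the address just past
   them; kmpc_realloc() and kmpc_free() recover the stashed pointer with
   *((void **)ptr - 1). A minimal sketch of the trick (illustrative only): */
#if 0
static void *example_wrap(void *raw) { // raw = pointer obtained from bget()
  *(void **)raw = raw; // stash the raw pointer in the first slot
  return (void **)raw + 1; // hand the caller the address after the stash
}
static void *example_unwrap(void *user) {
  return *((void **)user - 1); // recover the raw pointer for release
}
#endif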
1327 
1328 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1329 
1330 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1331  void *ptr;
1332  void *ptr_allocated;
1333  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1334  if (!IS_POWER_OF_TWO(alignment)) {
1335  // AC: do we need to issue a warning here?
1336  errno = EINVAL;
1337  return NULL;
1338  }
1339  size = size + sizeof(void *) + alignment;
1340  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1341  if (ptr_allocated != NULL) {
1342  // save allocated pointer just before one returned to user
1343  ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1344  ~(alignment - 1));
1345  *((void **)ptr - 1) = ptr_allocated;
1346  } else {
1347  ptr = NULL;
1348  }
1349  return ptr;
1350 }
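
/* Worked example of the alignment arithmetic above (illustrative addresses,
   assuming 8-byte pointers): with alignment == 64 and ptr_allocated ==
   0x1008, ptr = (0x1008 + 8 + 64) & ~63 = 0x1040, which is 64-byte aligned
   and leaves 56 bytes below it -- more than enough room to stash
   ptr_allocated at ((void **)ptr - 1). */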
1351 
1352 void *kmpc_calloc(size_t nelem, size_t elsize) {
1353  void *ptr;
1354  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1355  if (ptr != NULL) {
1356  // save allocated pointer just before one returned to user
1357  *(void **)ptr = ptr;
1358  ptr = (void **)ptr + 1;
1359  }
1360  return ptr;
1361 }
1362 
1363 void *kmpc_realloc(void *ptr, size_t size) {
1364  void *result = NULL;
1365  if (ptr == NULL) {
1366  // If pointer is NULL, realloc behaves like malloc.
1367  result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1368  // save allocated pointer just before one returned to user
1369  if (result != NULL) {
1370  *(void **)result = result;
1371  result = (void **)result + 1;
1372  }
1373  } else if (size == 0) {
1374  // If size is 0, realloc behaves like free.
1375  // The thread must be registered by the call to kmpc_malloc() or
1376  // kmpc_calloc() before.
1377  // So it should be safe to call __kmp_get_thread(), not
1378  // __kmp_entry_thread().
1379  KMP_ASSERT(*((void **)ptr - 1));
1380  brel(__kmp_get_thread(), *((void **)ptr - 1));
1381  } else {
1382  result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1383  (bufsize)(size + sizeof(ptr)));
1384  if (result != NULL) {
1385  *(void **)result = result;
1386  result = (void **)result + 1;
1387  }
1388  }; // if
1389  return result;
1390 }
1391 
1392 // NOTE: the library must have already been initialized by a previous allocate
1393 void kmpc_free(void *ptr) {
1394  if (!__kmp_init_serial) {
1395  return;
1396  }; // if
1397  if (ptr != NULL) {
1398  kmp_info_t *th = __kmp_get_thread();
1399  __kmp_bget_dequeue(th); /* Release any queued buffers */
1400  // extract allocated pointer and free it
1401  KMP_ASSERT(*((void **)ptr - 1));
1402  brel(th, *((void **)ptr - 1));
1403  };
1404 }
1405 
1406 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1407  void *ptr;
1408  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1409  (int)size KMP_SRC_LOC_PARM));
1410  ptr = bget(th, (bufsize)size);
1411  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1412  return ptr;
1413 }
1414 
1415 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1416  size_t elsize KMP_SRC_LOC_DECL) {
1417  void *ptr;
1418  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1419  (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1420  ptr = bgetz(th, (bufsize)(nelem * elsize));
1421  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1422  return ptr;
1423 }
1424 
1425 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1426  size_t size KMP_SRC_LOC_DECL) {
1427  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1428  ptr, (int)size KMP_SRC_LOC_PARM));
1429  ptr = bgetr(th, ptr, (bufsize)size);
1430  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1431  return ptr;
1432 }
1433 
1434 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1435  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1436  ptr KMP_SRC_LOC_PARM));
1437  if (ptr != NULL) {
1438  __kmp_bget_dequeue(th); /* Release any queued buffers */
1439  brel(th, ptr);
1440  }
1441  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1442 }
1443 
1444 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
1445  memory leaks, but it may be useful for debugging memory corruption, use of
1446  freed pointers, etc. */
1447 /* #define LEAK_MEMORY */
1448 struct kmp_mem_descr { // Memory block descriptor.
1449  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
1450  size_t size_allocated; // Size of allocated memory block.
1451  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
1452  size_t size_aligned; // Size of aligned memory block.
1453 };
1454 typedef struct kmp_mem_descr kmp_mem_descr_t;
1455 
1456 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1457  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1458  error. Must use __kmp_free when freeing memory allocated by this routine! */
1459 static void *___kmp_allocate_align(size_t size,
1460  size_t alignment KMP_SRC_LOC_DECL) {
1461  /* __kmp_allocate() allocates (by a call to malloc()) a bigger memory block
1462  than requested in order to return a properly aligned pointer. The original
1463  pointer returned by malloc() and the size of the allocated block are saved
1464  in a descriptor just before the aligned pointer. __kmp_free() uses this
1465  information to pass the original pointer, not the aligned one, to free().
1466 
1467  +---------+------------+-----------------------------------+---------+
1468  | padding | descriptor | aligned block | padding |
1469  +---------+------------+-----------------------------------+---------+
1470  ^ ^
1471  | |
1472  | +- Aligned pointer returned to caller
1473  +- Pointer returned by malloc()
1474 
1475  Aligned block is filled with zeros, paddings are filled with 0xEF. */
1476 
1477  kmp_mem_descr_t descr;
1478  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1479  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1480  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1481 
1482  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1483  (int)size, (int)alignment KMP_SRC_LOC_PARM));
1484 
1485  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1486  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1487  // Make sure kmp_uintptr_t is enough to store addresses.
1488 
1489  descr.size_aligned = size;
1490  descr.size_allocated =
1491  descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1492 
1493 #if KMP_DEBUG
1494  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1495 #else
1496  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1497 #endif
1498  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1499  descr.ptr_allocated));
1500  if (descr.ptr_allocated == NULL) {
1501  KMP_FATAL(OutOfHeapMemory);
1502  };
1503 
1504  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1505  addr_aligned =
1506  (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1507  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1508 
1509  descr.ptr_aligned = (void *)addr_aligned;
1510 
1511  KE_TRACE(26, (" ___kmp_allocate_align: "
1512  "ptr_allocated=%p, size_allocated=%d, "
1513  "ptr_aligned=%p, size_aligned=%d\n",
1514  descr.ptr_allocated, (int)descr.size_allocated,
1515  descr.ptr_aligned, (int)descr.size_aligned));
1516 
1517  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1518  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1519  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1520  addr_allocated + descr.size_allocated);
1521  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1522 #ifdef KMP_DEBUG
1523  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1524 // Fill allocated memory block with 0xEF.
1525 #endif
1526  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1527  // Fill the aligned memory block (which is intended for use by the caller)
1528  // with 0x00. Do not put this filling under a KMP_DEBUG condition! Many
1529  // callers expect zeroed memory.
1530  // (Padding bytes remain filled with 0xEF in the debugging
1531  // library.)
1532  *((kmp_mem_descr_t *)addr_descr) = descr;
1533 
1534  KMP_MB();
1535 
1536  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1537  return descr.ptr_aligned;
1538 } // func ___kmp_allocate_align
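
/* Worked example (illustrative addresses, assuming a 64-bit build where
   sizeof(kmp_mem_descr_t) == 32): for alignment == 64, if malloc() returns
   0x2008 then addr_aligned = (0x2008 + 32 + 64) & ~63 = 0x2040 and
   addr_descr = 0x2040 - 32 = 0x2020. The descriptor therefore always sits
   immediately below the pointer handed back to the caller, which is how
   ___kmp_free() finds it again. */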
1539 
1540 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1541  Do not call this func directly! Use __kmp_allocate macro instead.
1542  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1543  error. Must use __kmp_free when freeing memory allocated by this routine! */
1544 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1545  void *ptr;
1546  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
1547  (int)size KMP_SRC_LOC_PARM));
1548  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1549  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
1550  return ptr;
1551 } // func ___kmp_allocate
1552 
1553 #if (BUILD_MEMORY == FIRST_TOUCH)
1554 void *__kmp_ft_page_allocate(size_t size) {
1555  void *adr, *aadr;
1556 
1557  const int page_size = KMP_GET_PAGE_SIZE();
1558 
1559  adr = (void *)__kmp_thread_malloc(__kmp_get_thread(),
1560  size + page_size + KMP_PTR_SKIP);
1561  if (adr == 0)
1562  KMP_FATAL(OutOfHeapMemory);
1563 
1564  /* check to see if adr is on a page boundary. */
1565  if (((kmp_uintptr_t)adr & (page_size - 1)) == 0)
1566  /* nothing to do if adr is already on a page boundary. */
1567  aadr = adr;
1568  else
1569  /* else set aadr to the first page boundary in the allocated memory. */
1570  aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1));
1571 
1572  /* the first touch by the owner thread. */
1573  *((void **)aadr) = adr;
1574 
1575  /* skip the memory space used for storing adr above. */
1576  return (void *)((char *)aadr + KMP_PTR_SKIP);
1577 }
1578 #endif
1579 
1580 /* Allocate memory on page boundary, fill allocated memory with 0x00.
1581  Do not call this func directly! Use __kmp_page_allocate macro instead.
1582  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1583  error. Must use __kmp_free when freeing memory allocated by this routine! */
1584 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
1585  int page_size = 8 * 1024;
1586  void *ptr;
1587 
1588  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
1589  (int)size KMP_SRC_LOC_PARM));
1590  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1591  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
1592  return ptr;
1593 } // ___kmp_page_allocate
1594 
1595 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1596  In debug mode, fill the memory block with 0xEF before the call to free(). */
1597 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
1598  kmp_mem_descr_t descr;
1599  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1600  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1601 
1602  KE_TRACE(25,
1603  ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1604  KMP_ASSERT(ptr != NULL);
1605 
1606  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
1607 
1608  KE_TRACE(26, (" __kmp_free: "
1609  "ptr_allocated=%p, size_allocated=%d, "
1610  "ptr_aligned=%p, size_aligned=%d\n",
1611  descr.ptr_allocated, (int)descr.size_allocated,
1612  descr.ptr_aligned, (int)descr.size_aligned));
1613 
1614  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1615  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
1616 
1617  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1618  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1619  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
1620  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1621  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1622  addr_allocated + descr.size_allocated);
1623 
1624 #ifdef KMP_DEBUG
1625  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1626 // Fill memory block with 0xEF, it helps catch using freed memory.
1627 #endif
1628 
1629 #ifndef LEAK_MEMORY
1630  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
1631 #ifdef KMP_DEBUG
1632  _free_src_loc(descr.ptr_allocated, _file_, _line_);
1633 #else
1634  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1635 #endif
1636 #endif
1637  KMP_MB();
1638  KE_TRACE(25, ("<- __kmp_free() returns\n"));
1639 } // func ___kmp_free
1640 
1641 #if USE_FAST_MEMORY == 3
1642 // Allocate fast memory by first scanning the thread's free lists
1643 // If a chunk the right size exists, grab it off the free list.
1644 // Otherwise allocate normally using kmp_thread_malloc.
1645 
1646 // AC: How to choose the limit? Just get 16 for now...
1647 #define KMP_FREE_LIST_LIMIT 16
1648 
1649 // Always use 128 bytes for determining buckets for caching memory blocks
1650 #define DCACHE_LINE 128
1651 
1652 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
1653  void *ptr;
1654  int num_lines;
1655  int idx;
1656  int index;
1657  void *alloc_ptr;
1658  size_t alloc_size;
1659  kmp_mem_descr_t *descr;
1660 
1661  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1662  __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
1663 
1664  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1665  idx = num_lines - 1;
1666  KMP_DEBUG_ASSERT(idx >= 0);
1667  if (idx < 2) {
1668  index = 0; // idx is [ 0, 1 ], use first free list
1669  num_lines = 2; // 1, 2 cache lines or less than cache line
1670  } else if ((idx >>= 2) == 0) {
1671  index = 1; // idx is [ 2, 3 ], use second free list
1672  num_lines = 4; // 3, 4 cache lines
1673  } else if ((idx >>= 2) == 0) {
1674  index = 2; // idx is [ 4, 15 ], use third free list
1675  num_lines = 16; // 5, 6, ..., 16 cache lines
1676  } else if ((idx >>= 2) == 0) {
1677  index = 3; // idx is [ 16, 63 ], use fourth free list
1678  num_lines = 64; // 17, 18, ..., 64 cache lines
1679  } else {
1680  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1681  }
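  // Worked example: size = 700 bytes gives num_lines = (700 + 127) / 128 = 6,
  // so idx = 5; 5 >> 2 = 1 (not 0), then 1 >> 2 = 0, so index = 2 and the
  // request is rounded up to num_lines = 16 cache lines. On a free-list miss,
  // alloc_call therefore allocates 16 * 128 = 2048 bytes (plus descriptor and
  // alignment padding).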
1682 
1683  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1684  if (ptr != NULL) {
1685  // pop the head of no-sync free list
1686  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1687  KMP_DEBUG_ASSERT(
1688  this_thr ==
1689  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1690  ->ptr_aligned);
1691  goto end;
1692  }
1693  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1694  if (ptr != NULL) {
1695  // no-sync free list is empty, use sync free list (filled in by other
1696  // threads only)
1697  // pop the head of the sync free list, push NULL instead
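  // Only the owning thread ever removes entries here (by swapping the whole
  // chain for NULL); other threads only push onto this list with a CAS in
  // ___kmp_fast_free, so claiming the entire chain at once is safe.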
1698  while (!KMP_COMPARE_AND_STORE_PTR(
1699  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
1700  KMP_CPU_PAUSE();
1701  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1702  }
1703  // push the rest of the chain onto the no-sync free list (it can be NULL
1704  // if that was the only block)
1705  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1706  KMP_DEBUG_ASSERT(
1707  this_thr ==
1708  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1709  ->ptr_aligned);
1710  goto end;
1711  }
1712 
1713 alloc_call:
1714  // no suitable block was found in the free lists, so allocate one
1715  size = num_lines * DCACHE_LINE;
1716 
1717  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1718  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
1719  "alloc_size %d\n",
1720  __kmp_gtid_from_thread(this_thr), (int)alloc_size));
1721  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1722 
1723  // align ptr to DCACHE_LINE
1724  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
1725  DCACHE_LINE) &
1726  ~(DCACHE_LINE - 1));
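  // Adding sizeof(kmp_mem_descr_t) + DCACHE_LINE before masking guarantees
  // that the rounded-down result is DCACHE_LINE-aligned and still at least
  // sizeof(kmp_mem_descr_t) bytes above alloc_ptr, leaving room for the
  // descriptor below ptr while staying within alloc_size.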
1727  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1728 
1729  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1730  // we don't need size_allocated
1731  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1732  // (it is already saved in the bget buffer,
1733  // but we may want to use another allocator in the future)
1734  descr->size_aligned = size;
1735 
1736 end:
1737  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
1738  __kmp_gtid_from_thread(this_thr), ptr));
1739  return ptr;
1740 } // func ___kmp_fast_allocate
1741 
1742 // Free fast memory and place it on the thread's free list if it is of
1743 // the correct size.
1744 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
1745  kmp_mem_descr_t *descr;
1746  kmp_info_t *alloc_thr;
1747  size_t size;
1748  size_t idx;
1749  int index;
1750 
1751  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1752  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1753  KMP_ASSERT(ptr != NULL);
1754 
1755  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1756 
1757  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1758  (int)descr->size_aligned));
1759 
1760  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1761 
1762  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1763  if (idx == size) {
1764  index = 0; // 2 cache lines
1765  } else if ((idx <<= 1) == size) {
1766  index = 1; // 4 cache lines
1767  } else if ((idx <<= 2) == size) {
1768  index = 2; // 16 cache lines
1769  } else if ((idx <<= 2) == size) {
1770  index = 3; // 64 cache lines
1771  } else {
1772  KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1773  goto free_call; // 65 or more cache lines ( > 8KB )
1774  }
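  // Blocks handed out through the free lists by ___kmp_fast_allocate always
  // have size_aligned equal to exactly 2, 4, 16 or 64 cache lines, so the
  // equality tests above are sufficient; anything larger came from the plain
  // alloc_call path and is released via free_call below.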
1775 
1776  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1777  if (alloc_thr == this_thr) {
1778  // push block to self no-sync free list, linking previous head (LIFO)
1779  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1780  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1781  } else {
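  // The block belongs to another thread. Rather than touching that thread's
  // sync list on every free, batch such blocks on th_free_list_other and
  // return the whole chain to the owner once it grows past
  // KMP_FREE_LIST_LIMIT or a block owned by a different thread shows up.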
1782  void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1783  if (head == NULL) {
1784  // Create new free list
1785  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1786  *((void **)ptr) = NULL; // mark the tail of the list
1787  descr->size_allocated = (size_t)1; // head of the list keeps its length
1788  } else {
1789  // need to check the existing "other" list's owner thread and queue size
1790  kmp_mem_descr_t *dsc =
1791  (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
1792  // allocating thread, same for all queue nodes
1793  kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1794  size_t q_sz =
1795  dsc->size_allocated + 1; // new size in case we add current task
1796  if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1797  // we can add current task to "other" list, no sync needed
1798  *((void **)ptr) = head;
1799  descr->size_allocated = q_sz;
1800  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1801  } else {
1802  // either the queued blocks have a different owner than this block, or
1803  // the size limit was exceeded: return the old queue to its allocating
1804  // thread (q_th) synchronously, and start a new list for alloc_thr's tasks
1805  void *old_ptr;
1806  void *tail = head;
1807  void *next = *((void **)head);
1808  while (next != NULL) {
1809  KMP_DEBUG_ASSERT(
1810  // queue size should decrease by 1 each step through the list
1811  ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
1812  ->size_allocated +
1813  1 ==
1814  ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
1815  ->size_allocated);
1816  tail = next; // remember tail node
1817  next = *((void **)next);
1818  }
1819  KMP_DEBUG_ASSERT(q_th != NULL);
1820  // push block to owner's sync free list
1821  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1822  /* the next pointer must be set before setting free_list to ptr to avoid
1823  exposing a broken list to other threads, even for an instant. */
1824  *((void **)tail) = old_ptr;
1825 
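  // If the CAS fails, another thread pushed onto the sync list first;
  // re-read the new head and re-link our tail to it before retrying.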
1826  while (!KMP_COMPARE_AND_STORE_PTR(
1827  &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1828  KMP_CPU_PAUSE();
1829  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1830  *((void **)tail) = old_ptr;
1831  }
1832 
1833  // start a new list of not-self tasks
1834  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1835  *((void **)ptr) = NULL;
1836  descr->size_allocated = (size_t)1; // head of queue keeps its length
1837  }
1838  }
1839  }
1840  goto end;
1841 
1842 free_call:
1843  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1844  __kmp_gtid_from_thread(this_thr), (int)size));
1845  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
1846  brel(this_thr, descr->ptr_allocated);
1847 
1848 end:
1849  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
1850 
1851 } // func ___kmp_fast_free
1852 
1853 // Initialize the thread free lists related to fast memory
1854 // Only do this when a thread is initially created.
1855 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1856  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1857 
1858  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
1859 }
1860 
1861 // Free the memory in the thread free lists related to fast memory
1862 // Only do this when a thread is being reaped (destroyed).
1863 void __kmp_free_fast_memory(kmp_info_t *th) {
1864  // Assuming BGET is the underlying allocator, walk through its structures...
1865  int bin;
1866  thr_data_t *thr = get_thr_data(th);
1867  void **lst = NULL;
1868 
1869  KE_TRACE(
1870  5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
1871 
1872  __kmp_bget_dequeue(th); // Release any queued buffers
1873 
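  // Two passes: first walk every bget bin and thread a singly linked list
  // (through the buffer memory itself) of entries tagged as allocated, then
  // release each collected block back through the thread's release function,
  // keeping the BufStats counters consistent for __kmp_finalize_bget().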
1874  // Dig through free lists and extract all allocated blocks
1875  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1876  bfhead_t *b = thr->freelist[bin].ql.flink;
1877  while (b != &thr->freelist[bin]) {
1878  if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is an allocated address
1879  *((void **)b) =
1880  lst; // link into the list (overwrites bthr but keeps flink intact)
1881  lst = (void **)b; // push b into lst
1882  }
1883  b = b->ql.flink; // get next buffer
1884  }
1885  }
1886  while (lst != NULL) {
1887  void *next = *lst;
1888  KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1889  lst, next, th, __kmp_gtid_from_thread(th)));
1890  (*thr->relfcn)(lst);
1891 #if BufStats
1892  // count blocks to prevent problems in __kmp_finalize_bget()
1893  thr->numprel++; /* Nr of expansion block releases */
1894  thr->numpblk--; /* Total number of blocks */
1895 #endif
1896  lst = (void **)next;
1897  }
1898 
1899  KE_TRACE(
1900  5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1901 }
1902 
1903 #endif // USE_FAST_MEMORY