Kokkos Core Kernels Package  Version of the Day
Kokkos_MemoryPool.hpp
1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 // Kokkos v. 2.0
6 // Copyright (2014) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
39 //
40 // ************************************************************************
41 //@HEADER
42 */
43 
44 #ifndef KOKKOS_MEMORYPOOL_HPP
45 #define KOKKOS_MEMORYPOOL_HPP
46 
47 #include <Kokkos_Core_fwd.hpp>
48 #include <Kokkos_Parallel.hpp>
49 #include <Kokkos_Atomic.hpp>
50 #include <impl/Kokkos_ConcurrentBitset.hpp>
51 #include <impl/Kokkos_Error.hpp>
52 #include <impl/Kokkos_SharedAlloc.hpp>
53 
54 namespace Kokkos {
55 
56 template< typename DeviceType >
57 class MemoryPool {
58 private:
59 
60  typedef typename Kokkos::Impl::concurrent_bitset CB ;
61 
62  enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 };
63  enum : uint32_t { state_shift = CB::state_shift };
64  enum : uint32_t { state_used_mask = CB::state_used_mask };
65  enum : uint32_t { state_header_mask = CB::state_header_mask };
66  enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 };
67  enum : uint32_t { max_bit_count = CB::max_bit_count };
68 
69  /* Defaults for min block, max block, and superblock sizes */
70  enum : uint32_t { MIN_BLOCK_SIZE_LG2 = 6 /* 64 bytes */ };
71  enum : uint32_t { MAX_BLOCK_SIZE_LG2 = 12 /* 4k bytes */ };
72  enum : uint32_t { SUPERBLOCK_SIZE_LG2 = 16 /* 64k bytes */ };
73 
74  enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 };
75 
76  /* Each superblock has a concurrent bitset state
77  * which is an array of uint32_t integers.
78  * [ { block_count_lg2 : state_shift bits
79  * , used_block_count : ( 32 - state_shift ) bits
80  * }
81  * , { block allocation bit set }* ]
82  *
83  * As superblocks are assigned (allocated) to a block size
84  * and released (deallocated) back to empty the superblock state
85  * is concurrently updated.
86  */
87 
88  typedef typename DeviceType::memory_space base_memory_space ;
89 
90  enum { accessible =
92  , base_memory_space >::accessible };
93 
94  typedef Kokkos::Impl::SharedAllocationTracker Tracker ;
95  typedef Kokkos::Impl::SharedAllocationRecord
96  < base_memory_space > Record ;
97 
98  Tracker m_tracker ;
99  uint32_t * m_sb_state_array ;
100  uint32_t m_sb_state_size ;
101  uint32_t m_sb_size_lg2 ;
102  uint32_t m_max_block_size_lg2 ;
103  uint32_t m_min_block_size_lg2 ;
104  int32_t m_sb_count ;
105  int32_t m_hint_offset ; // Offset to K * #block_size array of hints
106  int32_t m_data_offset ; // Offset to 0th superblock data
107  int32_t m_unused_padding ;
108 
109 public:
110 
111  //--------------------------------------------------------------------------
112 
113  KOKKOS_INLINE_FUNCTION
114  size_t capacity() const noexcept
115  { return size_t(m_sb_count) << m_sb_size_lg2 ; }
116 
117  KOKKOS_INLINE_FUNCTION
118  size_t min_block_size() const noexcept
119  { return ( 1LU << m_min_block_size_lg2 ); }
120 
121  KOKKOS_INLINE_FUNCTION
122  size_t max_block_size() const noexcept
123  { return ( 1LU << m_max_block_size_lg2 ); }
124 
125  struct usage_statistics {
126  size_t capacity_bytes ;
127  size_t superblock_bytes ;
128  size_t max_block_bytes ;
129  size_t min_block_bytes ;
130  size_t capacity_superblocks ;
131  size_t consumed_superblocks ;
132  size_t consumed_blocks ;
133  size_t consumed_bytes ;
134  size_t reserved_blocks ;
135  size_t reserved_bytes ;
136  };
137 
138  void get_usage_statistics( usage_statistics & stats ) const
139  {
140  Kokkos::HostSpace host ;
141 
142  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
143 
144  uint32_t * const sb_state_array =
145  accessible ? m_sb_state_array : (uint32_t *) host.allocate(alloc_size);
146 
147  if ( ! accessible ) {
148  Kokkos::Impl::DeepCopy< Kokkos::HostSpace , base_memory_space >
149  ( sb_state_array , m_sb_state_array , alloc_size );
150  }
151 
152  stats.superblock_bytes = ( 1LU << m_sb_size_lg2 );
153  stats.max_block_bytes = ( 1LU << m_max_block_size_lg2 );
154  stats.min_block_bytes = ( 1LU << m_min_block_size_lg2 );
155  stats.capacity_bytes = stats.superblock_bytes * m_sb_count ;
156  stats.capacity_superblocks = m_sb_count ;
157  stats.consumed_superblocks = 0 ;
158  stats.consumed_blocks = 0 ;
159  stats.consumed_bytes = 0 ;
160  stats.reserved_blocks = 0 ;
161  stats.reserved_bytes = 0 ;
162 
163  const uint32_t * sb_state_ptr = sb_state_array ;
164 
165  for ( int32_t i = 0 ; i < m_sb_count
166  ; ++i , sb_state_ptr += m_sb_state_size ) {
167 
168  const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift ;
169 
170  if ( block_count_lg2 ) {
171  const uint32_t block_count = 1u << block_count_lg2 ;
172  const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2 ;
173  const uint32_t block_size = 1u << block_size_lg2 ;
174  const uint32_t block_used = (*sb_state_ptr) & state_used_mask ;
175 
176  stats.consumed_superblocks++ ;
177  stats.consumed_blocks += block_used ;
178  stats.consumed_bytes += block_used * block_size ;
179  stats.reserved_blocks += block_count - block_used ;
180  stats.reserved_bytes += (block_count - block_used ) * block_size ;
181  }
182  }
183 
184  if ( ! accessible ) {
185  host.deallocate( sb_state_array, alloc_size );
186  }
187  }
188 
189  void print_state( std::ostream & s ) const
190  {
191  Kokkos::HostSpace host ;
192 
193  const size_t alloc_size = m_hint_offset * sizeof(uint32_t);
194 
195  uint32_t * const sb_state_array =
196  accessible ? m_sb_state_array : (uint32_t *) host.allocate(alloc_size);
197 
198  if ( ! accessible ) {
199  Kokkos::Impl::DeepCopy< Kokkos::HostSpace , base_memory_space >
200  ( sb_state_array , m_sb_state_array , alloc_size );
201  }
202 
203  const uint32_t * sb_state_ptr = sb_state_array ;
204 
205  s << "pool_size(" << ( size_t(m_sb_count) << m_sb_size_lg2 ) << ")"
206  << " superblock_size(" << ( 1 << m_sb_size_lg2 ) << ")" << std::endl ;
207 
208  for ( int32_t i = 0 ; i < m_sb_count
209  ; ++i , sb_state_ptr += m_sb_state_size ) {
210 
211  if ( *sb_state_ptr ) {
212 
213  const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift ;
214  const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2 ;
215  const uint32_t block_count = 1 << block_count_lg2 ;
216  const uint32_t block_used = (*sb_state_ptr) & state_used_mask ;
217 
218  s << "Superblock[ " << i << " / " << m_sb_count << " ] {"
219  << " block_size(" << ( 1 << block_size_lg2 ) << ")"
220  << " block_count( " << block_used
221  << " / " << block_count << " )"
222  << std::endl ;
223  }
224  }
225 
226  if ( ! accessible ) {
227  host.deallocate( sb_state_array, alloc_size );
228  }
229  }
230 
231  //--------------------------------------------------------------------------
232 
233  MemoryPool() = default ;
234  MemoryPool( MemoryPool && ) = default ;
235  MemoryPool( const MemoryPool & ) = default ;
236  MemoryPool & operator = ( MemoryPool && ) = default ;
237  MemoryPool & operator = ( const MemoryPool & ) = default ;
238 
253  MemoryPool( const base_memory_space & memspace
254  , const size_t min_total_alloc_size
255  , const uint32_t min_block_alloc_size // = 1 << MIN_BLOCK_SIZE_LG2
256  , const uint32_t max_block_alloc_size // = 1 << MAX_BLOCK_SIZE_LG2
257  , const uint32_t min_superblock_size // = 1 << SUPERBLOCK_SIZE_LG2
258  )
259  : m_tracker()
260  , m_sb_state_array(0)
261  , m_sb_state_size(0)
262  , m_sb_size_lg2(0)
263  , m_max_block_size_lg2(0)
264  , m_min_block_size_lg2(0)
265  , m_sb_count(0)
266  , m_hint_offset(0)
267  , m_data_offset(0)
268  , m_unused_padding(0)
269  {
270  const uint32_t int_align_lg2 = 3 ; /* align as int[8] */
271  const uint32_t int_align_mask = ( 1u << int_align_lg2 ) - 1 ;
272 
273  // Block and superblock size is power of two:
274 
275  m_min_block_size_lg2 =
276  Kokkos::Impl::integral_power_of_two_that_contains(min_block_alloc_size);
277 
278  m_max_block_size_lg2 =
279  Kokkos::Impl::integral_power_of_two_that_contains(max_block_alloc_size);
280 
281  m_sb_size_lg2 =
282  Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size);
283 
284  // Constraints:
285  // m_min_block_size_lg2 <= m_max_block_size_lg2 <= m_sb_size_lg2
286  // m_sb_size_lg2 <= m_min_block_size + max_bit_count_lg2
287 
288  if ( m_min_block_size_lg2 + max_bit_count_lg2 < m_sb_size_lg2 ) {
289  m_min_block_size_lg2 = m_sb_size_lg2 - max_bit_count_lg2 ;
290  }
291  if ( m_min_block_size_lg2 + max_bit_count_lg2 < m_max_block_size_lg2 ) {
292  m_min_block_size_lg2 = m_max_block_size_lg2 - max_bit_count_lg2 ;
293  }
294  if ( m_max_block_size_lg2 < m_min_block_size_lg2 ) {
295  m_max_block_size_lg2 = m_min_block_size_lg2 ;
296  }
297  if ( m_sb_size_lg2 < m_max_block_size_lg2 ) {
298  m_sb_size_lg2 = m_max_block_size_lg2 ;
299  }
300 
301  // At least 32 minimum size blocks in a superblock
302 
303  if ( m_sb_size_lg2 < m_min_block_size_lg2 + 5 ) {
304  m_sb_size_lg2 = m_min_block_size_lg2 + 5 ;
305  }
306 
307  // number of superblocks is multiple of superblock size that
308  // can hold min_total_alloc_size.
309 
310  const uint32_t sb_size_mask = ( 1u << m_sb_size_lg2 ) - 1 ;
311 
312  m_sb_count = ( min_total_alloc_size + sb_size_mask ) >> m_sb_size_lg2 ;
313 
314  // Any superblock can be assigned to the smallest size block
315  // Size the block bitset to maximum number of blocks
316 
317  const uint32_t max_block_count_lg2 =
318  m_sb_size_lg2 - m_min_block_size_lg2 ;
319 
320  m_sb_state_size =
321  ( CB::buffer_bound_lg2( max_block_count_lg2 ) + int_align_mask ) & ~int_align_mask ;
322 
323  // Array of all superblock states
324 
325  const size_t all_sb_state_size =
326  ( m_sb_count * m_sb_state_size + int_align_mask ) & ~int_align_mask ;
327 
328  // Number of block sizes
329 
330  const int32_t number_block_sizes =
331  1 + m_max_block_size_lg2 - m_min_block_size_lg2 ;
332 
333  // Array length for possible block sizes
334  // Hint array is one uint32_t per block size
335 
336  const int32_t block_size_array_size =
337  ( number_block_sizes + int_align_mask ) & ~int_align_mask ;
338 
339  m_hint_offset = all_sb_state_size ;
340  m_data_offset = m_hint_offset +
341  block_size_array_size * HINT_PER_BLOCK_SIZE ;
342 
343  // Allocation:
344 
345  const size_t header_size = m_data_offset * sizeof(uint32_t);
346  const size_t alloc_size = header_size +
347  ( size_t(m_sb_count) << m_sb_size_lg2 );
348 
349  Record * rec = Record::allocate( memspace , "MemoryPool" , alloc_size );
350 
351  m_tracker.assign_allocated_record_to_uninitialized( rec );
352 
353  m_sb_state_array = (uint32_t *) rec->data();
354 
355  Kokkos::HostSpace host ;
356 
357  uint32_t * const sb_state_array =
358  accessible ? m_sb_state_array
359  : (uint32_t *) host.allocate(header_size);
360 
361  for ( int32_t i = 0 ; i < m_data_offset ; ++i ) sb_state_array[i] = 0 ;
362 
363  // Initial assignment of empty superblocks to block sizes:
364 
365  for ( int32_t i = 0 ; i < number_block_sizes ; ++i ) {
366  const uint32_t block_size_lg2 = i + m_min_block_size_lg2 ;
367  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ;
368  const uint32_t block_state = block_count_lg2 << state_shift ;
369  const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE ;
370 
371  // for block size index 'i':
372  // sb_id_hint = sb_state_array[ hint_begin ];
373  // sb_id_begin = sb_state_array[ hint_begin + 1 ];
374 
375  const int32_t jbeg = ( i * m_sb_count ) / number_block_sizes ;
376  const int32_t jend = ( ( i + 1 ) * m_sb_count ) / number_block_sizes ;
377 
378  sb_state_array[ hint_begin ] = uint32_t(jbeg);
379  sb_state_array[ hint_begin + 1 ] = uint32_t(jbeg);
380 
381  for ( int32_t j = jbeg ; j < jend ; ++j ) {
382  sb_state_array[ j * m_sb_state_size ] = block_state ;
383  }
384  }
385 
386  // Write out initialized state:
387 
388  if ( ! accessible ) {
389  Kokkos::Impl::DeepCopy< base_memory_space , Kokkos::HostSpace >
390  ( m_sb_state_array , sb_state_array , header_size );
391 
392  host.deallocate( sb_state_array, header_size );
393  }
394  else {
395  Kokkos::memory_fence();
396  }
397  }
398 
399  //--------------------------------------------------------------------------
400 
401 private:
402 
403  /* Given a size 'n' get the block size in which it can be allocated.
404  * Restrict lower bound to minimum block size.
405  */
406  KOKKOS_FORCEINLINE_FUNCTION
407  unsigned get_block_size_lg2( unsigned n ) const noexcept
408  {
409  const unsigned i = Kokkos::Impl::integral_power_of_two_that_contains( n );
410 
411  return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i ;
412  }
413 
414 public:
415 
416  KOKKOS_INLINE_FUNCTION
417  uint32_t allocate_block_size( uint32_t alloc_size ) const noexcept
418  {
419  return alloc_size <= (1UL << m_max_block_size_lg2)
420  ? ( 1u << get_block_size_lg2( alloc_size ) )
421  : 0 ;
422  }
423 
424  //--------------------------------------------------------------------------
434  KOKKOS_FUNCTION
435  void * allocate( size_t alloc_size
436  , int32_t attempt_limit = 1 ) const noexcept
437  {
438  void * p = 0 ;
439 
440  const uint32_t block_size_lg2 = get_block_size_lg2( alloc_size );
441 
442  if ( block_size_lg2 <= m_max_block_size_lg2 ) {
443 
444  // Allocation will fit within a superblock
445  // that has block sizes ( 1 << block_size_lg2 )
446 
447  const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ;
448  const uint32_t block_state = block_count_lg2 << state_shift ;
449  const uint32_t block_count = 1u << block_count_lg2 ;
450  const uint32_t block_count_mask = block_count - 1 ;
451 
452  // Superblock hints for this block size:
453  // hint_sb_id_ptr[0] is the dynamically changing hint
454  // hint_sb_id_ptr[1] is the static start point
455 
456  volatile uint32_t * const hint_sb_id_ptr
457  = m_sb_state_array /* memory pool state array */
458  + m_hint_offset /* offset to hint portion of array */
459  + HINT_PER_BLOCK_SIZE /* number of hints per block size */
460  * ( block_size_lg2 - m_min_block_size_lg2 ); /* block size id */
461 
462  const int32_t sb_id_begin = int32_t( hint_sb_id_ptr[1] );
463 
464  // Fast query clock register 'tic' to pseudo-randomize
465  // the guess for which block within a superblock should
466  // be claimed. If not available then a search occurs.
467 
468  const uint32_t block_id_hint = block_count_mask &
469  (uint32_t)( Kokkos::Impl::clock_tic()
470 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA )
471  // Spread out potentially concurrent access
472  // by threads within a warp or thread block.
473  + ( threadIdx.x + blockDim.x * threadIdx.y )
474 #endif
475  );
476 
477  int32_t sb_id = -1 ;
478 
479  volatile uint32_t * sb_state_array = 0 ;
480 
481  while ( attempt_limit ) {
482 
483  int32_t hint_sb_id = -1 ;
484 
485  if ( sb_id < 0 ) {
486 
487  sb_id = hint_sb_id = int32_t( *hint_sb_id_ptr );
488 
489  sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size );
490  }
491 
492  // Require:
493  // 0 <= sb_id
494  // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id
495 
496  if ( block_state == ( state_header_mask & *sb_state_array ) ) {
497 
498  // This superblock state is assigned to this block size.
499  // Try to claim a bit.
500 
501  const Kokkos::pair<int,int> result =
502  CB::acquire_bounded_lg2( sb_state_array
503  , block_count_lg2
504  , block_id_hint
505  , block_state
506  );
507 
508  // If result.first < 0 then failed to acquire
509  // due to either full or buffer was wrong state.
510  // Could be wrong state if a deallocation raced the
511  // superblock to empty before the acquire could succeed.
512 
513  if ( 0 <= result.first ) { // acquired a bit
514 
515  // Set the allocated block pointer
516 
517  p = ((char*)( m_sb_state_array + m_data_offset ))
518  + ( uint32_t(sb_id) << m_sb_size_lg2 ) // superblock memory
519  + ( result.first << block_size_lg2 ); // block memory
520 
521  break ; // Success
522  }
523 
524 // printf(" acquire block_count_lg2(%d) block_state(0x%x) sb_id(%d) result(%d,%d)\n" , block_count_lg2 , block_state , sb_id , result.first , result.second );
525 
526  }
527  //------------------------------------------------------------------
528  // Arrive here if failed to acquire a block.
529  // Must find a new superblock.
530 
531  // Start searching at designated index for this block size.
532  // Look for a partially full superblock of this block size.
533  // Look for an empty superblock just in case cannot find partfull.
534 
535  sb_id = -1 ;
536 
537  int32_t sb_id_empty = -1 ;
538 
539  sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size ;
540 
541  for ( int32_t i = 0 , id = sb_id_begin ; i < m_sb_count ; ++i ) {
542 
543  // Query state of the candidate superblock.
544  // Note that the state may change at any moment
545  // as concurrent allocations and deallocations occur.
546 
547  const uint32_t state = *sb_state_array ;
548  const uint32_t used = state & state_used_mask ;
549 
550  if ( block_state == ( state & state_header_mask ) ) {
551 
552  // Superblock is assigned to this block size
553 
554  if ( used < block_count ) {
555 
556  // There is room to allocate one block
557 
558  sb_id = id ;
559 
560  if ( used + 1 < block_count ) {
561 
562  // There is room to allocate more than one block
563 
564  Kokkos::atomic_compare_exchange
565  ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) );
566  }
567 
568  break ;
569  }
570  }
571  else if ( ( used == 0 ) && ( sb_id_empty == -1 ) ) {
572 
573  // Superblock is not assigned to this block size
574  // and is the first empty superblock encountered.
575  // Save this id to use if a partfull superblock is not found.
576 
577  sb_id_empty = id ;
578  }
579 
580  if ( ++id < m_sb_count ) {
581  sb_state_array += m_sb_state_size ;
582  }
583  else {
584  id = 0 ;
585  sb_state_array = m_sb_state_array ;
586  }
587  }
588 
589 // printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)\n" , m_sb_count , sb_id , sb_id_empty );
590 
591  if ( sb_id < 0 ) {
592 
593  // Did not find a partfull superblock for this block size.
594 
595  if ( 0 <= sb_id_empty ) {
596 
597  // Found first empty superblock following designated superblock
598  // Attempt to claim it for this block size.
599  // If the claim fails assume that another thread claimed it
600  // for this block size and try to use it anyway,
601  // but do not update hint.
602 
603  sb_id = sb_id_empty ;
604 
605  sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size );
606 
607  // If successfully changed assignment of empty superblock 'sb_id'
608  // to this block_size then update the hint.
609 
610  const uint32_t state_empty = state_header_mask & *sb_state_array ;
611 
612  if ( state_empty ==
613  Kokkos::atomic_compare_exchange
614  (sb_state_array,state_empty,block_state) ) {
615 
616  // If this thread claimed the block then update the hint
617 
618  Kokkos::atomic_compare_exchange
619  ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) );
620  }
621  }
622  else {
623  // Did not find a potentially usable superblock
624  --attempt_limit ;
625  }
626  }
627  } // end allocation attempt loop
628 
629  //--------------------------------------------------------------------
630  }
631  else {
632  Kokkos::abort("Kokkos MemoryPool allocation request exceeded specified maximum allocation size");
633  }
634 
635  return p ;
636  }
637  // end allocate
638  //--------------------------------------------------------------------------
639 
646  KOKKOS_INLINE_FUNCTION
647  void deallocate( void * p , size_t /* alloc_size */ ) const noexcept
648  {
649  // Determine which superblock and block
650  const ptrdiff_t d =
651  ((char*)p) - ((char*)( m_sb_state_array + m_data_offset ));
652 
653  // Verify contained within the memory pool's superblocks:
654  const int ok_contains =
655  ( 0 <= d ) && ( size_t(d) < ( size_t(m_sb_count) << m_sb_size_lg2 ) );
656 
657  int ok_block_aligned = 0 ;
658  int ok_dealloc_once = 0 ;
659 
660  if ( ok_contains ) {
661 
662  const int sb_id = d >> m_sb_size_lg2 ;
663 
664  // State array for the superblock.
665  volatile uint32_t * const sb_state_array =
666  m_sb_state_array + ( sb_id * m_sb_state_size );
667 
668  const uint32_t block_state = (*sb_state_array) & state_header_mask ;
669  const uint32_t block_size_lg2 =
670  m_sb_size_lg2 - ( block_state >> state_shift );
671 
672  ok_block_aligned = 0 == ( d & ( ( 1 << block_size_lg2 ) - 1 ) );
673 
674  if ( ok_block_aligned ) {
675 
676  // Map address to block's bit
677  // mask into superblock and then shift down for block index
678 
679  const uint32_t bit =
680  ( d & ( ptrdiff_t( 1 << m_sb_size_lg2 ) - 1 ) ) >> block_size_lg2 ;
681 
682  const int result =
683  CB::release( sb_state_array , bit , block_state );
684 
685  ok_dealloc_once = 0 <= result ;
686 
687 // printf(" deallocate from sb_id(%d) result(%d) bit(%d) state(0x%x)\n"
688 // , sb_id
689 // , result
690 // , uint32_t(d >> block_size_lg2)
691 // , *sb_state_array );
692 
693  }
694  }
695 
696  if ( ! ok_contains || ! ok_block_aligned || ! ok_dealloc_once ) {
697 #if 0
698  printf("Kokkos MemoryPool deallocate(0x%lx) contains(%d) block_aligned(%d) dealloc_once(%d)\n",(uintptr_t)p,ok_contains,ok_block_aligned,ok_dealloc_once);
699 #endif
700  Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer");
701  }
702  }
703  // end deallocate
704  //--------------------------------------------------------------------------
705 };
706 
707 } // namespace Kokkos
708 
709 #endif /* #ifndef KOKKOS_MEMORYPOOL_HPP */
710 
void * allocate(const size_t arg_alloc_size) const
Allocate untracked memory in the space.
Replacement for std::pair that works on CUDA devices.
Definition: Kokkos_Pair.hpp:64
first_type first
The first element of the pair.
Definition: Kokkos_Pair.hpp:72
Memory management for host memory.
Declaration of parallel operators.
Atomic functions.
void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const
Deallocate untracked memory in the space.
Access relationship between DstMemorySpace and SrcMemorySpace.