@@ -222,12 +222,14 @@ void initialize_buckets(Table<K, V, S>** table, BaseAllocator* allocator,
222222 uint32_t reserve_size =
223223 bucket_max_size < CACHE_LINE_SIZE ? CACHE_LINE_SIZE : bucket_max_size;
224224 bucket_memory_size += reserve_size * sizeof (uint8_t );
225+ uint8_t * address = nullptr ;
226+ allocator->alloc (MemoryType::Device, (void **)&(address),
227+ bucket_memory_size * (end - start));
228+ (*table)->buckets_address .push_back (address);
225229 for (int i = start; i < end; i++) {
226- uint8_t * address = nullptr ;
227- allocator->alloc (MemoryType::Device, (void **)&(address),
228- bucket_memory_size);
229- allocate_bucket_others<K, V, S><<<1 , 1 >>> ((*table)->buckets , i, address,
230- reserve_size, bucket_max_size);
230+ allocate_bucket_others<K, V, S><<<1 , 1 >>> (
231+ (*table)->buckets , i, address + (bucket_memory_size * (i - start)),
232+ reserve_size, bucket_max_size);
231233 }
232234 CUDA_CHECK (cudaDeviceSynchronize ());
233235
@@ -365,17 +367,9 @@ void double_capacity(Table<K, V, S>** table, BaseAllocator* allocator) {
365367/* free all of the resource of a Table. */
366368template <class K , class V , class S >
367369void destroy_table (Table<K, V, S>** table, BaseAllocator* allocator) {
368- uint8_t ** d_address = nullptr ;
369- CUDA_CHECK (cudaMalloc ((void **)&d_address, sizeof (uint8_t *)));
370- for (int i = 0 ; i < (*table)->buckets_num ; i++) {
371- uint8_t * h_address;
372- get_bucket_others_address<K, V, S>
373- <<<1 , 1 >>> ((*table)->buckets , i, d_address);
374- CUDA_CHECK (cudaMemcpy (&h_address, d_address, sizeof (uint8_t *),
375- cudaMemcpyDeviceToHost));
376- allocator->free (MemoryType::Device, h_address);
377- }
378- CUDA_CHECK (cudaFree (d_address));
370+ for (auto addr : (*table)->buckets_address ) {
371+ allocator->free (MemoryType::Device, addr);
372+ }
379373
380374 for (int i = 0 ; i < (*table)->num_of_memory_slices ; i++) {
381375 if (is_on_device ((*table)->slices [i])) {
0 commit comments