Skip to content

Commit 770be38

Browse files
LinGeLinrhdong
authored andcommitted
[opt] Allocate hbm uniformly for buckets to avoid fragmentation.
1 parent 8a4b781 commit 770be38

File tree

2 files changed

+12
-16
lines changed

2 files changed

+12
-16
lines changed

include/merlin/core_kernels.cuh

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,14 @@ void initialize_buckets(Table<K, V, S>** table, BaseAllocator* allocator,
222222
uint32_t reserve_size =
223223
bucket_max_size < CACHE_LINE_SIZE ? CACHE_LINE_SIZE : bucket_max_size;
224224
bucket_memory_size += reserve_size * sizeof(uint8_t);
225+
uint8_t* address = nullptr;
226+
allocator->alloc(MemoryType::Device, (void**)&(address),
227+
bucket_memory_size * (end - start));
228+
(*table)->buckets_address.push_back(address);
225229
for (int i = start; i < end; i++) {
226-
uint8_t* address = nullptr;
227-
allocator->alloc(MemoryType::Device, (void**)&(address),
228-
bucket_memory_size);
229-
allocate_bucket_others<K, V, S><<<1, 1>>>((*table)->buckets, i, address,
230-
reserve_size, bucket_max_size);
230+
allocate_bucket_others<K, V, S><<<1, 1>>>(
231+
(*table)->buckets, i, address + (bucket_memory_size * (i - start)),
232+
reserve_size, bucket_max_size);
231233
}
232234
CUDA_CHECK(cudaDeviceSynchronize());
233235

@@ -365,17 +367,9 @@ void double_capacity(Table<K, V, S>** table, BaseAllocator* allocator) {
365367
/* free all of the resource of a Table. */
366368
template <class K, class V, class S>
367369
void destroy_table(Table<K, V, S>** table, BaseAllocator* allocator) {
368-
uint8_t** d_address = nullptr;
369-
CUDA_CHECK(cudaMalloc((void**)&d_address, sizeof(uint8_t*)));
370-
for (int i = 0; i < (*table)->buckets_num; i++) {
371-
uint8_t* h_address;
372-
get_bucket_others_address<K, V, S>
373-
<<<1, 1>>>((*table)->buckets, i, d_address);
374-
CUDA_CHECK(cudaMemcpy(&h_address, d_address, sizeof(uint8_t*),
375-
cudaMemcpyDeviceToHost));
376-
allocator->free(MemoryType::Device, h_address);
377-
}
378-
CUDA_CHECK(cudaFree(d_address));
370+
for (auto addr : (*table)->buckets_address) {
371+
allocator->free(MemoryType::Device, addr);
372+
}
379373

380374
for (int i = 0; i < (*table)->num_of_memory_slices; i++) {
381375
if (is_on_device((*table)->slices[i])) {

include/merlin/types.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <stddef.h>
2020
#include <cstdint>
2121
#include <cuda/std/semaphore>
22+
#include <vector>
2223

2324
namespace nv {
2425
namespace merlin {
@@ -161,6 +162,7 @@ struct Table {
161162
int slots_number = 0; // unused
162163
int device_id = 0; // Device id
163164
int tile_size;
165+
std::vector<uint8_t*> buckets_address;
164166
};
165167

166168
template <class K, class S>

0 commit comments

Comments
 (0)