Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 3bc2d3e

Browse files
committed
MT GPU fetch, l0 async
1 parent 7a25adf commit 3bc2d3e

22 files changed

+385
-126
lines changed

omniscidb/BufferProvider/BufferProvider.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class BufferProvider {
4444
const int8_t* host_ptr,
4545
const size_t num_bytes,
4646
const int device_id) const = 0;
47-
virtual void synchronizeStream(const int device_id) const = 0;
47+
virtual void synchronizeDeviceDataStream(const int device_id) const = 0;
4848
virtual void copyFromDevice(int8_t* host_ptr,
4949
const int8_t* device_ptr,
5050
const size_t num_bytes,

omniscidb/CudaMgr/CudaMgr.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,17 @@ void CudaMgr::copyHostToDevice(int8_t* device_ptr,
112112
cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
113113
}
114114

115+
void CudaMgr::copyHostToDeviceAsyncIfPossible(int8_t* device_ptr,
116+
const int8_t* host_ptr,
117+
const size_t num_bytes,
118+
const int device_num) {
119+
if constexpr (async_data_load_available) {
120+
copyHostToDeviceAsync(device_ptr, host_ptr, num_bytes, device_num);
121+
} else {
122+
copyHostToDevice(device_ptr, host_ptr, num_bytes, device_num);
123+
}
124+
}
125+
115126
void CudaMgr::copyHostToDeviceAsync(int8_t* device_ptr,
116127
const int8_t* host_ptr,
117128
const size_t num_bytes,
@@ -120,7 +131,7 @@ void CudaMgr::copyHostToDeviceAsync(int8_t* device_ptr,
120131
checkError(cuMemcpyHtoDAsync(
121132
reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes, stream_));
122133
}
123-
void CudaMgr::synchronizeStream(const int device_num) {
134+
void CudaMgr::synchronizeDeviceDataStream(const int device_num) {
124135
setContext(device_num);
125136
checkError(cuStreamSynchronize(stream_));
126137
}

omniscidb/CudaMgr/CudaMgr.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,17 @@ class CudaMgr : public GpuMgr {
9696
const size_t num_bytes,
9797
const int device_num) override;
9898

99+
void copyHostToDeviceAsyncIfPossible(int8_t* device_ptr,
100+
const int8_t* host_ptr,
101+
const size_t num_bytes,
102+
const int device_num) override;
103+
99104
void copyHostToDeviceAsync(int8_t* device_ptr,
100105
const int8_t* host_ptr,
101106
const size_t num_bytes,
102107
const int device_num) override;
103108

104-
void synchronizeStream(const int device_num) override;
109+
void synchronizeDeviceDataStream(const int device_num) override;
105110

106111
void copyDeviceToHost(int8_t* host_ptr,
107112
const int8_t* device_ptr,
@@ -289,7 +294,7 @@ class CudaMgr : public GpuMgr {
289294
omnisci::DeviceGroup device_group_;
290295
std::vector<CUcontext> device_contexts_;
291296
mutable std::mutex device_cleanup_mutex_;
292-
static constexpr bool async_data_load_available{true};
297+
static constexpr bool async_data_load_available{false};
293298
};
294299

295300
} // Namespace CudaMgr_Namespace

omniscidb/CudaMgr/CudaMgrNoCuda.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@ void CudaMgr::copyHostToDeviceAsync(int8_t* device_ptr,
4343
CHECK(false);
4444
}
4545

46-
void CudaMgr::synchronizeStream(const int device_num) {
46+
// Stub from CudaMgrNoCuda.cpp: performs no copy; every call fails the CHECK.
void CudaMgr::copyHostToDeviceAsyncIfPossible(int8_t* device_ptr,
                                              const int8_t* host_ptr,
                                              const size_t num_bytes,
                                              const int device_num) {
  CHECK(false);
}
52+
53+
void CudaMgr::synchronizeDeviceDataStream(const int device_num) {
4754
CHECK(false);
4855
}
4956

omniscidb/DataMgr/Allocators/DeviceAllocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,5 @@ class DeviceAllocator : public Allocator {
5353
virtual void setDeviceMem(int8_t* device_ptr,
5454
unsigned char uc,
5555
const size_t num_bytes) const = 0;
56+
virtual void sync() = 0;
5657
};

omniscidb/DataMgr/Allocators/GpuAllocator.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,7 @@ void GpuAllocator::setDeviceMem(int8_t* device_ptr,
8484
const size_t num_bytes) const {
8585
buffer_provider_->setDeviceMem(device_ptr, uc, num_bytes, device_id_);
8686
}
87+
88+
void GpuAllocator::sync() {
89+
buffer_provider_->synchronizeDeviceDataStream(device_id_);
90+
}

omniscidb/DataMgr/Allocators/GpuAllocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ class GpuAllocator : public DeviceAllocator {
5858
void setDeviceMem(int8_t* device_ptr,
5959
unsigned char uc,
6060
const size_t num_bytes) const override;
61+
void sync() override;
6162

6263
private:
6364
std::vector<Data_Namespace::AbstractBuffer*> owned_buffers_;

omniscidb/DataMgr/BufferMgr/CpuBufferMgr/CpuBuffer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ void CpuBuffer::readData(int8_t* const dst,
4848
memcpy(dst, mem_ + offset, num_bytes);
4949
} else if (dst_memory_level == GPU_LEVEL) {
5050
CHECK_GE(dst_device_id, 0);
51-
gpu_mgr_->copyHostToDevice(dst, mem_ + offset, num_bytes, dst_device_id);
51+
gpu_mgr_->copyHostToDeviceAsyncIfPossible(
52+
dst, mem_ + offset, num_bytes, dst_device_id);
5253
} else {
5354
LOG(FATAL) << "Unsupported buffer type";
5455
}

omniscidb/DataMgr/DataMgrBufferProvider.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,14 @@ void DataMgrBufferProvider::copyToDeviceAsyncIfPossible(int8_t* device_ptr,
5757
CHECK(data_mgr_);
5858
const auto gpu_mgr = data_mgr_->getGpuMgr();
5959
CHECK(gpu_mgr);
60-
if (gpu_mgr->canLoadAsync()) {
61-
gpu_mgr->copyHostToDeviceAsync(device_ptr, host_ptr, num_bytes, device_id);
62-
} else {
63-
gpu_mgr->copyHostToDevice(device_ptr, host_ptr, num_bytes, device_id);
64-
}
60+
gpu_mgr->copyHostToDeviceAsyncIfPossible(device_ptr, host_ptr, num_bytes, device_id);
6561
}
6662

67-
void DataMgrBufferProvider::synchronizeStream(const int device_num) const {
63+
void DataMgrBufferProvider::synchronizeDeviceDataStream(const int device_num) const {
6864
CHECK(data_mgr_);
6965
const auto gpu_mgr = data_mgr_->getGpuMgr();
7066
CHECK(gpu_mgr);
71-
gpu_mgr->synchronizeStream(device_num);
67+
gpu_mgr->synchronizeDeviceDataStream(device_num);
7268
}
7369

7470
void DataMgrBufferProvider::copyFromDevice(int8_t* host_ptr,

omniscidb/DataMgr/DataMgrBufferProvider.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class DataMgrBufferProvider : public BufferProvider {
4545
const int8_t* host_ptr,
4646
const size_t num_bytes,
4747
const int device_id) const override;
48-
void synchronizeStream(const int device_id) const override;
48+
void synchronizeDeviceDataStream(const int device_id) const override;
4949
void copyFromDevice(int8_t* host_ptr,
5050
const int8_t* device_ptr,
5151
const size_t num_bytes,

0 commit comments

Comments
 (0)