Skip to content

Commit 0775772

Browse files
author
20177282
committed
fixed compilation issues with latest cuda
1 parent a5b3d5d commit 0775772

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1588
-2036
lines changed

.github/workflows/test-build.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,4 +24,4 @@ jobs:
2424
sudo apt-get update
2525
sudo apt-get -y install cuda-toolkit-12-8
2626
- name: make check
27-
run: bash install.sh -c
27+
run: bash install.sh -c -g

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
cpu:3.2.5
2-
gpu:3.4.3
2+
gpu:3.4.4

install.sh

File mode changed: 100755 → 100644
Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ $ch -s or --statistics enable costly statistics (may impact runtime)
3232
$ch -a or --all enable all above flags except 'assert' and 'verbosity'
3333
$ch --ncolors disable colors in all solver outputs
3434
$ch --clean=<target> remove old installation of <cpu | gpu | all> solvers
35-
$ch --standard=<n> compile with <11 | 14 | 17> c++ standard
35+
$ch --standard=<n> compile with <17 | 20> c++ standard
3636
$ch --gextra="flags" pass extra "flags" to the GPU compiler (nvcc)
3737
$ch --cextra="flags" pass extra "flags" to the CPU compiler (g++)
3838
EOF
@@ -116,7 +116,7 @@ if [[ "$clean" != "" ]] && [[ "$clean" != "cpu" ]] && [[ "$clean" != "gpu" ]] &&
116116
error "invalid clean target '$clean'"
117117
fi
118118

119-
if [ ! $standard = 14 ] && [ ! $standard = 17 ]; then
119+
if [ ! $standard = 17 ] && [ ! $standard = 20 ]; then
120120
error "invalid c++ standard '$standard'"
121121
fi
122122

@@ -397,6 +397,7 @@ if [[ $pedantic = 1 ]]; then log " turning off 'pedantic' due to incompatibilit
397397

398398
# default flags
399399
INCLUDE="../../dep"
400+
RELOC="-rdc=true"
400401
EXTLAMBDA="--expt-extended-lambda"
401402
RELAXEDEXPR="--expt-relaxed-constexpr"
402403
OPTIMIZE="-O3"
@@ -433,13 +434,21 @@ if [ -d dep ]; then
433434
NVCCFLAGS="$NVCCFLAGS -I$INCLUDE"
434435
fi
435436

437+
NVCCFLAGS="$NVCCFLAGS $RELOC"
438+
436439
if [[ $gextra != "" ]]; then NVCCFLAGS="$NVCCFLAGS $gextra"; fi
437440
if [[ $cextra != "" ]]; then CCFLAGS="$CCFLAGS $cextra"; fi
438441

442+
first=$(printf "%s" "$NVCCFLAGS" | cut -d' ' -f1-5)
443+
second=$(printf "%s" "$NVCCFLAGS" | cut -d' ' -f6-8)
444+
rest=$(printf "%s" "$NVCCFLAGS" | cut -d' ' -f9-)
445+
439446
log ""
440447
log "building with:"
441-
log "'$NVCCFLAGS"
442-
log "-Xcompiler $CCFLAGS'"
448+
[ ! -z "$first" ] && log "$(printf " %s\n" "$first")"
449+
[ ! -z "$second" ] && log "$(printf " %s\n" "$second")"
450+
[ ! -z "$rest" ] && log "$(printf " %s\n" "$rest")"
451+
log " -Xcompiler $CCFLAGS'"
443452
log ""
444453

445454
[ ! -f $gputemplate ] && error "cannot find the GPU makefile template"

src/cpu/version.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,6 @@ namespace ParaFROST {
2626
const char* date();
2727

2828
}
29+
#define VERSION "3.2.5"
30+
#define OSYSTEM "linux rig-muhos 5.15.167.4-microsoft-standard-wsl2 x86_64"
31+
#define DATE "Sun Apr 27 21:41:37 CEST 2025"

src/gpu/atomics.cuh

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -25,26 +25,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
2525

2626
namespace ParaFROST {
2727

28-
template<class T>
29-
_PFROST_D_ T atomicAggInc(T* counter) {
30-
const uint32 mask = __activemask(), total = __popc(mask);
31-
uint32 laneMask;
32-
laneMask_lt(laneMask);
33-
const T prefix = (T)__popc(mask & laneMask);
34-
const int lowest_lane = __ffs(mask) - 1;
35-
T warpRes = prefix ? 0 : atomicAdd(counter, total);
36-
warpRes = __shfl_sync(mask, warpRes, lowest_lane);
37-
return (prefix + warpRes);
38-
}
39-
40-
template<class T, class R>
41-
_PFROST_D_ void atomicAggMax(T* counter, const R ref) {
42-
const uint32 mask = __activemask(), max_id = (32 - __clz(mask)) - 1;
43-
uint32 lane_id;
44-
laneId(lane_id);
45-
if (lane_id == max_id)
46-
atomicMax(counter, ref);
47-
}
28+
29+
4830

4931
}
5032

src/gpu/bounded.cuh

Lines changed: 14 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
2525
#include "function.cuh"
2626
#include "ifthenelse.cuh"
2727
#include "equivalence.cuh"
28+
#include "shared.cuh"
2829

2930
namespace ParaFROST {
3031

@@ -65,8 +66,6 @@ namespace ParaFROST {
6566

6667
//==========================================//
6768

68-
__device__ int lastEliminatedID;
69-
7069
#define ADD_RESOLVENT \
7170
{ \
7271
int rsize; \
@@ -262,21 +261,7 @@ namespace ParaFROST {
262261
//=========================================================//
263262
// kernels
264263
//=========================================================//
265-
266-
__global__ void reset_id() { lastEliminatedID = -1; }
267-
268-
__global__ void print_id() { printf("c lastEliminatedID = %d\n", lastEliminatedID); }
269264

270-
__global__ void mapfrozen_k(const uint32* __restrict__ frozen, uint32* __restrict__ varcore, const uint32 size)
271-
{
272-
grid_t tid = global_tx;
273-
while (tid < size) {
274-
assert(frozen[tid] && frozen[tid] < NOVAR);
275-
varcore[frozen[tid]] = tid;
276-
tid += stride_x;
277-
}
278-
}
279-
280265
// Macros for checking applicability of variable elimination
281266
#define MEMORY_SAFE_DBG \
282267
if ((addedPos + nAddedCls) > cnf->refs().capacity()) { \
@@ -491,6 +476,19 @@ namespace ParaFROST {
491476
}
492477
}
493478

479+
_PFROST_IN_D_ uint32 laneId() {
480+
uint32 id;
481+
asm("mov.u32 %0, %%laneid;" : "=r"(id));
482+
return id;
483+
}
484+
485+
template<class T, class R>
486+
_PFROST_IN_D_ void atomicAggMax(T* counter, const R ref) {
487+
const uint32 mask = __activemask(), max_id = (32 - __clz(mask)) - 1;
488+
if (laneId() == max_id)
489+
atomicMax(counter, ref);
490+
}
491+
494492
__global__ void ve_k_2(
495493
CNF* __restrict__ cnf,
496494
OT* __restrict__ ot,
@@ -551,32 +549,6 @@ namespace ParaFROST {
551549
}
552550
}
553551

554-
__global__ void resizeCNF_k(CNF* cnf,
555-
const uint32* __restrict__ type,
556-
const uint32* __restrict__ rpos,
557-
const S_REF* __restrict__ rref,
558-
const int verbose)
559-
{
560-
if (lastEliminatedID >= 0) {
561-
const uint32 lastAdded = type[lastEliminatedID];
562-
const uint32 lastAddedPos = rpos[lastEliminatedID];
563-
const S_REF lastAddedRef = rref[lastEliminatedID];
564-
assert(lastAdded < NOVAR);
565-
assert(lastAddedPos < NOVAR);
566-
assert(lastAddedRef < GNOREF);
567-
assert(RECOVERTYPE(lastAdded) < TYPE_MASK);
568-
const uint32 lastAddedCls = RECOVERADDEDCLS(lastAdded);
569-
const uint32 lastAddedLits = RECOVERADDEDLITS(lastAdded);
570-
assert(lastAddedCls && lastAddedCls <= ADDEDCLS_MAX);
571-
assert(lastAddedLits && lastAddedLits <= ADDEDLITS_MAX);
572-
const S_REF lastAddedBuckets = lastAddedLits + DC_NBUCKETS * lastAddedCls;
573-
const S_REF data_size = lastAddedBuckets + lastAddedRef;
574-
const uint32 cs_size = lastAddedCls + lastAddedPos;
575-
cnf->resize(data_size, cs_size);
576-
if (verbose > 1) printf("c resized CNF to %d clauses and %lld data for a last ID %d\n", cs_size, data_size, lastEliminatedID);
577-
}
578-
}
579-
580552
}
581553

582554

src/gpu/cache.cuh

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,8 +67,6 @@ namespace ParaFROST {
6767

6868
};
6969

70-
extern CACHER cacher;
71-
7270
}
7371

7472
#endif

0 commit comments

Comments
 (0)