Skip to content

Commit 9a463d9

Browse files
committed
temp
1 parent e17e989 commit 9a463d9

File tree

2 files changed

+128
-128
lines changed

2 files changed

+128
-128
lines changed

perf/array.jl

Lines changed: 115 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -19,37 +19,37 @@ gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
1919
gpu_mat_bools = CuArray(rand(rng, Bool, m, n))
2020
gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
2121

22-
group["construct"] = @benchmarkable CuArray{Int}(undef, 1)
22+
# group["construct"] = @benchmarkable CuArray{Int}(undef, 1)
2323

24-
group["copy"] = @async_benchmarkable copy($gpu_mat)
24+
# group["copy"] = @async_benchmarkable copy($gpu_mat)
2525

26-
gpu_mat2 = copy(gpu_mat)
27-
let group = addgroup!(group, "copyto!")
28-
group["cpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat, $cpu_mat)
29-
group["gpu_to_cpu"] = @async_benchmarkable copyto!($cpu_mat, $gpu_mat)
30-
group["gpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat2, $gpu_mat)
31-
end
26+
# gpu_mat2 = copy(gpu_mat)
27+
# let group = addgroup!(group, "copyto!")
28+
# group["cpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat, $cpu_mat)
29+
# group["gpu_to_cpu"] = @async_benchmarkable copyto!($cpu_mat, $gpu_mat)
30+
# group["gpu_to_gpu"] = @async_benchmarkable copyto!($gpu_mat2, $gpu_mat)
31+
# end
3232

33-
let group = addgroup!(group, "iteration")
34-
group["scalar"] = @benchmarkable CUDA.@allowscalar [$gpu_vec[i] for i in 1:10]
33+
# let group = addgroup!(group, "iteration")
34+
# group["scalar"] = @benchmarkable CUDA.@allowscalar [$gpu_vec[i] for i in 1:10]
3535

36-
group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]
36+
# group["logical"] = @benchmarkable $gpu_vec[$gpu_vec_bools]
3737

38-
let group = addgroup!(group, "findall")
39-
group["bool"] = @benchmarkable findall($gpu_vec_bools)
40-
group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
41-
end
38+
# let group = addgroup!(group, "findall")
39+
# group["bool"] = @benchmarkable findall($gpu_vec_bools)
40+
# group["int"] = @benchmarkable findall(isodd, $gpu_vec_ints)
41+
# end
4242

43-
let group = addgroup!(group, "findfirst")
44-
group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
45-
group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
46-
end
43+
# let group = addgroup!(group, "findfirst")
44+
# group["bool"] = @benchmarkable findfirst($gpu_vec_bools)
45+
# group["int"] = @benchmarkable findfirst(isodd, $gpu_vec_ints)
46+
# end
4747

48-
let group = addgroup!(group, "findmin") # findmax
49-
group["1d"] = @async_benchmarkable findmin($gpu_vec)
50-
group["2d"] = @async_benchmarkable findmin($gpu_mat; dims=1)
51-
end
52-
end
48+
# let group = addgroup!(group, "findmin") # findmax
49+
# group["1d"] = @async_benchmarkable findmin($gpu_vec)
50+
# group["2d"] = @async_benchmarkable findmin($gpu_mat; dims=1)
51+
# end
52+
# end
5353

5454
let group = addgroup!(group, "reverse")
5555
group["1d"] = @async_benchmarkable reverse($gpu_vec)
@@ -62,94 +62,94 @@ let group = addgroup!(group, "reverse")
6262
group["2dL_inplace"] = @async_benchmarkable reverse!($gpu_mat_long; dims=2)
6363
end
6464

65-
group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0
66-
67-
# no need to test inplace version, which performs the same operation (but with an alloc)
68-
let group = addgroup!(group, "accumulate")
69-
let group = addgroup!(group, "Float32")
70-
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
71-
group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
72-
group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=2)
73-
74-
group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=1)
75-
group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=2)
76-
end
77-
let group = addgroup!(group, "Int64")
78-
group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec_ints)
79-
group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=1)
80-
group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=2)
81-
82-
group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=1)
83-
group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=2)
84-
end
85-
end
86-
87-
let group = addgroup!(group, "reductions")
88-
let group = addgroup!(group, "reduce")
89-
let group = addgroup!(group, "Float32")
90-
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
91-
group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
92-
group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat; dims=2)
93-
group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=1)
94-
group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=2)
95-
end
96-
let group = addgroup!(group, "Int64")
97-
group["1d"] = @async_benchmarkable reduce(+, $gpu_vec_ints)
98-
group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=1)
99-
group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=2)
100-
group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=1)
101-
group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=2)
102-
end
103-
end
104-
105-
let group = addgroup!(group, "mapreduce")
106-
let group = addgroup!(group, "Float32")
107-
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)
108-
group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)
109-
group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=2)
110-
group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=1)
111-
group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=2)
112-
end
113-
let group = addgroup!(group, "Int64")
114-
group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec_ints)
115-
group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=1)
116-
group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=2)
117-
group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=1)
118-
group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=2)
119-
end
120-
end
121-
122-
# used by sum, prod, minimum, maximum, all, any, count
123-
end
124-
125-
let group = addgroup!(group, "random")
126-
let group = addgroup!(group, "rand")
127-
group["Float32"] = @async_benchmarkable CUDA.rand(Float32, m*n)
128-
group["Int64"] = @async_benchmarkable CUDA.rand(Int64, m*n)
129-
end
130-
131-
let group = addgroup!(group, "rand!")
132-
group["Float32"] = @async_benchmarkable CUDA.rand!($gpu_vec)
133-
group["Int64"] = @async_benchmarkable CUDA.rand!($gpu_vec_ints)
134-
end
135-
136-
let group = addgroup!(group, "randn")
137-
group["Float32"] = @async_benchmarkable CUDA.randn(Float32, m*n)
138-
end
139-
140-
let group = addgroup!(group, "randn!")
141-
group["Float32"] = @async_benchmarkable CUDA.randn!($gpu_vec)
142-
end
143-
end
144-
145-
let group = addgroup!(group, "sorting")
146-
group["1d"] = @async_benchmarkable sort($gpu_vec)
147-
group["2d"] = @async_benchmarkable sort($gpu_mat; dims=1)
148-
group["by"] = @async_benchmarkable sort($gpu_vec; by=sin)
149-
end
150-
151-
let group = addgroup!(group, "permutedims")
152-
group["2d"] = @async_benchmarkable permutedims($gpu_mat, (2,1))
153-
group["3d"] = @async_benchmarkable permutedims($gpu_arr_3d, (3,1,2))
154-
group["4d"] = @async_benchmarkable permutedims($gpu_arr_4d, (2,1,4,3))
155-
end
65+
# group["broadcast"] = @async_benchmarkable $gpu_mat .= 0f0
66+
67+
# # no need to test inplace version, which performs the same operation (but with an alloc)
68+
# let group = addgroup!(group, "accumulate")
69+
# let group = addgroup!(group, "Float32")
70+
# group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec)
71+
# group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=1)
72+
# group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat; dims=2)
73+
74+
# group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=1)
75+
# group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long; dims=2)
76+
# end
77+
# let group = addgroup!(group, "Int64")
78+
# group["1d"] = @async_benchmarkable accumulate(+, $gpu_vec_ints)
79+
# group["dims=1"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=1)
80+
# group["dims=2"] = @async_benchmarkable accumulate(+, $gpu_mat_ints; dims=2)
81+
82+
# group["dims=1L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=1)
83+
# group["dims=2L"] = @async_benchmarkable accumulate(+, $gpu_mat_long_ints; dims=2)
84+
# end
85+
# end
86+
87+
# let group = addgroup!(group, "reductions")
88+
# let group = addgroup!(group, "reduce")
89+
# let group = addgroup!(group, "Float32")
90+
# group["1d"] = @async_benchmarkable reduce(+, $gpu_vec)
91+
# group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat; dims=1)
92+
# group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat; dims=2)
93+
# group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=1)
94+
# group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long; dims=2)
95+
# end
96+
# let group = addgroup!(group, "Int64")
97+
# group["1d"] = @async_benchmarkable reduce(+, $gpu_vec_ints)
98+
# group["dims=1"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=1)
99+
# group["dims=2"] = @async_benchmarkable reduce(+, $gpu_mat_ints; dims=2)
100+
# group["dims=1L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=1)
101+
# group["dims=2L"] = @async_benchmarkable reduce(+, $gpu_mat_long_ints; dims=2)
102+
# end
103+
# end
104+
105+
# let group = addgroup!(group, "mapreduce")
106+
# let group = addgroup!(group, "Float32")
107+
# group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec)
108+
# group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=1)
109+
# group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat; dims=2)
110+
# group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=1)
111+
# group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long; dims=2)
112+
# end
113+
# let group = addgroup!(group, "Int64")
114+
# group["1d"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_vec_ints)
115+
# group["dims=1"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=1)
116+
# group["dims=2"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_ints; dims=2)
117+
# group["dims=1L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=1)
118+
# group["dims=2L"] = @async_benchmarkable mapreduce(x->x+1, +, $gpu_mat_long_ints; dims=2)
119+
# end
120+
# end
121+
122+
# # used by sum, prod, minimum, maximum, all, any, count
123+
# end
124+
125+
# let group = addgroup!(group, "random")
126+
# let group = addgroup!(group, "rand")
127+
# group["Float32"] = @async_benchmarkable CUDA.rand(Float32, m*n)
128+
# group["Int64"] = @async_benchmarkable CUDA.rand(Int64, m*n)
129+
# end
130+
131+
# let group = addgroup!(group, "rand!")
132+
# group["Float32"] = @async_benchmarkable CUDA.rand!($gpu_vec)
133+
# group["Int64"] = @async_benchmarkable CUDA.rand!($gpu_vec_ints)
134+
# end
135+
136+
# let group = addgroup!(group, "randn")
137+
# group["Float32"] = @async_benchmarkable CUDA.randn(Float32, m*n)
138+
# end
139+
140+
# let group = addgroup!(group, "randn!")
141+
# group["Float32"] = @async_benchmarkable CUDA.randn!($gpu_vec)
142+
# end
143+
# end
144+
145+
# let group = addgroup!(group, "sorting")
146+
# group["1d"] = @async_benchmarkable sort($gpu_vec)
147+
# group["2d"] = @async_benchmarkable sort($gpu_mat; dims=1)
148+
# group["by"] = @async_benchmarkable sort($gpu_vec; by=sin)
149+
# end
150+
151+
# let group = addgroup!(group, "permutedims")
152+
# group["2d"] = @async_benchmarkable permutedims($gpu_mat, (2,1))
153+
# group["3d"] = @async_benchmarkable permutedims($gpu_arr_3d, (3,1,2))
154+
# group["4d"] = @async_benchmarkable permutedims($gpu_arr_4d, (2,1,4,3))
155+
# end

perf/runbenchmarks.jl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ end
1818

1919
# before anything else, run latency benchmarks. these spawn subprocesses, so we don't want
2020
# to do so after regular benchmarks have caused the memory allocator to reserve memory.
21-
@info "Running latency benchmarks"
22-
latency_results = include("latency.jl")
21+
# @info "Running latency benchmarks"
22+
# latency_results = include("latency.jl")
2323

2424
SUITE = BenchmarkGroup()
2525

26-
include("cuda.jl")
27-
include("kernel.jl")
26+
# include("cuda.jl")
27+
# include("kernel.jl")
2828
include("array.jl")
2929

3030
@info "Preparing main benchmarks"
@@ -36,20 +36,20 @@ GC.gc(true)
3636
CUDA.reclaim()
3737

3838
# benchmark groups that aren't part of the suite
39-
addgroup!(SUITE, "integration")
39+
# addgroup!(SUITE, "integration")
4040

4141
@info "Running main benchmarks"
4242
results = run(SUITE, verbose=true)
4343

4444
# integration tests (that do nasty things, so need to be run last)
45-
@info "Running integration benchmarks"
46-
integration_results = BenchmarkGroup()
47-
integration_results["volumerhs"] = include("volumerhs.jl")
48-
integration_results["byval"] = include("byval.jl")
49-
integration_results["cudadevrt"] = include("cudadevrt.jl")
50-
51-
results["latency"] = latency_results
52-
results["integration"] = integration_results
45+
# @info "Running integration benchmarks"
46+
# integration_results = BenchmarkGroup()
47+
# integration_results["volumerhs"] = include("volumerhs.jl")
48+
# integration_results["byval"] = include("byval.jl")
49+
# integration_results["cudadevrt"] = include("cudadevrt.jl")
50+
51+
# results["latency"] = latency_results
52+
# results["integration"] = integration_results
5353

5454
# write out the results
5555
result_file = length(ARGS) >= 1 ? ARGS[1] : "benchmarkresults.json"

0 commit comments

Comments
 (0)