Skip to content

Commit 15434c8

Browse files
authored
rewrite using CompilerPluginTools (#19)
* got interpreter working * update CI * reorganize * need to fix test cov * use generic target by default * parameterize target * yay! llvm codegen works * merge gate & ctrl to apply intrinsic * update test * update * use register intrinsic * emulation works * clean up * polish intrinsics * clean up * test pass yay * clean up * clean files * rm 1.7 for now * crashes somehow * fix test * fix optimization * add test back * polish dummy test * test printing
1 parent b1cd0d5 commit 15434c8

32 files changed

+1283
-3743
lines changed

.github/workflows/CI.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ jobs:
1010
fail-fast: false
1111
matrix:
1212
version:
13-
- '1.6.0-beta1'
14-
- 'nightly'
13+
- '1.6'
1514
os:
1615
- ubuntu-latest
1716
arch:

Project.toml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
11
name = "YaoCompiler"
22
uuid = "b3514fdc-7bcc-425b-920a-42c43c8eb4b4"
3-
authors = ["Roger-luo"]
3+
authors = ["Roger-Luo <rogerluo.rl18@gmail.com> and contributors"]
44
version = "0.1.0"
55

66
[deps]
7-
BitBasis = "50ba71b6-fa0f-514d-ae9a-0916efc90dcf"
8-
ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
7+
CompilerPluginTools = "6b7a57c9-7cc1-4fdf-b7f5-e857abae3638"
8+
Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d"
9+
Expronicon = "6b7a57c9-7cc1-4fdf-b7f5-e857abae3636"
10+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
11+
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
912
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
10-
LuxurySparse = "d05aeea4-b7d4-55ac-b691-9e7fabb07ba2"
1113
MLStyle = "d8e11817-5142-5d16-987a-aa16d5891078"
12-
OpenQASM = "a8821629-a4c0-4df7-9e00-12969ff383a7"
13-
RBNF = "83ef0002-5b9e-11e9-219b-65bac3c6d69c"
1414
TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
1515
YaoAPI = "0843a435-28de-4971-9e8b-a9641b2983a8"
1616
YaoLocations = "66df03fb-d475-48f7-b449-3d9064bf085b"
1717
ZXCalculus = "3525faa3-032d-4235-a8d4-8c2939a218dd"
1818

1919
[compat]
20-
ExprTools = "0.1"
21-
LuxurySparse = "0.6"
20+
CompilerPluginTools = "0.1"
21+
Configurations = "0.15"
22+
Expronicon = "0.6"
23+
GPUCompiler = "0.11"
24+
LLVM = "3.6"
25+
MLStyle = "0.4"
2226
TimerOutputs = "0.5"
2327
YaoAPI = "0.1"
2428
YaoLocations = "0.1"

src/YaoCompiler.jl

Lines changed: 58 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,123 +1,77 @@
11
module YaoCompiler
22

3-
export @device, @gate, @ctrl, @measure, @barrier
4-
# reflections
5-
export @code_yao, @code_qasm
6-
export gate_count
7-
export Intrinsics
3+
export @device, @gate, @ctrl, @measure, @barrier,
4+
compile,
5+
YaoInterpreter,
6+
YaoCompileTarget,
7+
JLGenericTarget,
8+
TargetHostKernel,
9+
HardwareFreeOptions,
10+
Routine,
11+
GenericRoutine,
12+
IntrinsicRoutine,
13+
Operation,
14+
AdjointOperation,
15+
routine_name,
16+
IntrinsicError,
17+
# reexport YaoLocations
18+
Locations,
19+
CtrlLocations
820

21+
using MLStyle
22+
using YaoAPI
23+
using LLVM
24+
using Expronicon
25+
using YaoLocations
26+
using TimerOutputs
927
using LinearAlgebra
28+
using GPUCompiler
29+
using Configurations
30+
using CompilerPluginTools
31+
using LLVM.Interop
32+
using GPUCompiler: CodeCache, CompilerJob, AbstractCompilerTarget, AbstractCompilerParams, WorldView
33+
using YaoLocations: map_check, map_check_nothrow, map_error, plain, unsafe_mapping
34+
using CompilerPluginTools: Argument
35+
using Base.Meta: ParseError
1036

11-
using TimerOutputs
1237
const to = TimerOutput()
1338
# Print the table accumulated in the module-level timer `to`, then emit a
# trailing newline. Returns the result of `println()`.
function timings()
    TimerOutputs.print_timer(to)
    return println()
end
1439
# Enable TimerOutputs debug timings for this module and return `nothing`.
#
# `TimerOutputs.enable_debug_timings` takes the module whose debug-timing
# sections should be switched on. The previous argument, `Compiler`, is not
# bound by any `using`/`include` visible in this module, so `@__MODULE__` is
# used to name the enclosing module explicitly.
enable_timings() = (TimerOutputs.enable_debug_timings(@__MODULE__); return)
1540

16-
using ExprTools
17-
using MLStyle
18-
using YaoAPI
19-
using BitBasis
20-
using ZXCalculus
21-
using YaoLocations
22-
using YaoLocations: map_check, map_check_nothrow, map_error, plain
23-
# include("runtime/locations.jl")
24-
25-
using Core:
26-
CodeInfo,
27-
SSAValue,
28-
Const,
29-
PartialStruct,
30-
Slot,
31-
GotoIfNot,
32-
GotoNode,
33-
SlotNumber,
34-
Argument,
35-
ReturnNode
36-
using Core.Compiler:
37-
InferenceParams,
38-
InferenceResult,
39-
OptimizationParams,
40-
OptimizationState,
41-
Bottom,
42-
AbstractInterpreter,
43-
VarTable,
44-
InferenceState,
45-
CFG,
46-
NewSSAValue,
47-
IRCode,
48-
InstructionStream,
49-
CallMeta
50-
using Core.Compiler:
51-
get_world_counter,
52-
get_inference_cache,
53-
may_optimize,
54-
isconstType,
55-
isconcretetype,
56-
widenconst,
57-
isdispatchtuple,
58-
isinlineable,
59-
is_inlineable_constant,
60-
copy_exprargs,
61-
convert_to_ircode,
62-
coverage_enabled,
63-
argtypes_to_type,
64-
userefs,
65-
UseRefIterator,
66-
UseRef,
67-
MethodResultPure,
68-
is_pure_intrinsic_infer,
69-
intrinsic_nothrow,
70-
quoted,
71-
anymap,
72-
# Julia passes
73-
compact!,
74-
ssa_inlining_pass!,
75-
getfield_elim_pass!,
76-
adce_pass!,
77-
type_lift_pass!,
78-
verify_linetable,
79-
verify_ir,
80-
slot2reg
81-
82-
using Base.Meta: ParseError
83-
84-
export Routine,
85-
GenericRoutine,
86-
IntrinsicRoutine,
87-
RoutineSpec,
88-
IntrinsicSpec,
89-
@ctrl,
90-
@measure,
91-
@gate,
92-
@barrier,
93-
@device
94-
export routine_name
41+
@as_record Locations
42+
@as_record CtrlLocations
9543

96-
include("compiler/patch.jl")
97-
include("compiler/routine.jl")
44+
include("compiler/types.jl")
45+
include("compiler/printing.jl")
9846
include("compiler/intrinsics.jl")
99-
include("compiler/qasm.jl")
47+
include("compiler/syntax.jl")
48+
include("compiler/interp.jl")
10049

101-
using .QASM: @qasm_str
102-
export @qasm_str
50+
include("codegen/llvmopt.jl")
51+
include("codegen/native.jl")
10352

104-
# compiler internal extensions
105-
include("compiler/interpreter.jl")
106-
include("compiler/codeinfo.jl")
107-
include("compiler/optimize.jl")
108-
109-
# code generators
110-
include("compiler/codegen/codegen.jl")
111-
112-
include("compiler/reflection.jl")
113-
include("compiler/utils.jl")
114-
# include("compiler/validation.jl")
115-
# include("compiler/trace.jl")
53+
# We have one global JIT and TM
54+
const orc = Ref{LLVM.OrcJIT}()
55+
const tm = Ref{LLVM.TargetMachine}()
11656

11757
# Module initializer: reset the global timer `to`, then create the target
# machine and ORC JIT held in the module-level refs `tm` and `orc`.
function __init__()
    TimerOutputs.reset_timer!(to)

    # Pick the LLVM codegen level from the optimization level Julia was
    # started with: below 2 -> None, exactly 2 -> Default, above 2 -> Aggressive.
    julia_level = Base.JLOptions().opt_level
    optlevel = if julia_level < 2
        LLVM.API.LLVMCodeGenLevelNone
    elseif julia_level == 2
        LLVM.API.LLVMCodeGenLevelDefault
    else
        LLVM.API.LLVMCodeGenLevelAggressive
    end

    tm[] = LLVM.JITTargetMachine(; optlevel=optlevel)
    LLVM.asm_verbosity!(tm[], true)

    orc[] = LLVM.OrcJIT(tm[]) # takes ownership of tm

    # Dispose of the JIT when the Julia process exits.
    return atexit(() -> LLVM.dispose(orc[]))
end
12276

12377
end # module

src/codegen/llvmopt.jl

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#####
2+
##### LLVM optimization pipeline
3+
#####
4+
5+
# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L603
6+
# Add target-dependent information to the pass manager `pm`: library info for
# the triple of `tm`, followed by transform info for `tm`. Returns the result
# of `add_transform_info!`.
function addTargetPasses!(pm, tm)
    target_triple = LLVM.triple(tm)
    add_library_info!(pm, target_triple)
    return add_transform_info!(pm, tm)
end
10+
11+
# TODO (Missing C-API):
12+
# - https://reviews.llvm.org/D86764 adds InstSimplify
13+
# - createDivRemPairs
14+
# - createLoopLoadEliminationPass
15+
# - createVectorCombinePass
16+
# TODO (Missing LLVM.jl)
17+
# - AggressiveInstCombinePass
18+
19+
# https://github.com/JuliaLang/julia/blob/2eb5da0e25756c33d1845348836a0a92984861ac/src/aotcompile.cpp#L620
20+
# Populate the pass manager `pm` with the optimization pipeline.
#
# Arguments:
# - `pm`: pass manager that receives the passes (mutated).
# - `tm`: target machine; accepted for signature compatibility, not used here.
# - `opt_level`: optimization level; values below 2 are rejected, and values
#   of 3 or more additionally enable basic alias analysis.
# - `lower_intrinsics`: when true, append the exception-handler / GC / PTLS
#   lowering passes; otherwise only `remove_ni!` is appended.
# - `dump_native`: must be false; native dumping is not supported yet.
#
# Returns the result of `combine_mul_add!(pm)`.
#
# Throws an `ErrorException` for unsupported `opt_level` or `dump_native`.
# Both checks run before any pass is added, so a rejected call leaves `pm`
# untouched instead of partially populated.
function addOptimizationPasses!(pm, tm, opt_level, lower_intrinsics, dump_native)
    # Validate up front so that `pm` is never left half-built.
    if opt_level < 2
        error("opt_level less than 2 not supported")
    end
    if dump_native
        # TODO: createMultiversioningPass
        error("dump_native not supported")
    end

    constant_merge!(pm)

    propagate_julia_addrsp!(pm)
    scoped_no_alias_aa!(pm)
    type_based_alias_analysis!(pm)
    if opt_level >= 3
        basic_alias_analysis!(pm)
    end
    cfgsimplification!(pm)
    dce!(pm)
    scalar_repl_aggregates!(pm)

    # mem_cpy_opt!(pm)

    always_inliner!(pm) # Respect always_inline

    # Running `memcpyopt` between this and `sroa` seems to give `sroa` a hard time
    # merging the `alloca` for the unboxed data and the `alloca` created by the `alloc_opt`
    # pass.

    alloc_opt!(pm)
    # consider AggressiveInstCombinePass at optlevel > 2

    instruction_combining!(pm)
    cfgsimplification!(pm)
    # (the multiversioning pass would be inserted here once `dump_native` is supported)
    scalar_repl_aggregates!(pm)
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy
    jump_threading!(pm)

    reassociate!(pm)

    early_cse!(pm)

    # Load forwarding above can expose allocations that aren't actually used
    # remove those before optimizing loops.
    alloc_opt!(pm)
    loop_rotate!(pm)
    # moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
    loop_idiom!(pm)

    # TODO: Polly (Quo vadis?)

    # LoopRotate strips metadata from terminator, so run LowerSIMD afterwards
    lower_simdloop!(pm) # Annotate loop marked with "loopinfo" as LLVM parallel loop
    licm!(pm)
    julia_licm!(pm)
    # Subsequent passes not stripping metadata from terminator
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy
    ind_var_simplify!(pm)
    loop_deletion!(pm)
    loop_unroll!(pm) # TODO: in Julia createSimpleLoopUnroll

    # Run our own SROA on heap objects before LLVM's
    alloc_opt!(pm)
    # Re-run SROA after loop-unrolling (useful for small loops that operate,
    # over the structure of an aggregate)
    scalar_repl_aggregates!(pm)
    instruction_combining!(pm) # TODO: createInstSimplifyLegacy

    gvn!(pm)
    mem_cpy_opt!(pm)
    sccp!(pm)

    # Run instcombine after redundancy elimination to exploit opportunities
    # opened up by them.
    # This needs to be InstCombine instead of InstSimplify to allow
    # loops over Union-typed arrays to vectorize.
    instruction_combining!(pm)
    jump_threading!(pm)
    dead_store_elimination!(pm)

    # More dead allocation (store) deletion before loop optimization
    # consider removing this:
    alloc_opt!(pm)

    # see if all of the constant folding has exposed more loops
    # to simplification and deletion
    # this helps significantly with cleaning up iteration
    cfgsimplification!(pm)
    loop_deletion!(pm)
    instruction_combining!(pm)
    loop_vectorize!(pm)
    # TODO: createLoopLoadEliminationPass
    cfgsimplification!(pm)
    slpvectorize!(pm)
    # might need this after LLVM 11:
    # TODO: createVectorCombinePass()

    aggressive_dce!(pm)

    if lower_intrinsics
        # LowerPTLS removes an indirect call. As a result, it is likely to trigger
        # LLVM's devirtualization heuristics, which would result in the entire
        # pass pipeline being re-executed. Prevent this by inserting a barrier.
        barrier_noop!(pm)
        lower_exc_handlers!(pm)
        gc_invariant_verifier!(pm, false)
        # Needed **before** LateLowerGCFrame on LLVM < 12
        # due to bug in `CreateAlignmentAssumption`.
        remove_ni!(pm)
        late_lower_gc_frame!(pm)
        final_lower_gc!(pm)
        # We need these two passes and the instcombine below
        # after GC lowering to let LLVM do some constant propagation on the tags.
        # and remove some unnecessary write barrier checks.
        gvn!(pm)
        sccp!(pm)
        # Remove dead use of ptls
        dce!(pm)
        lower_ptls!(pm, dump_native)
        instruction_combining!(pm)
        # Clean up write barrier and ptls lowering
        cfgsimplification!(pm)
    else
        remove_ni!(pm)
    end

    # TODO: createDivRemPairs
    return combine_mul_add!(pm)
end
148+
149+
# Append the machine-level passes to `pm`: float16 demotion followed by GVN.
# `tm` is part of the signature but is not read by this function.
# Returns the result of `gvn!`.
function addMachinePasses!(pm, tm)
    demote_float16!(pm)
    gvn_result = gvn!(pm)
    return gvn_result
end
153+
154+
# Run the full pass pipeline over `mod` using the module-level target machine
# `tm`. Inside a `ModulePassManager` do-block this adds the target, optimization
# and machine passes in that order, then runs them. The optimization passes are
# requested with opt_level 3, lower_intrinsics = true and dump_native = false.
# Returns whatever the `ModulePassManager` do-block call returns.
function run_pipeline!(mod::LLVM.Module)
    return LLVM.ModulePassManager() do pm
        machine = tm[]
        addTargetPasses!(pm, machine)
        addOptimizationPasses!(pm, machine, 3, true, false)
        addMachinePasses!(pm, machine)
        run!(pm, mod)
    end
end

0 commit comments

Comments
 (0)