Skip to content

Commit 2d76cfa

Browse files
committed
Move control packet data to end of runtime_sequence argument list
1 parent 02bc066 commit 2d76cfa

File tree

7 files changed: +77 additions, -59 deletions

lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,9 @@ static LogicalResult convertTransactionOpsToMLIR(
408408
llvm_unreachable("bad output type");
409409
}
410410

411+
if (!configureOps.empty())
412+
configureOps.front().erase();
413+
411414
return success();
412415
}
413416

lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,6 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
7979
builder.create<AIEX::RuntimeSequenceOp>(loc, f.getSymNameAttr());
8080
newSeq.getBody().push_back(new Block);
8181

82-
// Using dynamic shape for ctrl pkt stream.
83-
auto ctrlPktMemrefType = MemRefType::get(
84-
ShapedType::kDynamic, IntegerType::get(ctx, 32), nullptr, 0);
85-
auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc);
8682
// Copy the arguments from the old sequence to the new one.
8783
for (auto arg : f.getBody().getArguments()) {
8884
// Add the argument to the new sequence.
@@ -92,6 +88,12 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
9288
// Add the mapping for the argument.
9389
mapping.map(arg, newArg);
9490
}
91+
92+
// Using dynamic shape for ctrl pkt stream.
93+
auto ctrlPktMemrefType = MemRefType::get(
94+
ShapedType::kDynamic, IntegerType::get(ctx, 32), nullptr, 0);
95+
auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc);
96+
9597
builder.setInsertionPointToStart(&newSeq.getBody().front());
9698

9799
int64_t ddrOffset = 0;

python/compiler/aiecc/main.py

Lines changed: 49 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import aie.compiler.aiecc.cl_arguments
3232
import aie.compiler.aiecc.configure
3333
from aie.dialects import aie as aiedialect
34+
from aie.dialects import aiex as aiexdialect
3435
from aie.ir import Context, Location, Module
3536
from aie.passmanager import PassManager
3637

@@ -614,7 +615,9 @@ async def process_txn(self, module_str):
614615
print(f"copy {tmp} to {opts.txn_name}")
615616
shutil.copy(tmp, opts.txn_name)
616617

617-
async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
618+
async def aiebu_asm(
619+
self, input_file, output_file, ctrl_packet_file=None, ctrl_packet_idx=0
620+
):
618621

619622
# find aiebu-asm binary
620623
asm_bin = "aiebu-asm"
@@ -645,7 +648,7 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
645648
exteral_buffers_json = {
646649
"external_buffers": {
647650
"buffer_ctrl": {
648-
"xrt_id": 0,
651+
"xrt_id": ctrl_packet_idx,
649652
"logical_id": -1,
650653
"size_in_bytes": ctrl_packet_size,
651654
"ctrl_pkt_buffer": 1,
@@ -665,43 +668,53 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
665668
await self.do_call(None, args)
666669

667670
async def process_ctrlpkt(self, module_str):
668-
with Context(), Location.unknown():
669-
run_passes(
670-
"builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
671-
+ self.tmpdirname
672-
+ "}))",
673-
module_str,
671+
run_passes(
672+
"builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
673+
+ self.tmpdirname
674+
+ "}))",
675+
module_str,
676+
self.prepend_tmp("ctrlpkt.mlir"),
677+
self.opts.verbose,
678+
)
679+
await self.do_call(
680+
None,
681+
[
682+
"aie-translate",
683+
"-aie-ctrlpkt-to-bin",
674684
self.prepend_tmp("ctrlpkt.mlir"),
675-
self.opts.verbose,
676-
)
677-
await self.do_call(
678-
None,
679-
[
680-
"aie-translate",
681-
"-aie-ctrlpkt-to-bin",
682-
self.prepend_tmp("ctrlpkt.mlir"),
683-
"-o",
684-
"ctrlpkt.bin",
685-
],
686-
)
687-
ctrlpkt_mlir_str = await read_file_async(self.prepend_tmp("ctrlpkt.mlir"))
688-
run_passes(
689-
"builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))",
690-
ctrlpkt_mlir_str,
685+
"-o",
686+
"ctrlpkt.bin",
687+
],
688+
)
689+
ctrlpkt_mlir_str = await read_file_async(self.prepend_tmp("ctrlpkt.mlir"))
690+
run_passes(
691+
"builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))",
692+
ctrlpkt_mlir_str,
693+
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
694+
self.opts.verbose,
695+
)
696+
await self.do_call(
697+
None,
698+
[
699+
"aie-translate",
700+
"-aie-npu-to-binary",
691701
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
692-
self.opts.verbose,
693-
)
694-
await self.do_call(
695-
None,
696-
[
697-
"aie-translate",
698-
"-aie-npu-to-binary",
699-
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
700-
"-o",
701-
opts.insts_name,
702-
],
702+
"-o",
703+
opts.insts_name,
704+
],
705+
)
706+
ctrl_idx = 0
707+
ctrl_seq_str = await read_file_async(self.prepend_tmp("ctrlpkt_dma_seq.mlir"))
708+
with Context(), Location.unknown():
709+
dma_seq_module = Module.parse(ctrl_seq_str)
710+
# walk through the dma sequence module to find runtime sequence
711+
seqs = find_ops(
712+
dma_seq_module.operation,
713+
lambda o: isinstance(o.operation.opview, aiexdialect.RuntimeSequenceOp),
703714
)
704-
await self.aiebu_asm(opts.insts_name, opts.elf_name, "ctrlpkt.bin")
715+
if seqs:
716+
ctrl_idx = len(seqs[0].regions[0].blocks[0].arguments.types) - 1
717+
await self.aiebu_asm(opts.insts_name, opts.elf_name, "ctrlpkt.bin", ctrl_idx)
705718

706719
async def process_elf(self, module_str):
707720
with Context(), Location.unknown():

test/npu-xrt/ctrl_packet_reconfig/test.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,11 @@ int main(int argc, const char *argv[]) {
6868
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
6969
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
7070
auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(IN_DATATYPE),
71-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
71+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
7272
auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(OUT_DATATYPE),
73-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
73+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
7474
auto bo_ctrlpkt = xrt::bo(device, ctrlPackets.size() * sizeof(int32_t),
75-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
75+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
7676

7777
IN_DATATYPE *bufInA = bo_inA.map<IN_DATATYPE *>();
7878
std::vector<IN_DATATYPE> srcVecA;
@@ -101,9 +101,9 @@ int main(int argc, const char *argv[]) {
101101
run1.set_arg(0, opcode);
102102
run1.set_arg(1, bo_instr);
103103
run1.set_arg(2, instr_v.size());
104-
run1.set_arg(3, bo_ctrlpkt);
105-
run1.set_arg(4, bo_inA);
106-
run1.set_arg(5, bo_out);
104+
run1.set_arg(3, bo_inA);
105+
run1.set_arg(4, bo_out);
106+
run1.set_arg(5, bo_ctrlpkt);
107107

108108
// Executing and waiting on the runlist
109109
runlist.add(run1);

test/npu-xrt/ctrl_packet_reconfig_1x4_cores/test.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@ int main(int argc, const char *argv[]) {
6161
auto kernel = xrt::kernel(context, kernelName);
6262

6363
auto bo_ctrlpkt = xrt::bo(device, ctrlPackets.size() * sizeof(int32_t),
64-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
64+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
6565
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
6666
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
6767
auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(IN_DATATYPE),
68-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
68+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
6969
auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(OUT_DATATYPE),
70-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
70+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
7171

7272
IN_DATATYPE *bufInA = bo_inA.map<IN_DATATYPE *>();
7373
std::vector<IN_DATATYPE> srcVecA;
@@ -96,9 +96,9 @@ int main(int argc, const char *argv[]) {
9696
run1.set_arg(0, opcode);
9797
run1.set_arg(1, bo_instr);
9898
run1.set_arg(2, instr_v.size());
99-
run1.set_arg(3, bo_ctrlpkt);
100-
run1.set_arg(4, bo_inA);
101-
run1.set_arg(5, bo_out);
99+
run1.set_arg(3, bo_inA);
100+
run1.set_arg(4, bo_out);
101+
run1.set_arg(5, bo_ctrlpkt);
102102

103103
// Executing and waiting on the runlist
104104
runlist.add(run1);

test/npu-xrt/ctrl_packet_reconfig_4x1_cores/test.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@ int main(int argc, const char *argv[]) {
6161
auto kernel = xrt::kernel(context, kernelName);
6262

6363
auto bo_ctrlpkt = xrt::bo(device, ctrlPackets.size() * sizeof(int32_t),
64-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
64+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
6565
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
6666
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
6767
auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(IN_DATATYPE),
68-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
68+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
6969
auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(OUT_DATATYPE),
70-
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));
70+
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
7171

7272
IN_DATATYPE *bufInA = bo_inA.map<IN_DATATYPE *>();
7373
std::vector<IN_DATATYPE> srcVecA;
@@ -96,9 +96,9 @@ int main(int argc, const char *argv[]) {
9696
run1.set_arg(0, opcode);
9797
run1.set_arg(1, bo_instr);
9898
run1.set_arg(2, instr_v.size());
99-
run1.set_arg(3, bo_ctrlpkt);
100-
run1.set_arg(4, bo_inA);
101-
run1.set_arg(5, bo_out);
99+
run1.set_arg(3, bo_inA);
100+
run1.set_arg(4, bo_out);
101+
run1.set_arg(5, bo_ctrlpkt);
102102

103103
// Executing and waiting on the runlist
104104
runlist.add(run1);

test/npu-xrt/ctrl_packet_reconfig_elf/test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ int main(int argc, const char *argv[]) {
7575

7676
unsigned int opcode = 3;
7777

78-
kernel0(opcode, 0, 0, 0, bo_in, bo_out).wait2();
78+
kernel0(opcode, 0, 0, bo_in, bo_out).wait2();
7979

8080
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
8181

0 commit comments

Comments
 (0)