Skip to content

Commit aae9688

Browse files
committed
Sketch out npu.configure op with ctrl pkt config
Fixes; fix up tests; move control packet data to the end of the runtime_sequence argument list.
1 parent 366fd0e commit aae9688

File tree

13 files changed

+164
-163
lines changed

13 files changed

+164
-163
lines changed

include/aie/Dialect/AIEX/IR/AIEX.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,16 @@ def AIE_NpuControlPacketOp: AIEX_Op<"control_packet", []> {
907907
}];
908908
}
909909

910+
// NPU configure operation (mnemonic `npu.configure`): takes no operands,
// produces no results, and its assembly form carries only an optional
// attribute dictionary.
//
// As of this change the op acts as a placement marker for the
// AIE-to-configuration conversion: that conversion walks the device for
// npu.configure ops and, if any exist, emits the generated transaction /
// control-packet ops immediately before the first one and then erases that
// first op, instead of creating a fresh `configure` runtime sequence.
// NOTE(review): only the first npu.configure found is consumed; any further
// occurrences are left in place by that conversion — confirm this is intended.
def AIE_NpuConfigureOp: AIEX_Op<"npu.configure", []> {
911+
let summary = "Configure NPU operation";
912+
let arguments = (ins );
913+
let results = (outs );
914+
let assemblyFormat = [{ attr-dict }];
915+
let description = [{
916+
This operation is used to configure the NPU.
917+
}];
918+
}
919+
910920
// NPU Bd Write operation
911921
def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
912922
let summary = "dma operator";

lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,7 @@ emitTransactionOps(OpBuilder &builder,
219219
auto loc = builder.getUnknownLoc();
220220

221221
// create the txn ops
222-
for (auto p : llvm::zip(operations, global_data)) {
223-
auto op = std::get<0>(p);
224-
memref::GlobalOp payload = std::get<1>(p);
222+
for (auto [op, payload] : llvm::zip(operations, global_data)) {
225223

226224
if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
227225
builder.create<AIEX::NpuWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
@@ -255,9 +253,7 @@ emitControlPacketOps(OpBuilder &builder,
255253
auto ctx = builder.getContext();
256254

257255
// create the control packet ops
258-
for (auto p : llvm::zip(operations, global_data)) {
259-
auto op = std::get<0>(p);
260-
memref::GlobalOp payload = std::get<1>(p);
256+
for (auto [op, payload] : llvm::zip(operations, global_data)) {
261257

262258
if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
263259
builder.create<AIEX::NpuControlPacketOp>(
@@ -266,10 +262,10 @@ emitControlPacketOps(OpBuilder &builder,
266262
/*stream_id*/ builder.getI32IntegerAttr(0),
267263
DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
268264
} else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
269-
if (!std::get<1>(p).getInitialValue())
265+
if (!payload.getInitialValue())
270266
continue;
271267
auto blockWriteData =
272-
dyn_cast<DenseIntElementsAttr>(*std::get<1>(p).getInitialValue());
268+
dyn_cast<DenseIntElementsAttr>(*payload.getInitialValue());
273269
if (!blockWriteData) {
274270
payload.emitError(
275271
"Global symbol initial value is not a dense int array");
@@ -383,30 +379,44 @@ static LogicalResult convertTransactionOpsToMLIR(
383379
global_data.push_back(global);
384380
}
385381

386-
// create aiex.runtime_sequence
387-
int id = 0;
388-
std::string seq_name = "configure";
389-
while (device.lookupSymbol(seq_name))
390-
seq_name = "configure" + std::to_string(id++);
391-
StringAttr seq_sym_name = builder.getStringAttr(seq_name);
392-
auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
393-
seq.getBody().push_back(new Block);
382+
// search for npu.configure ops in runtime sequences by walking the device
383+
// and collect them in a vector.
384+
SmallVector<AIEX::NpuConfigureOp> configureOps;
385+
device.walk([&](AIEX::NpuConfigureOp op) { configureOps.push_back(op); });
386+
387+
if (configureOps.empty()) {
388+
389+
// create aiex.runtime_sequence
390+
int id = 0;
391+
std::string seq_name = "configure";
392+
while (device.lookupSymbol(seq_name))
393+
seq_name = "configure" + std::to_string(id++);
394+
StringAttr seq_sym_name = builder.getStringAttr(seq_name);
395+
auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
396+
seq.getBody().push_back(new Block);
397+
398+
builder.setInsertionPointToStart(&seq.getBody().front());
399+
} else {
400+
builder.setInsertionPoint(configureOps.front());
401+
}
394402

395403
// create the txn ops
396-
builder.setInsertionPointToStart(&seq.getBody().front());
397404
if (outputType == OutputType::Transaction) {
398405
if (failed(emitTransactionOps(builder, operations, global_data)))
399406
return failure();
400407
} else if (outputType == OutputType::ControlPacket) {
401408
if (failed(emitControlPacketOps(builder, operations, global_data)))
402409
return failure();
403410
// resolve mask writes; control packet doesn't natively support mask write.
404-
if (failed(orConsecutiveWritesOnSameAddr(&seq.getBody().front())))
411+
if (failed(orConsecutiveWritesOnSameAddr(builder.getBlock())))
405412
return failure();
406413
} else {
407414
llvm_unreachable("bad output type");
408415
}
409416

417+
if (!configureOps.empty())
418+
configureOps.front().erase();
419+
410420
return success();
411421
}
412422

lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h"
1515

1616
#include "mlir/IR/Attributes.h"
17+
#include "mlir/IR/IRMapping.h"
1718
#include "mlir/Pass/Pass.h"
1819

1920
#include "llvm/ADT/TypeSwitch.h"
@@ -72,21 +73,34 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
7273

7374
OpBuilder builder(f);
7475

76+
IRMapping mapping;
77+
7578
auto newSeq =
7679
builder.create<AIEX::RuntimeSequenceOp>(loc, f.getSymNameAttr());
7780
newSeq.getBody().push_back(new Block);
7881

82+
// Copy the arguments from the old sequence to the new one.
83+
for (auto arg : f.getBody().getArguments()) {
84+
// Add the argument to the new sequence.
85+
auto newArg = newSeq.getBody().addArgument(arg.getType(), arg.getLoc());
86+
// Replace all uses of the old argument with the new one.
87+
arg.replaceAllUsesWith(newArg);
88+
// Add the mapping for the argument.
89+
mapping.map(arg, newArg);
90+
}
91+
7992
// Using dynamic shape for ctrl pkt stream.
8093
auto ctrlPktMemrefType = MemRefType::get(
8194
ShapedType::kDynamic, IntegerType::get(ctx, 32), nullptr, 0);
8295
auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc);
96+
8397
builder.setInsertionPointToStart(&newSeq.getBody().front());
8498

8599
int64_t ddrOffset = 0;
86100
Block &entry = f.getBody().front();
87101
for (auto &o : entry) {
88-
llvm::TypeSwitch<Operation *>(&o).Case<NpuControlPacketOp>(
89-
[&](auto op) {
102+
llvm::TypeSwitch<Operation *>(&o)
103+
.Case<NpuControlPacketOp>([&](auto op) {
90104
// Destination tile info
91105
int col = op.getColumnFromAddr();
92106
int row = op.getRowFromAddr();
@@ -131,6 +145,10 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
131145
auto row_num = builder.getI32IntegerAttr(1);
132146
builder.create<AIEX::NpuSyncOp>(loc, shimCol, shimRow, dir, chan,
133147
col_num, row_num);
148+
})
149+
.Default([&](Operation *op) {
150+
// For all other operations, just clone them to the new sequence.
151+
builder.clone(*op, mapping);
134152
});
135153
}
136154

lib/Targets/AIETargetNPU.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,11 @@ LogicalResult xilinx::AIE::AIETranslateControlPacketsToUI32Vec(
330330
int row = packetOp.getRowFromAddr();
331331
auto destTile = TileOp::getOrCreate(builder, deviceOp, col, row);
332332
auto info = destTile->getAttrOfType<AIE::PacketInfoAttr>("controller_id");
333-
if (!info)
334-
return destTile->emitError("Expected controller_id attribute");
335-
uint32_t hdr = (info.getPktType() & 0x7) << 12 | (info.getPktId() & 0xff);
333+
uint32_t hdr = 0;
334+
if (info)
335+
hdr = (info.getPktType() & 0x7) << 12 | (info.getPktId() & 0xff);
336+
else
337+
destTile->emitWarning("Expected controller_id attribute");
336338
words[0] = hdr | (0x1 & parity(hdr)) << 31;
337339

338340
// control packet header

python/compiler/aiecc/main.py

Lines changed: 74 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import aie.compiler.aiecc.cl_arguments
3232
import aie.compiler.aiecc.configure
3333
from aie.dialects import aie as aiedialect
34+
from aie.dialects import aiex as aiexdialect
3435
from aie.ir import Context, Location, Module
3536
from aie.passmanager import PassManager
3637

@@ -614,7 +615,9 @@ async def process_txn(self, module_str):
614615
print(f"copy {tmp} to {opts.txn_name}")
615616
shutil.copy(tmp, opts.txn_name)
616617

617-
async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
618+
async def aiebu_asm(
619+
self, input_file, output_file, ctrl_packet_file=None, ctrl_packet_idx=0
620+
):
618621

619622
# find aiebu-asm binary
620623
asm_bin = "aiebu-asm"
@@ -645,7 +648,7 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
645648
exteral_buffers_json = {
646649
"external_buffers": {
647650
"buffer_ctrl": {
648-
"xrt_id": 0,
651+
"xrt_id": ctrl_packet_idx,
649652
"logical_id": -1,
650653
"size_in_bytes": ctrl_packet_size,
651654
"ctrl_pkt_buffer": 1,
@@ -665,49 +668,53 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
665668
await self.do_call(None, args)
666669

667670
async def process_ctrlpkt(self, module_str):
668-
with Context(), Location.unknown():
669-
run_passes(
670-
"builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
671-
+ self.tmpdirname
672-
+ "}))",
673-
module_str,
671+
run_passes(
672+
"builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
673+
+ self.tmpdirname
674+
+ "}))",
675+
module_str,
676+
self.prepend_tmp("ctrlpkt.mlir"),
677+
self.opts.verbose,
678+
)
679+
await self.do_call(
680+
None,
681+
[
682+
"aie-translate",
683+
"-aie-ctrlpkt-to-bin",
674684
self.prepend_tmp("ctrlpkt.mlir"),
675-
self.opts.verbose,
676-
)
677-
await self.do_call(
678-
None,
679-
[
680-
"aie-translate",
681-
"-aie-ctrlpkt-to-bin",
682-
"-aie-sequence-name",
683-
"configure",
684-
self.prepend_tmp("ctrlpkt.mlir"),
685-
"-o",
686-
"ctrlpkt.bin",
687-
],
688-
)
689-
ctrlpkt_mlir_str = await read_file_async(self.prepend_tmp("ctrlpkt.mlir"))
690-
run_passes(
691-
"builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))",
692-
ctrlpkt_mlir_str,
685+
"-o",
686+
"ctrlpkt.bin",
687+
],
688+
)
689+
ctrlpkt_mlir_str = await read_file_async(self.prepend_tmp("ctrlpkt.mlir"))
690+
run_passes(
691+
"builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))",
692+
ctrlpkt_mlir_str,
693+
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
694+
self.opts.verbose,
695+
)
696+
await self.do_call(
697+
None,
698+
[
699+
"aie-translate",
700+
"-aie-npu-to-binary",
693701
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
694-
self.opts.verbose,
695-
)
696-
await self.do_call(
697-
None,
698-
[
699-
"aie-translate",
700-
"-aie-npu-to-binary",
701-
"-aie-sequence-name",
702-
"configure",
703-
self.prepend_tmp("ctrlpkt_dma_seq.mlir"),
704-
"-o",
705-
"ctrlpkt_dma_seq.bin",
706-
],
707-
)
708-
await self.aiebu_asm(
709-
"ctrlpkt_dma_seq.bin", "ctrlpkt_dma_seq.elf", "ctrlpkt.bin"
702+
"-o",
703+
opts.insts_name,
704+
],
705+
)
706+
ctrl_idx = 0
707+
ctrl_seq_str = await read_file_async(self.prepend_tmp("ctrlpkt_dma_seq.mlir"))
708+
with Context(), Location.unknown():
709+
dma_seq_module = Module.parse(ctrl_seq_str)
710+
# walk through the dma sequence module to find runtime sequence
711+
seqs = find_ops(
712+
dma_seq_module.operation,
713+
lambda o: isinstance(o.operation.opview, aiexdialect.RuntimeSequenceOp),
710714
)
715+
if seqs:
716+
ctrl_idx = len(seqs[0].regions[0].blocks[0].arguments.types) - 1
717+
await self.aiebu_asm(opts.insts_name, opts.elf_name, "ctrlpkt.bin", ctrl_idx)
711718

712719
async def process_elf(self, module_str):
713720
with Context(), Location.unknown():
@@ -1233,30 +1240,6 @@ async def run_flow(self):
12331240
exit(-3)
12341241
aie_peano_target = aie_target.lower() + "-none-unknown-elf"
12351242

1236-
# Optionally generate insts.txt for NPU instruction stream
1237-
if opts.npu:
1238-
with Context(), Location.unknown():
1239-
file_with_addresses_module = Module.parse(
1240-
await read_file_async(file_with_addresses)
1241-
)
1242-
pass_pipeline = NPU_LOWERING_PIPELINE.materialize(module=True)
1243-
npu_insts_file = (
1244-
self.prepend_tmp("npu_insts.mlir")
1245-
if self.opts.verbose
1246-
else None
1247-
)
1248-
npu_insts_module = run_passes_module(
1249-
pass_pipeline,
1250-
file_with_addresses_module,
1251-
npu_insts_file,
1252-
self.opts.verbose,
1253-
)
1254-
npu_insts = aiedialect.translate_npu_to_binary(
1255-
npu_insts_module.operation
1256-
)
1257-
with open(opts.insts_name, "wb") as f:
1258-
f.write(struct.pack("I" * len(npu_insts), *npu_insts))
1259-
12601243
# fmt: off
12611244
if opts.unified:
12621245
file_opt_with_addresses = self.prepend_tmp("input_opt_with_addresses.mlir")
@@ -1349,6 +1332,30 @@ async def run_flow(self):
13491332
if (opts.cdo or opts.xcl or opts.pdi) and opts.execute:
13501333
await self.process_cdo(input_physical_str)
13511334

1335+
# Optionally generate insts.txt for NPU instruction stream
1336+
if opts.npu and not opts.ctrlpkt:
1337+
with Context(), Location.unknown():
1338+
file_with_addresses_module = Module.parse(
1339+
await read_file_async(file_with_addresses)
1340+
)
1341+
pass_pipeline = NPU_LOWERING_PIPELINE.materialize(module=True)
1342+
npu_insts_file = (
1343+
self.prepend_tmp("npu_insts.mlir")
1344+
if self.opts.verbose
1345+
else None
1346+
)
1347+
npu_insts_module = run_passes_module(
1348+
pass_pipeline,
1349+
file_with_addresses_module,
1350+
npu_insts_file,
1351+
self.opts.verbose,
1352+
)
1353+
npu_insts = aiedialect.translate_npu_to_binary(
1354+
npu_insts_module.operation
1355+
)
1356+
with open(opts.insts_name, "wb") as f:
1357+
f.write(struct.pack("I" * len(npu_insts), *npu_insts))
1358+
13521359
processes = []
13531360
if opts.xcl:
13541361
processes.append(self.process_xclbin_gen())
@@ -1363,7 +1370,7 @@ async def run_flow(self):
13631370
if opts.ctrlpkt and opts.execute:
13641371
processes.append(self.process_ctrlpkt(input_physical_str))
13651372

1366-
if opts.elf and opts.execute:
1373+
if opts.elf and not opts.ctrlpkt and opts.execute:
13671374
processes.append(self.process_elf(input_physical_str))
13681375

13691376
await asyncio.gather(*processes)

test/npu-xrt/ctrl_packet_reconfig/aie2.mlir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,13 @@ module {
6767

6868
aie.shim_dma_allocation @objFifo_in0(MM2S, 0, 0)
6969

70-
aiex.runtime_sequence @run(%arg0: memref<?xi8>, %arg1: memref<64x64xi8>, %arg2: memref<64x64xi8>) {
70+
aiex.runtime_sequence @run(%arg1: memref<64x64xi8>, %arg2: memref<64x64xi8>) {
7171
%c0_i64 = arith.constant 0 : i64
7272
%c1_i64 = arith.constant 1 : i64
7373
%c56_i64 = arith.constant 56 : i64
7474
%c61_i64 = arith.constant 61 : i64
7575
%c64_i64 = arith.constant 64 : i64
76+
aiex.npu.configure
7677
aiex.npu.dma_memcpy_nd (%arg1[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c64_i64, %c64_i64][%c0_i64, %c0_i64, %c64_i64, %c1_i64], packet = <pkt_id = 3, pkt_type = 0>) {id = 0 : i64, metadata = @objFifo_in0} : memref<64x64xi8>
7778
aiex.npu.dma_memcpy_nd (%arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c64_i64, %c64_i64][%c0_i64, %c0_i64, %c64_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0, issue_token = true} : memref<64x64xi8>
7879
aiex.npu.dma_wait { symbol = @objFifo_out0 }

0 commit comments

Comments
 (0)