31
31
import aie .compiler .aiecc .cl_arguments
32
32
import aie .compiler .aiecc .configure
33
33
from aie .dialects import aie as aiedialect
34
+ from aie .dialects import aiex as aiexdialect
34
35
from aie .ir import Context , Location , Module
35
36
from aie .passmanager import PassManager
36
37
@@ -614,7 +615,9 @@ async def process_txn(self, module_str):
614
615
print (f"copy { tmp } to { opts .txn_name } " )
615
616
shutil .copy (tmp , opts .txn_name )
616
617
617
- async def aiebu_asm (self , input_file , output_file , ctrl_packet_file = None ):
618
+ async def aiebu_asm (
619
+ self , input_file , output_file , ctrl_packet_file = None , ctrl_packet_idx = 0
620
+ ):
618
621
619
622
# find aiebu-asm binary
620
623
asm_bin = "aiebu-asm"
@@ -645,7 +648,7 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
645
648
exteral_buffers_json = {
646
649
"external_buffers" : {
647
650
"buffer_ctrl" : {
648
- "xrt_id" : 0 ,
651
+ "xrt_id" : ctrl_packet_idx ,
649
652
"logical_id" : - 1 ,
650
653
"size_in_bytes" : ctrl_packet_size ,
651
654
"ctrl_pkt_buffer" : 1 ,
@@ -665,49 +668,53 @@ async def aiebu_asm(self, input_file, output_file, ctrl_packet_file=None):
665
668
await self .do_call (None , args )
666
669
667
670
async def process_ctrlpkt (self , module_str ):
668
- with Context (), Location .unknown ():
669
- run_passes (
670
- "builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
671
- + self .tmpdirname
672
- + "}))" ,
673
- module_str ,
671
+ run_passes (
672
+ "builtin.module(aie.device(convert-aie-to-control-packets{elf-dir="
673
+ + self .tmpdirname
674
+ + "}))" ,
675
+ module_str ,
676
+ self .prepend_tmp ("ctrlpkt.mlir" ),
677
+ self .opts .verbose ,
678
+ )
679
+ await self .do_call (
680
+ None ,
681
+ [
682
+ "aie-translate" ,
683
+ "-aie-ctrlpkt-to-bin" ,
674
684
self .prepend_tmp ("ctrlpkt.mlir" ),
675
- self .opts .verbose ,
676
- )
677
- await self .do_call (
678
- None ,
679
- [
680
- "aie-translate" ,
681
- "-aie-ctrlpkt-to-bin" ,
682
- "-aie-sequence-name" ,
683
- "configure" ,
684
- self .prepend_tmp ("ctrlpkt.mlir" ),
685
- "-o" ,
686
- "ctrlpkt.bin" ,
687
- ],
688
- )
689
- ctrlpkt_mlir_str = await read_file_async (self .prepend_tmp ("ctrlpkt.mlir" ))
690
- run_passes (
691
- "builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))" ,
692
- ctrlpkt_mlir_str ,
685
+ "-o" ,
686
+ "ctrlpkt.bin" ,
687
+ ],
688
+ )
689
+ ctrlpkt_mlir_str = await read_file_async (self .prepend_tmp ("ctrlpkt.mlir" ))
690
+ run_passes (
691
+ "builtin.module(aie.device(aie-ctrl-packet-to-dma,aie-dma-to-npu))" ,
692
+ ctrlpkt_mlir_str ,
693
+ self .prepend_tmp ("ctrlpkt_dma_seq.mlir" ),
694
+ self .opts .verbose ,
695
+ )
696
+ await self .do_call (
697
+ None ,
698
+ [
699
+ "aie-translate" ,
700
+ "-aie-npu-to-binary" ,
693
701
self .prepend_tmp ("ctrlpkt_dma_seq.mlir" ),
694
- self .opts .verbose ,
695
- )
696
- await self .do_call (
697
- None ,
698
- [
699
- "aie-translate" ,
700
- "-aie-npu-to-binary" ,
701
- "-aie-sequence-name" ,
702
- "configure" ,
703
- self .prepend_tmp ("ctrlpkt_dma_seq.mlir" ),
704
- "-o" ,
705
- "ctrlpkt_dma_seq.bin" ,
706
- ],
707
- )
708
- await self .aiebu_asm (
709
- "ctrlpkt_dma_seq.bin" , "ctrlpkt_dma_seq.elf" , "ctrlpkt.bin"
702
+ "-o" ,
703
+ opts .insts_name ,
704
+ ],
705
+ )
706
+ ctrl_idx = 0
707
+ ctrl_seq_str = await read_file_async (self .prepend_tmp ("ctrlpkt_dma_seq.mlir" ))
708
+ with Context (), Location .unknown ():
709
+ dma_seq_module = Module .parse (ctrl_seq_str )
710
+ # walk through the dma sequence module to find runtime sequence
711
+ seqs = find_ops (
712
+ dma_seq_module .operation ,
713
+ lambda o : isinstance (o .operation .opview , aiexdialect .RuntimeSequenceOp ),
710
714
)
715
+ if seqs :
716
+ ctrl_idx = len (seqs [0 ].regions [0 ].blocks [0 ].arguments .types ) - 1
717
+ await self .aiebu_asm (opts .insts_name , opts .elf_name , "ctrlpkt.bin" , ctrl_idx )
711
718
712
719
async def process_elf (self , module_str ):
713
720
with Context (), Location .unknown ():
@@ -1233,30 +1240,6 @@ async def run_flow(self):
1233
1240
exit (- 3 )
1234
1241
aie_peano_target = aie_target .lower () + "-none-unknown-elf"
1235
1242
1236
- # Optionally generate insts.txt for NPU instruction stream
1237
- if opts .npu :
1238
- with Context (), Location .unknown ():
1239
- file_with_addresses_module = Module .parse (
1240
- await read_file_async (file_with_addresses )
1241
- )
1242
- pass_pipeline = NPU_LOWERING_PIPELINE .materialize (module = True )
1243
- npu_insts_file = (
1244
- self .prepend_tmp ("npu_insts.mlir" )
1245
- if self .opts .verbose
1246
- else None
1247
- )
1248
- npu_insts_module = run_passes_module (
1249
- pass_pipeline ,
1250
- file_with_addresses_module ,
1251
- npu_insts_file ,
1252
- self .opts .verbose ,
1253
- )
1254
- npu_insts = aiedialect .translate_npu_to_binary (
1255
- npu_insts_module .operation
1256
- )
1257
- with open (opts .insts_name , "wb" ) as f :
1258
- f .write (struct .pack ("I" * len (npu_insts ), * npu_insts ))
1259
-
1260
1243
# fmt: off
1261
1244
if opts .unified :
1262
1245
file_opt_with_addresses = self .prepend_tmp ("input_opt_with_addresses.mlir" )
@@ -1349,6 +1332,30 @@ async def run_flow(self):
1349
1332
if (opts .cdo or opts .xcl or opts .pdi ) and opts .execute :
1350
1333
await self .process_cdo (input_physical_str )
1351
1334
1335
+ # Optionally generate insts.txt for NPU instruction stream
1336
+ if opts .npu and not opts .ctrlpkt :
1337
+ with Context (), Location .unknown ():
1338
+ file_with_addresses_module = Module .parse (
1339
+ await read_file_async (file_with_addresses )
1340
+ )
1341
+ pass_pipeline = NPU_LOWERING_PIPELINE .materialize (module = True )
1342
+ npu_insts_file = (
1343
+ self .prepend_tmp ("npu_insts.mlir" )
1344
+ if self .opts .verbose
1345
+ else None
1346
+ )
1347
+ npu_insts_module = run_passes_module (
1348
+ pass_pipeline ,
1349
+ file_with_addresses_module ,
1350
+ npu_insts_file ,
1351
+ self .opts .verbose ,
1352
+ )
1353
+ npu_insts = aiedialect .translate_npu_to_binary (
1354
+ npu_insts_module .operation
1355
+ )
1356
+ with open (opts .insts_name , "wb" ) as f :
1357
+ f .write (struct .pack ("I" * len (npu_insts ), * npu_insts ))
1358
+
1352
1359
processes = []
1353
1360
if opts .xcl :
1354
1361
processes .append (self .process_xclbin_gen ())
@@ -1363,7 +1370,7 @@ async def run_flow(self):
1363
1370
if opts .ctrlpkt and opts .execute :
1364
1371
processes .append (self .process_ctrlpkt (input_physical_str ))
1365
1372
1366
- if opts .elf and opts .execute :
1373
+ if opts .elf and not opts . ctrlpkt and opts .execute :
1367
1374
processes .append (self .process_elf (input_physical_str ))
1368
1375
1369
1376
await asyncio .gather (* processes )
0 commit comments