Skip to content

Commit 03f2a63

Browse files
committed
[AIEX] Reorder PostInc and PreInc Memory Instr
1 parent 64e5db9 commit 03f2a63

File tree

10 files changed

+290
-152
lines changed

10 files changed

+290
-152
lines changed

llvm/lib/Target/AIE/AIEGlobalCombiner.cpp

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,12 @@ AIEGlobalCombiner::findBeneficialCombiners() {
6969
CombineCandidates.filterOut(FixedCombiners);
7070
CombineCandidates.filterOut(FoundCombiners);
7171

72-
for (auto *Combiner :
73-
CombineCandidates.searchCombinerSet(OwnedCombineCandidates))
74-
FoundCombiners.push_back(Combiner);
72+
auto Combiners =
73+
CombineCandidates.searchCombinerSet(OwnedCombineCandidates);
74+
reorderCombinerInsertions(Combiners);
75+
76+
FoundCombiners.insert(FoundCombiners.end(), Combiners.begin(),
77+
Combiners.end());
7578
}
7679

7780
LLVM_DEBUG(dbgs() << "[Global Combiner] Found " << FoundCombiners.size()
@@ -80,6 +83,27 @@ AIEGlobalCombiner::findBeneficialCombiners() {
8083
return FoundCombiners;
8184
}
8285

86+
void AIEGlobalCombiner::reorderCombinerInsertions(
87+
std::vector<GenericCombiner *> &Combiners) const {
88+
for (auto *Combiner : Combiners) {
89+
if (!Combiner->canReorder())
90+
continue;
91+
92+
auto It = std::find_if(Combiners.rbegin(), Combiners.rend(),
93+
[Combiner](GenericCombiner *Candidate) {
94+
return Combiner->isReorderCandidate(Candidate);
95+
});
96+
97+
if (It == Combiners.rend())
98+
continue;
99+
100+
auto *BestCandidate = *It;
101+
Combiner->copyInsertionPoint(BestCandidate);
102+
LLVM_DEBUG(dbgs() << "Reordering \n"; Combiner->dumpFull();
103+
BestCandidate->dumpFull(););
104+
}
105+
}
106+
83107
void AIEGlobalCombiner::calculateCombineCandidates(
84108
SUnit &CombineRoot, const GenericCombiner *Combiner) {
85109
assert(MDT);
@@ -198,7 +222,7 @@ std::vector<CombineCandidates> AIEGlobalCombiner::getCombineCandidates(
198222

199223
// -------------------------- CombineCandidates ------------------------------//
200224

201-
std::vector<const GenericCombiner *> CombineCandidates::searchCombinerSet(
225+
std::vector<GenericCombiner *> CombineCandidates::searchCombinerSet(
202226
const std::vector<std::unique_ptr<GenericCombiner>>
203227
&OwnedCombineCandidates) {
204228
if (Combiners.empty())
@@ -299,7 +323,7 @@ std::vector<const GenericCombiner *> CombineCandidates::searchCombinerSet(
299323
LLVM_DEBUG(dbgs() << "Search Result " << BestSolution.getGain() << "\n");
300324

301325
// Save best Candidate to FixedCombiners
302-
std::vector<const GenericCombiner *> Result;
326+
std::vector<GenericCombiner *> Result;
303327
BitVector CombinerBitVec = BestSolution.getCombinersBitVector();
304328
for (int Idx = CombinerBitVec.find_first(); Idx != -1;
305329
Idx = CombinerBitVec.find_next(Idx)) {
@@ -517,6 +541,16 @@ void GenericCombiner::setGlobalID(unsigned GlobalID) {
517541
this->GlobalID = GlobalID;
518542
}
519543

544+
bool GenericCombiner::isReorderCandidate(
545+
const GenericCombiner *Candidate) const {
546+
return false;
547+
}
548+
549+
void GenericCombiner::copyInsertionPoint(const GenericCombiner *Candidate) {
550+
CombinerData.InsertionPoint = Candidate->CombinerData.InsertionPoint;
551+
InsertionPointNodeNum = Candidate->InsertionPointNodeNum;
552+
}
553+
520554
/// \return whether a Combiner is used after a Remove-Combiner, that
521555
/// are part of the same Cluster. The Ordering of the Combiners \p A and \p B is
522556
/// irrelevant.

llvm/lib/Target/AIE/AIEGlobalCombiner.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,16 @@ class GenericCombiner {
256256

257257
/// Set unique Identifier for this Combiner to \p GlobalID
258258
void setGlobalID(unsigned GlobalID);
259+
260+
/// \return whether this Combiner could be moved before another Combiner
261+
virtual bool canReorder() const = 0;
262+
263+
/// \return whether \p Candidate is a ReorderCandidate, i.e. if this combiner
264+
/// can be inserted right before \p Candidate
265+
virtual bool isReorderCandidate(const GenericCombiner *Candidate) const;
266+
267+
/// Set InsertionPoint of this combiner to the same as \p Candidate
268+
void copyInsertionPoint(const GenericCombiner *Candidate);
259269
};
260270

261271
raw_ostream &operator<<(raw_ostream &OS, const GenericCombiner &Val);
@@ -338,7 +348,7 @@ class CombineCandidates {
338348

339349
/// \return Combiners from \p OwnedCombineCandidates that maximize the gain
340350
/// when applied
341-
std::vector<const GenericCombiner *>
351+
std::vector<GenericCombiner *>
342352
searchCombinerSet(const std::vector<std::unique_ptr<GenericCombiner>>
343353
&OwnedCombineCandidates);
344354

@@ -396,6 +406,9 @@ class AIEGlobalCombiner {
396406

397407
void calculateCombinerConflicts();
398408

409+
void
410+
reorderCombinerInsertions(std::vector<GenericCombiner *> &Combiners) const;
411+
399412
/// \return CombineCandidates sorted by highest potential gain
400413
std::vector<CombineCandidates> getCombineCandidates(
401414
std::map<Register, CombineCandidates> ClusteredCombiners);

llvm/lib/Target/AIE/AIEGlobalCombinerPtrMods.cpp

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,25 @@ std::unique_ptr<GenericCombiner> OffsetCombiner::clone() const {
315315

316316
std::optional<std::pair<std::vector<SUnit *>, std::vector<SUnit *>>>
317317
OffsetCombiner::getInstructionsToMove(const AIE::DataDependenceHelper &DAG) {
318-
return {{/*MoveUp*/ {}, /*MoveDown*/ {}}};
318+
auto *PtrAdd = getPtrInc();
319+
if (!getImm(*PtrAdd, *MRI)) {
320+
/// Offset is not an immediate and the OffsetCombiner is not eligible for
321+
/// reordering.
322+
// Since the Offset already dominates the MemoryInstruction (where the
323+
// insertion happens), no checks have to be performed.
324+
return {{/*MoveUp=*/{}, /*MoveDown=*/{}}};
325+
}
326+
327+
auto *SUnitPtrAdd = DAG.getSUnit(PtrAdd);
328+
if (!SUnitPtrAdd) {
329+
/// PtrAdd is an Immediate but it is outside of the MBB, so it already
330+
/// dominates the MemoryInstruction. No checks have to be performed.
331+
return {{/*MoveUp=*/{}, /*MoveDown=*/{}}};
332+
}
333+
334+
/// Immediate Offset can be a reordering Candidate. Therefore, track Immediate
335+
/// Offset, so it can be moved in case of a reordering.
336+
return {{/*MoveUp=*/{SUnitPtrAdd}, /*MoveDown=*/{}}};
319337
}
320338

321339
void OffsetCombiner::adjustGain(const MachineDominatorTree &MDT) {
@@ -335,7 +353,7 @@ void OffsetCombiner::adjustGain(const MachineDominatorTree &MDT) {
335353
Gain.setPtrMod(0);
336354
}
337355

338-
std::optional<APInt> ImmOffset = getImm(*PtrAdd, *MRI);
356+
ImmOffset = getImm(*PtrAdd, *MRI);
339357
if (!ImmOffset)
340358
return;
341359

@@ -363,6 +381,58 @@ std::optional<unsigned> OffsetCombiner::getOpCode(MachineInstr *PtrInc,
363381
return TII->getOffsetMemOpcode(MemI->getOpcode());
364382
}
365383

384+
/// \return whether this combiner may be reordered, which is the case exactly
/// when an immediate offset has been recorded in ImmOffset.
bool OffsetCombiner::canReorder() const {
  return static_cast<bool>(ImmOffset);
}
385+
386+
bool OffsetCombiner::isReorderCandidate(
387+
const GenericCombiner *PostIncCombiner) const {
388+
auto GetInputPtr = [&](const MachineInstr *PtrMod) {
389+
auto InputPtrIdx = PtrModSupport.getInputPtrIdx(*PtrMod);
390+
assert(InputPtrIdx);
391+
return PtrMod->getOperand(*InputPtrIdx);
392+
};
393+
394+
const PointerModifierCombiner *PtrModCombiner =
395+
static_cast<const PointerModifierCombiner *>(PostIncCombiner);
396+
if (!PtrModCombiner->isPostInc())
397+
return false;
398+
399+
// only allow loads to be reordered
400+
if (getMemI()->mayStore() || PtrModCombiner->getMemI()->mayStore())
401+
return false;
402+
403+
// Same MBB check
404+
auto *PtrAdd = getPtrInc();
405+
auto *PostIncPtrMod = PtrModCombiner->getPtrInc();
406+
if (PtrAdd->getParent() != PostIncPtrMod->getParent())
407+
return false;
408+
409+
// Same Input Ptr Check
410+
auto InputPtr = GetInputPtr(PtrAdd);
411+
auto PostIncInputPtr = GetInputPtr(PostIncPtrMod);
412+
if (!InputPtr.isIdenticalTo(PostIncInputPtr))
413+
return false;
414+
415+
// Check if Store Instruction of Offset dominates PostInc
416+
auto *MemI = getMemI();
417+
if (MemI->mayStore()) {
418+
auto Source = MemI->getOperand(0);
419+
assert(Source.isReg());
420+
auto *DefSource = MRI->getUniqueVRegDef(Source.getReg());
421+
if (!DefSource)
422+
return false;
423+
auto *DefSUnit = DAG->getSUnit(DefSource);
424+
if (DefSUnit &&
425+
DefSUnit->NodeNum > PostIncCombiner->InsertionPointNodeNum) {
426+
// Source of Offset-Store would be after the new InsertionPoint and thus
427+
// generate invalid mir
428+
return false;
429+
}
430+
}
431+
432+
// OffsetCombiner occurs after PostIncCombiner
433+
return InsertionPointNodeNum > PostIncCombiner->InsertionPointNodeNum;
434+
}
435+
366436
// -------------------------- PostIncCombiner --------------------------------//
367437

368438
bool PostIncCombiner::isCombineCandidate(MachineInstr &MemI,

llvm/lib/Target/AIE/AIEGlobalCombinerPtrMods.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,13 @@ class PointerModifierCombiner : public GenericCombiner {
118118

119119
/// \return whether Opcode can be set
120120
bool tryToSetCombinedOpCode() override;
121+
122+
virtual bool isPostInc() const = 0;
121123
};
122124

123125
class OffsetCombiner : public PointerModifierCombiner {
126+
std::optional<APInt> ImmOffset;
127+
124128
protected:
125129
std::optional<unsigned> getOpCode(MachineInstr *PtrInc,
126130
MachineInstr *MemI) const override;
@@ -141,6 +145,12 @@ class OffsetCombiner : public PointerModifierCombiner {
141145

142146
std::optional<std::pair<std::vector<SUnit *>, std::vector<SUnit *>>>
143147
getInstructionsToMove(const AIE::DataDependenceHelper &DAG) override;
148+
149+
bool isReorderCandidate(const GenericCombiner *Candidate) const override;
150+
151+
bool canReorder() const override;
152+
153+
/// An offset combiner is not a post-increment combiner.
bool isPostInc() const override {
  return false;
}
144154
};
145155

146156
class PostIncCombiner : public PointerModifierCombiner {
@@ -176,6 +186,14 @@ class PostIncCombiner : public PointerModifierCombiner {
176186
void adjustGain(const MachineDominatorTree &MDT) override;
177187

178188
std::vector<MachineInstr *> getPtrInstrs(MachineInstr *MI) const override;
189+
190+
/// A post-increment combiner never accepts \p Candidate as a reorder
/// candidate.
bool isReorderCandidate(const GenericCombiner *Candidate) const override {
  // The argument is intentionally not inspected.
  (void)Candidate;
  return false;
}
193+
194+
/// A post-increment combiner is never reordered itself.
bool canReorder() const override {
  return false;
}
195+
196+
/// Identifies this combiner as a post-increment combiner.
bool isPostInc() const override {
  return true;
}
179197
};
180198

181199
} // namespace llvm::AIE

llvm/lib/Target/AIE/AIEPtrModOptimizer.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ void FoundCombiners::remapCombiner(AIE::Combiner &Combiner) const {
167167
getRemappedInstrs(Combiner.DelayInstrToInsertionPoint);
168168
Combiner.DelayInstrPastInsertionPoint =
169169
getRemappedInstrs(Combiner.DelayInstrPastInsertionPoint);
170+
171+
std::vector<MachineInstr *> InsertionPointVec = {Combiner.InsertionPoint};
172+
Combiner.InsertionPoint = getRemappedInstrs(InsertionPointVec)[0];
170173
}
171174

172175
const std::map<MachineInstr *, AIE::Combiner> &

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/gemm.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ body: |
4040
; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, %9(p0), %bb.1
4141
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI]], [[C2]]
4242
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[ADD]](s32), [[C]]
43-
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<32 x s16>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[PHI1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]] :: (load (<32 x s16>))
4443
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 64
4544
; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<32 x s16>) = G_AIE_OFFSET_LOAD [[PHI1]](p0), [[C4]](s20) :: (load (<32 x s16>))
45+
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<32 x s16>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[PHI1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]], [[C1]] :: (load (<32 x s16>))
4646
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[PHI1]], %configZero(s20)
4747
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 0
4848
; CHECK-NEXT: [[AIE_OFFSET_LOAD1:%[0-9]+]]:_(<32 x s16>) = G_AIE_OFFSET_LOAD [[PTR_ADD]](p0), [[C5]](s20) :: (load (<32 x s16>))

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/post-inc-eagerness.mir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# increasing register pressure.
1818
# FIXME: reorder instructions
1919

20+
# Do not reorder Store Instructions.
2021
---
2122
name: post-inc-reg-pressure-store
2223
legalized: true
@@ -46,6 +47,7 @@ body: |
4647
PseudoRET implicit $lr, implicit %3
4748
...
4849

50+
# Reorder Load Instructions.
4951
# similar example as above, but with load instructions
5052
---
5153
name: post-inc-reg-pressure-load
@@ -60,9 +62,9 @@ body: |
6062
; CHECK-NEXT: {{ $}}
6163
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
6264
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64
63-
; CHECK-NEXT: %lZero:_(s32), %7:_(p0), %8:_(s20), %9:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
6465
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
6566
; CHECK-NEXT: %lOne:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C1]](s20) :: (load (s32))
67+
; CHECK-NEXT: %lZero:_(s32), %7:_(p0), %8:_(s20), %9:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
6668
; CHECK-NEXT: PseudoRET implicit $lr, implicit %7(p0), implicit %lZero(s32), implicit %lOne(s32)
6769
%0:_(p0) = COPY $p0
6870
%1:_(s20) = G_CONSTANT i20 64

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/reorder-Mem-Instrs.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ body: |
2828
; CHECK-NEXT: {{ $}}
2929
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
3030
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64
31-
; CHECK-NEXT: %sZero:_(s32), %6:_(p0), %7:_(s20), %8:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
3231
; CHECK-NEXT: %sOne:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32))
32+
; CHECK-NEXT: %sZero:_(s32), %6:_(p0), %7:_(s20), %8:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
3333
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD %sZero, %sOne
3434
; CHECK-NEXT: G_STORE [[ADD]](s32), %6(p0) :: (store (s32))
3535
; CHECK-NEXT: PseudoRET implicit $lr

llvm/test/CodeGen/AIE/aie2p/GlobalIsel/global-combiners/user-intrinsics.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ body: |
2929
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
3030
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1
3131
; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 8
32-
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
3332
; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (s32))
34-
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD4:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_LOAD5:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD6:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD7:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY1]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
33+
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
3534
; CHECK-NEXT: [[AIE_OFFSET_LOAD1:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY1]](p0), [[C]](s20) :: (load (s32))
35+
; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD4:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_LOAD5:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD6:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD7:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY1]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (s32))
3636
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_POSTINC_3D_LOAD]](s32), implicit [[AIE_OFFSET_LOAD]](s32), implicit [[AIE_POSTINC_3D_LOAD4]](s32), implicit [[AIE_OFFSET_LOAD1]](s32), implicit [[AIE_POSTINC_3D_LOAD1]](p0), implicit [[AIE_POSTINC_3D_LOAD5]](p0)
3737
%0:_(p0) = COPY $p0
3838
%1:_(p0) = COPY $p1

0 commit comments

Comments
 (0)