Commit ff7c816
[VPlan] Run narrowInterleaveGroups during general VPlan optimizations.
Move narrowInterleaveGroups to the general VPlan optimization stage. To do so, narrowInterleaveGroups now has to find a suitable VF for which all interleave groups are consecutive and saturate the full vector width. If such a VF is found, the original VPlan is split in two: a) a new clone that contains all VFs of the plan except VFToOptimize, and b) the original plan with VFToOptimize as its single VF. The original plan is then optimized. If a new copy for the other VFs has been created, it is returned and the caller has to add it to the list of candidate plans. Together with llvm#149702, this allows the narrowed interleave groups to be taken into account when interleaving.
1 parent e138c95 commit ff7c816
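
To make the VF-selection and plan-splitting flow described in the message easier to follow, here is a minimal, self-contained sketch in plain C++. It deliberately uses simplified stand-in types (FakePlan, Group) and hypothetical helpers (pickVFToOptimize, splitOffRemainingVFs) rather than the real VPlan API; the saturation check below is only a stand-in for isConsecutiveInterleaveGroup, and RegBits plays the role of VectorRegWidth.

// Minimal sketch, assuming simplified stand-in types (FakePlan, Group) and
// hypothetical helpers (pickVFToOptimize, splitOffRemainingVFs); this is not
// the VPlan API, it only models the flow described in the commit message.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <optional>
#include <vector>

struct Group { unsigned Members, ElemBits; }; // one interleave group

struct FakePlan {
  std::vector<unsigned> VFs;  // candidate fixed VFs (scalable VFs ignored)
  std::vector<Group> Groups;  // interleave groups in the loop body
};

// Stand-in for isConsecutiveInterleaveGroup: a group "saturates the full
// vector width" if its members times the VF fill the register exactly.
static bool saturates(const Group &G, unsigned VF, unsigned RegBits) {
  return G.Members * G.ElemBits * VF == RegBits;
}

// Find a single VF for which every interleave group saturates the register.
static std::optional<unsigned> pickVFToOptimize(const FakePlan &P,
                                                unsigned RegBits) {
  for (unsigned VF : P.VFs)
    if (std::all_of(P.Groups.begin(), P.Groups.end(),
                    [&](const Group &G) { return saturates(G, VF, RegBits); }))
      return VF;
  return std::nullopt;
}

// Split the plan: the original keeps only VFToOptimize, the clone keeps all
// other VFs and is handed back to the caller as an extra candidate plan.
static std::unique_ptr<FakePlan> splitOffRemainingVFs(FakePlan &P,
                                                      unsigned VFToOptimize) {
  if (P.VFs.size() == 1)
    return nullptr; // nothing to split off
  auto Clone = std::make_unique<FakePlan>(P);
  Clone->VFs.erase(
      std::remove(Clone->VFs.begin(), Clone->VFs.end(), VFToOptimize),
      Clone->VFs.end());
  P.VFs = {VFToOptimize};
  return Clone;
}

int main() {
  // Two 2-member groups of 32-bit elements, candidate VFs {2, 4}, and a
  // 128-bit register: VF=2 saturates the register (2 * 32 * 2 == 128).
  FakePlan P{{2, 4}, {{2, 32}, {2, 32}}};
  if (auto VF = pickVFToOptimize(P, /*RegBits=*/128)) {
    std::unique_ptr<FakePlan> Remaining = splitOffRemainingVFs(P, *VF);
    std::size_t Others = Remaining ? Remaining->VFs.size() : 0;
    std::printf("optimize VF=%u, clone keeps %zu other VF(s)\n", *VF, Others);
  }
  return 0;
}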

6 files changed, +184 -71 lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 3 deletions
@@ -7253,9 +7253,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
   VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
   VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
-  VPlanTransforms::narrowInterleaveGroups(
-      BestVPlan, BestVF,
-      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
   VPlanTransforms::removeDeadRecipes(BestVPlan);
 
   VPlanTransforms::convertToConcreteRecipes(BestVPlan,
@@ -8364,6 +8361,14 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
         !VPlanTransforms::runPass(VPlanTransforms::tryAddExplicitVectorLength,
                                   *Plan, CM.getMaxSafeElements()))
       break;
+
+    if (auto P = VPlanTransforms::narrowInterleaveGroups(
+            *Plan,
+            TTI.getRegisterBitWidth(
+                TargetTransformInfo::RGK_FixedWidthVector),
+            SubRange))
+      VPlans.push_back(std::move(P));
+
     assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
     VPlans.push_back(std::move(Plan));
   }

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 3 additions & 0 deletions
@@ -976,6 +976,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
   } else {
     VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
   }
+
+  this->UF.setUnderlyingValue(ConstantInt::get(TCTy, UF));
 }
 
 VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
@@ -1252,6 +1254,7 @@ VPlan *VPlan::duplicate() {
   }
   Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
   Old2NewVPValues[&VF] = &NewPlan->VF;
+  Old2NewVPValues[&UF] = &NewPlan->UF;
   Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
   if (BackedgeTakenCount) {
     NewPlan->BackedgeTakenCount = new VPValue();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 0 deletions
@@ -3895,6 +3895,9 @@ class VPlan {
   /// Represents the vectorization factor of the loop.
   VPValue VF;
 
+  /// Represents the symbolic unroll factor of the loop.
+  VPValue UF;
+
   /// Represents the loop-invariant VF * UF of the vector loop region.
   VPValue VFxUF;
 
@@ -4050,6 +4053,9 @@
   /// Returns the VF of the vector loop region.
   VPValue &getVF() { return VF; };
 
+  /// Returns the symbolic UF of the vector loop region.
+  VPValue &getSymbolicUF() { return UF; };
+
   /// Returns VF * UF of the vector loop region.
   VPValue &getVFxUF() { return VFxUF; }
 

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 57 additions & 19 deletions
@@ -3146,19 +3146,20 @@ static bool isAlreadyNarrow(VPValue *VPV) {
   return RepR && RepR->isSingleScalar();
 }
 
-void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
-                                             unsigned VectorRegWidth) {
+std::unique_ptr<VPlan>
+VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, unsigned VectorRegWidth,
+                                        VFRange &Range) {
   using namespace llvm::VPlanPatternMatch;
   VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
-  if (VF.isScalable() || !VectorLoop)
-    return;
+  if (Plan.hasScalableVF() || !VectorLoop)
+    return nullptr;
 
   VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
   Type *CanonicalIVType = CanonicalIV->getScalarType();
   VPTypeAnalysis TypeInfo(CanonicalIVType);
 
-  unsigned FixedVF = VF.getFixedValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
+  std::optional<unsigned> VFToOptimize;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
     if (isa<VPCanonicalIVPHIRecipe>(&R) ||
         match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
@@ -3173,30 +3174,47 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
     // * recipes writing to memory except interleave groups
     // Only support plans with a canonical induction phi.
     if (R.isPhi())
-      return;
+      return nullptr;
 
     auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
     if (R.mayWriteToMemory() && !InterleaveR)
-      return;
+      return nullptr;
 
     // Do not narrow interleave groups if there are VectorPointer recipes and
     // the plan was unrolled. The recipe implicitly uses VF from
    // VPTransformState.
     // TODO: Remove restriction once the VF for the VectorPointer offset is
     // modeled explicitly as operand.
     if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
-      return;
+      return nullptr;
 
     // All other ops are allowed, but we reject uses that cannot be converted
     // when checking all allowed consumers (store interleave groups) below.
     if (!InterleaveR)
       continue;
 
-    // Bail out on non-consecutive interleave groups.
-    if (!isConsecutiveInterleaveGroup(InterleaveR, FixedVF, TypeInfo,
-                                      VectorRegWidth))
-      return;
-
+    // Try to find a single VF, where all interleave groups are consecutive and
+    // saturate the full vector width. If we already have a candidate VF, check
+    // if it is applicable for the current InterleaveR, otherwise look for a
+    // suitable VF across the Plans VFs.
+    //
+    if (VFToOptimize) {
+      if (!isConsecutiveInterleaveGroup(InterleaveR, *VFToOptimize, TypeInfo,
+                                        VectorRegWidth))
+        return nullptr;
+    } else {
+      for (ElementCount VF : Plan.vectorFactors()) {
+        if (!VF.isFixed())
+          continue;
+        if (isConsecutiveInterleaveGroup(InterleaveR, VF.getFixedValue(),
+                                         TypeInfo, VectorRegWidth)) {
+          VFToOptimize = VF.getFixedValue();
+          break;
+        }
+      }
+      if (!VFToOptimize)
+        return nullptr;
+    }
     // Skip read interleave groups.
     if (InterleaveR->getStoredValues().empty())
       continue;
@@ -3232,24 +3250,44 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
     auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
         InterleaveR->getStoredValues()[0]->getDefiningRecipe());
     if (!WideMember0)
-      return;
+      return nullptr;
     for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
       auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
       if (!R || R->getOpcode() != WideMember0->getOpcode() ||
          R->getNumOperands() > 2)
-        return;
+        return nullptr;
      if (any_of(enumerate(R->operands()),
                 [WideMember0, Idx = I](const auto &P) {
                   const auto &[OpIdx, OpV] = P;
                   return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
                 }))
-        return;
+        return nullptr;
     }
     StoreGroups.push_back(InterleaveR);
   }
 
   if (StoreGroups.empty())
-    return;
+    return nullptr;
+
+  // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
+  // original Plan into 2: a) a new clone which contains all VFs of Plan, except
+  // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
+  std::unique_ptr<VPlan> NewPlan;
+  if (size(Plan.vectorFactors()) != 1) {
+    NewPlan = std::unique_ptr<VPlan>(Plan.duplicate());
+    Plan.setVF(ElementCount::getFixed(*VFToOptimize));
+    bool First = true;
+    for (ElementCount VF : NewPlan->vectorFactors()) {
+      if (VF.isFixed() && VF.getFixedValue() == *VFToOptimize)
+        continue;
+      if (First) {
+        NewPlan->setVF(VF);
+        First = false;
+        continue;
+      }
+      NewPlan->addVF(VF);
+    }
+  }
 
   // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
   auto NarrowOp = [](VPValue *V) -> VPValue * {
@@ -3314,11 +3352,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   // original iteration.
   auto *CanIV = Plan.getCanonicalIV();
   auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
-  Inc->setOperand(1, Plan.getOrAddLiveIn(ConstantInt::get(
-                         CanIV->getScalarType(), 1 * Plan.getUF())));
+  Inc->setOperand(1, &Plan.getSymbolicUF());
   Plan.getVF().replaceAllUsesWith(
       Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
   removeDeadRecipes(Plan);
+  return NewPlan;
 }
 
 /// Add branch weight metadata, if the \p Plan's middle block is terminated by a
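
The last hunk above is the subtle part of the transform: in the narrowed plan, uses of VF are replaced by the constant 1, and the canonical IV now steps by the symbolic UF value (rather than a constant derived from getUF(), since the final unroll factor is not fixed yet at this stage). For intuition, here is a tiny stand-alone model of the iteration counts, assuming concrete values VF = 4, UF = 2 and a trip count of 16; this is plain C++, not VPlan code.

// Plain C++ model (not VPlan code), assuming VF = 4, UF = 2, trip count 16.
#include <cstdio>

int main() {
  const unsigned VF = 4, UF = 2, TC = 16;

  // Regular vectorized loop: the canonical IV advances by VF * UF, so each
  // vector iteration covers VF * UF original scalar iterations.
  for (unsigned IV = 0; IV < TC; IV += VF * UF)
    std::printf("wide:     IV = %2u, covers %u original iterations\n", IV, VF * UF);

  // Narrowed plan: interleave groups became single wide loads/stores, VF is
  // treated as 1 in the loop body, and the IV advances by UF only, so each
  // vector iteration covers one original iteration per unrolled part.
  for (unsigned IV = 0; IV < TC; IV += UF)
    std::printf("narrowed: IV = %2u, covers %u original iterations\n", IV, UF);
  return 0;
}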

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 13 additions & 8 deletions
@@ -234,14 +234,19 @@ struct VPlanTransforms {
   /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
   static void materializeBroadcasts(VPlan &Plan);
 
-  /// Try to convert a plan with interleave groups with VF elements to a plan
-  /// with the interleave groups replaced by wide loads and stores processing VF
-  /// elements, if all transformed interleave groups access the full vector
-  /// width (checked via \o VectorRegWidth). This effectively is a very simple
-  /// form of loop-aware SLP, where we use interleave groups to identify
-  /// candidates.
-  static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
-                                     unsigned VectorRegWidth);
+  /// Try to find a single VF among \p Plan's VFs for which all interleave
+  /// groups (with VF elements) can be replaced by wide loads ans tores
+  /// processing VF elements, if all transformed interleave groups access the
+  /// full vector width (checked via \o VectorRegWidth). If the transformation
+  /// can be applied, the original \p Plan will be split in 2, if is has
+  /// multiple VFs: a) a new clone which contains all VFs of Plan, except
+  /// VFToOptimize, and b) the original Plan with VFToOptimize as single VF. In
+  /// that case, the new clone is returned.
+  ///
+  /// This effectively is a very simple form of loop-aware SLP, where we use
+  /// interleave groups to identify candidates.
+  static std::unique_ptr<VPlan>
+  narrowInterleaveGroups(VPlan &Plan, unsigned VectorRegWidth, VFRange &Range);
 
   /// Predicate and linearize the control-flow in the only loop region of
   /// \p Plan. If \p FoldTail is true, create a mask guarding the loop
