@@ -3146,19 +3146,20 @@ static bool isAlreadyNarrow(VPValue *VPV) {
3146
3146
return RepR && RepR->isSingleScalar ();
3147
3147
}
3148
3148
3149
- void VPlanTransforms::narrowInterleaveGroups (VPlan &Plan, ElementCount VF,
3150
- unsigned VectorRegWidth) {
3149
+ std::unique_ptr<VPlan>
3150
+ VPlanTransforms::narrowInterleaveGroups (VPlan &Plan, unsigned VectorRegWidth,
3151
+ VFRange &Range) {
3151
3152
using namespace llvm ::VPlanPatternMatch;
3152
3153
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion ();
3153
- if (VF. isScalable () || !VectorLoop)
3154
- return ;
3154
+ if (Plan. hasScalableVF () || !VectorLoop)
3155
+ return nullptr ;
3155
3156
3156
3157
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV ();
3157
3158
Type *CanonicalIVType = CanonicalIV->getScalarType ();
3158
3159
VPTypeAnalysis TypeInfo (CanonicalIVType);
3159
3160
3160
- unsigned FixedVF = VF.getFixedValue ();
3161
3161
SmallVector<VPInterleaveRecipe *> StoreGroups;
3162
+ std::optional<unsigned > VFToOptimize;
3162
3163
for (auto &R : *VectorLoop->getEntryBasicBlock ()) {
3163
3164
if (isa<VPCanonicalIVPHIRecipe>(&R) ||
3164
3165
match (&R, m_BranchOnCount (m_VPValue (), m_VPValue ())))
@@ -3173,30 +3174,47 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
3173
3174
// * recipes writing to memory except interleave groups
3174
3175
// Only support plans with a canonical induction phi.
3175
3176
if (R.isPhi ())
3176
- return ;
3177
+ return nullptr ;
3177
3178
3178
3179
auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
3179
3180
if (R.mayWriteToMemory () && !InterleaveR)
3180
- return ;
3181
+ return nullptr ;
3181
3182
3182
3183
// Do not narrow interleave groups if there are VectorPointer recipes and
3183
3184
// the plan was unrolled. The recipe implicitly uses VF from
3184
3185
// VPTransformState.
3185
3186
// TODO: Remove restriction once the VF for the VectorPointer offset is
3186
3187
// modeled explicitly as operand.
3187
3188
if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF () > 1 )
3188
- return ;
3189
+ return nullptr ;
3189
3190
3190
3191
// All other ops are allowed, but we reject uses that cannot be converted
3191
3192
// when checking all allowed consumers (store interleave groups) below.
3192
3193
if (!InterleaveR)
3193
3194
continue ;
3194
3195
3195
- // Bail out on non-consecutive interleave groups.
3196
- if (!isConsecutiveInterleaveGroup (InterleaveR, FixedVF, TypeInfo,
3197
- VectorRegWidth))
3198
- return ;
3199
-
3196
+ // Try to find a single VF, where all interleave groups are consecutive and
3197
+ // saturate the full vector width. If we already have a candidate VF, check
3198
+ // if it is applicable for the current InterleaveR, otherwise look for a
3199
+ // suitable VF across the Plan's VFs.
3200
+ //
3201
+ if (VFToOptimize) {
3202
+ if (!isConsecutiveInterleaveGroup (InterleaveR, *VFToOptimize, TypeInfo,
3203
+ VectorRegWidth))
3204
+ return nullptr ;
3205
+ } else {
3206
+ for (ElementCount VF : Plan.vectorFactors ()) {
3207
+ if (!VF.isFixed ())
3208
+ continue ;
3209
+ if (isConsecutiveInterleaveGroup (InterleaveR, VF.getFixedValue (),
3210
+ TypeInfo, VectorRegWidth)) {
3211
+ VFToOptimize = VF.getFixedValue ();
3212
+ break ;
3213
+ }
3214
+ }
3215
+ if (!VFToOptimize)
3216
+ return nullptr ;
3217
+ }
3200
3218
// Skip read interleave groups.
3201
3219
if (InterleaveR->getStoredValues ().empty ())
3202
3220
continue ;
@@ -3232,24 +3250,44 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
3232
3250
auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
3233
3251
InterleaveR->getStoredValues ()[0 ]->getDefiningRecipe ());
3234
3252
if (!WideMember0)
3235
- return ;
3253
+ return nullptr ;
3236
3254
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues ())) {
3237
3255
auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe ());
3238
3256
if (!R || R->getOpcode () != WideMember0->getOpcode () ||
3239
3257
R->getNumOperands () > 2 )
3240
- return ;
3258
+ return nullptr ;
3241
3259
if (any_of (enumerate(R->operands ()),
3242
3260
[WideMember0, Idx = I](const auto &P) {
3243
3261
const auto &[OpIdx, OpV] = P;
3244
3262
return !canNarrowLoad (WideMember0, OpIdx, OpV, Idx);
3245
3263
}))
3246
- return ;
3264
+ return nullptr ;
3247
3265
}
3248
3266
StoreGroups.push_back (InterleaveR);
3249
3267
}
3250
3268
3251
3269
if (StoreGroups.empty ())
3252
- return ;
3270
+ return nullptr ;
3271
+
3272
+ // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
3273
+ // original Plan into 2: a) a new clone which contains all VFs of Plan, except
3274
+ // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
3275
+ std::unique_ptr<VPlan> NewPlan;
3276
+ if (size (Plan.vectorFactors ()) != 1 ) {
3277
+ NewPlan = std::unique_ptr<VPlan>(Plan.duplicate ());
3278
+ Plan.setVF (ElementCount::getFixed (*VFToOptimize));
3279
+ bool First = true ;
3280
+ for (ElementCount VF : NewPlan->vectorFactors ()) {
3281
+ if (VF.isFixed () && VF.getFixedValue () == *VFToOptimize)
3282
+ continue ;
3283
+ if (First) {
3284
+ NewPlan->setVF (VF);
3285
+ First = false ;
3286
+ continue ;
3287
+ }
3288
+ NewPlan->addVF (VF);
3289
+ }
3290
+ }
3253
3291
3254
3292
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
3255
3293
auto NarrowOp = [](VPValue *V) -> VPValue * {
@@ -3314,11 +3352,11 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
3314
3352
// original iteration.
3315
3353
auto *CanIV = Plan.getCanonicalIV ();
3316
3354
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue ());
3317
- Inc->setOperand (1 , Plan.getOrAddLiveIn (ConstantInt::get (
3318
- CanIV->getScalarType (), 1 * Plan.getUF ())));
3355
+ Inc->setOperand (1 , &Plan.getSymbolicUF ());
3319
3356
Plan.getVF ().replaceAllUsesWith (
3320
3357
Plan.getOrAddLiveIn (ConstantInt::get (CanIV->getScalarType (), 1 )));
3321
3358
removeDeadRecipes (Plan);
3359
+ return NewPlan;
3322
3360
}
3323
3361
3324
3362
/// Add branch weight metadata, if the \p Plan's middle block is terminated by a
0 commit comments