We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent dc813e5 commit 5edb171 (Copy full SHA for 5edb171)
paddle/phi/kernels/fusion/cutlass/cutlass_extensions/gemm/threadblock/dq_mma_base.h
@@ -169,7 +169,8 @@ class DqMmaBase {
169
AlignedBuffer<typename Operator::ElementB, ShapeB::kCount> operand_B;
170
171
/// Buffer to hold scales for threadblock
172
- AlignedBuffer<ElementScale, Shape::kN> operand_scale;
+ /// Note(zkk): allocate Shape::kN * kStages scale elements (kStages times more than before); used in group-wise weight-only.
173
+ AlignedBuffer<ElementScale, Shape::kN * kStages> operand_scale;
174
175
public:
176
//
0 commit comments