Skip to content

Commit dd30f9d

Browse files
APozdniakov, kardymonds, yumkam, GrigoriyPA
authored
old stable to new stable (#26123)
Co-authored-by: Dmitry Kardymon <kardymon-d@ydb.tech>
Co-authored-by: yumkam <yumkam7@ydb.tech>
Co-authored-by: Pisarenko Grigoriy <grigoriypisar@ydb.tech>
1 parent 98eed05 commit dd30f9d

File tree

16 files changed

+256
-181
lines changed

16 files changed

+256
-181
lines changed

ydb/core/fq/libs/row_dispatcher/format_handler/filters/filters_set.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ class TTopicFilters : public ITopicFilters {
3030
, Counters_(std::move(counters))
3131
{}
3232

33-
void ProcessData(const TVector<ui64>& columnIndex, const TVector<ui64>& offsets, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 numberRows) override {
33+
void ProcessData(const TVector<ui64>& columnIndex, const TVector<ui64>& offsets, const TVector<std::span<NYql::NUdf::TUnboxedValue>>& values, ui64 numberRows) override {
3434
LOG_ROW_DISPATCHER_TRACE("ProcessData for " << RunHandlers_.size() << " clients, number rows: " << numberRows);
3535

3636
if (!numberRows) {
@@ -220,18 +220,18 @@ class TTopicFilters : public ITopicFilters {
220220
RunHandlers_.erase(iter);
221221
}
222222

223-
void PushToRunner(IProgramRunHandler::TPtr programRunHandler, const TVector<ui64>& /* offsets */, const TVector<ui64>& columnIndex, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 numberRows) {
223+
void PushToRunner(IProgramRunHandler::TPtr programRunHandler, const TVector<ui64>& /* offsets */, const TVector<ui64>& columnIndex, const TVector<std::span<NYql::NUdf::TUnboxedValue>>& values, ui64 numberRows) {
224224
const auto consumer = programRunHandler->GetConsumer();
225225
const auto& columnIds = consumer->GetColumnIds();
226226

227-
TVector<const TVector<NYql::NUdf::TUnboxedValue>*> result;
227+
TVector<std::span<NYql::NUdf::TUnboxedValue>> result;
228228
result.reserve(columnIds.size());
229229
for (ui64 columnId : columnIds) {
230230
Y_ENSURE(columnId < columnIndex.size(), "Unexpected column id " << columnId << ", it is larger than index array size " << columnIndex.size());
231231
const ui64 index = columnIndex[columnId];
232232

233233
Y_ENSURE(index < values.size(), "Unexpected column index " << index << ", it is larger than values array size " << values.size());
234-
if (const auto value = values[index]) {
234+
if (const auto value = values[index]; !value.empty()) {
235235
result.emplace_back(value);
236236
} else {
237237
LOG_ROW_DISPATCHER_TRACE("Ignore processing for " << consumer->GetClientId() << ", client got parsing error for column " << columnId);

ydb/core/fq/libs/row_dispatcher/format_handler/filters/filters_set.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ class ITopicFilters : public TThrRefBase, public TNonCopyable {
1515

1616
public:
1717
// columnIndex - mapping from stable column id to index in values array
18-
virtual void ProcessData(const TVector<ui64>& columnIndex, const TVector<ui64>& offsets, const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 numberRows) = 0;
18+
virtual void ProcessData(const TVector<ui64>& columnIndex, const TVector<ui64>& offsets, const TVector<std::span<NYql::NUdf::TUnboxedValue>>& values, ui64 numberRows) = 0;
1919
virtual void OnCompileResponse(TEvRowDispatcher::TEvPurecalcCompileResponse::TPtr& ev) = 0;
2020

2121
virtual TStatus AddPrograms(IProcessedDataConsumer::TPtr consumer, std::unordered_map<TString, IProgramHolder::TPtr> programHolders) = 0;

ydb/core/fq/libs/row_dispatcher/format_handler/filters/purecalc_filter.cpp

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ NYT::TNode CreateTypeNode(NYT::TNode&& typeNode) {
2121
return CreateNamedNode("DataType", std::move(typeNode));
2222
}
2323

24+
NYT::TNode CreateOptionalTypeNode(NYT::TNode&& typeNode) {
25+
return CreateNamedNode("OptionalType", std::move(typeNode));
26+
}
27+
2428
NYT::TNode CreateStructTypeNode(NYT::TNode&& membersNode) {
2529
return CreateNamedNode("StructType", std::move(membersNode));
2630
}
@@ -70,12 +74,12 @@ NYT::TNode MakeWatermarkOutputSchema() {
7074
return CreateStructTypeNode(
7175
NYT::TNode::CreateList()
7276
.Add(CreateFieldNode(OFFSET_FIELD_NAME, CreateTypeNode("Uint64")))
73-
.Add(CreateFieldNode(WATERMARK_FIELD_NAME, CreateTypeNode("Timestamp")))
77+
.Add(CreateFieldNode(WATERMARK_FIELD_NAME, CreateOptionalTypeNode(CreateTypeNode("Timestamp"))))
7478
);
7579
}
7680

7781
struct TInputType {
78-
const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& Values;
82+
const TVector<std::span<NYql::NUdf::TUnboxedValue>>& Values;
7983
ui64 NumberRows;
8084
};
8185

@@ -152,8 +156,9 @@ class TInputConsumer : public NYql::NPureCalc::IConsumer<TInputType> {
152156

153157
items[OffsetPosition] = NYql::NUdf::TUnboxedValuePod(rowId);
154158

155-
for (ui64 fieldId = 0; const auto column : input.Values) {
156-
items[FieldsPositions[fieldId++]] = column->at(rowId);
159+
for (ui64 fieldId = 0; const auto& column : input.Values) {
160+
Y_DEBUG_ABORT_UNLESS(column.size() > rowId);
161+
items[FieldsPositions[fieldId++]] = column[rowId];
157162
}
158163

159164
Worker->Push(std::move(result));
@@ -415,7 +420,7 @@ class TProgramRunHandler final : public IProgramRunHandler, public TNonCopyable
415420
ActiveFilters_->Dec();
416421
}
417422

418-
void ProcessData(const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 numberRows) const override {
423+
void ProcessData(const TVector<std::span<NYql::NUdf::TUnboxedValue>>& values, ui64 numberRows) const override {
419424
LOG_ROW_DISPATCHER_TRACE("ProcessData for " << numberRows << " rows");
420425

421426
if (!ProgramHolder_) {
@@ -456,21 +461,10 @@ class TProgramRunHandler final : public IProgramRunHandler, public TNonCopyable
456461

457462
TStringBuilder sb;
458463
sb << R"(PRAGMA config.flags("LLVM", ")" << (settings.EnabledLLVM ? "ON" : "OFF") << R"(");)" << '\n';
459-
sb << "$input ="
460-
<< " SELECT "
461-
<< OFFSET_FIELD_NAME << ", "
462-
<< watermarkExpr << " AS " << WATERMARK_FIELD_NAME
463-
<< " FROM Input;\n";
464-
sb << "$output ="
465-
<< " SELECT "
466-
<< OFFSET_FIELD_NAME << ", "
467-
<< WATERMARK_FIELD_NAME
468-
<< " FROM $input"
469-
<< " WHERE " << WATERMARK_FIELD_NAME << " IS NOT NULL;\n";
470464
sb << "SELECT "
471465
<< OFFSET_FIELD_NAME << ", "
472-
<< "Unwrap(" << WATERMARK_FIELD_NAME << ") AS " << WATERMARK_FIELD_NAME
473-
<< " FROM $output;\n";
466+
<< watermarkExpr << " AS " << WATERMARK_FIELD_NAME
467+
<< " FROM Input;\n";
474468

475469
TString result = sb;
476470
LOG_ROW_DISPATCHER_DEBUG("Generated sql:\n" << result);

ydb/core/fq/libs/row_dispatcher/format_handler/filters/purecalc_filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class IProgramRunHandler : public TThrRefBase {
7373
return ProgramHolder_;
7474
}
7575

76-
virtual void ProcessData(const TVector<const TVector<NYql::NUdf::TUnboxedValue>*>& values, ui64 numberRows) const = 0;
76+
virtual void ProcessData(const TVector<std::span<NYql::NUdf::TUnboxedValue>>& values, ui64 numberRows) const = 0;
7777

7878
protected:
7979
TString Name_;

ydb/core/fq/libs/row_dispatcher/format_handler/format_handler.cpp

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
7979
void OnParsedData(ui64 numberRows) override {
8080
LOG_ROW_DISPATCHER_TRACE("Got parsed data, number rows: " << numberRows);
8181

82-
Self.ParsedData.assign(ParerSchema.size(), nullptr);
82+
Self.ParsedData.assign(ParerSchema.size(), std::span<NYql::NUdf::TUnboxedValue>());
8383
for (size_t i = 0; i < ParerSchema.size(); ++i) {
8484
auto columnStatus = Self.Parser->GetParsedColumn(i);
8585
if (Y_LIKELY(columnStatus.IsSuccess())) {
@@ -221,17 +221,31 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
221221
Client->StartClientSession();
222222
}
223223

224+
private:
225+
void OnWatermark(const NYql::NUdf::TUnboxedValue& rowIdValue, const NYql::NUdf::TUnboxedValue& maybeWatermark) {
226+
if (!maybeWatermark) {
227+
return;
228+
}
229+
auto rowId = rowIdValue.Get<ui64>();
230+
Offset = Self.Offsets->at(rowId);
231+
auto watermark = TInstant::MicroSeconds(maybeWatermark.Get<ui64>());
232+
if (Watermark < watermark) {
233+
Watermark = watermark;
234+
}
235+
LOG_ROW_DISPATCHER_TRACE("OnWatermark, row id: " << rowId << ", watermark: " << watermark);
236+
}
237+
238+
public:
224239
void OnData(const NYql::NUdf::TUnboxedValue* value) override {
225240
ui64 rowId;
226-
TMaybe<ui64> watermarkUs;
227241
if (value->IsEmbedded()) {
228242
rowId = value->Get<ui64>();
229243
} else if (value->IsBoxed()) {
230244
if (value->GetListLength() == 1) {
231245
rowId = value->GetElement(0).Get<ui64>();
232246
} else if (value->GetListLength() == 2) {
233-
rowId = value->GetElement(0).Get<ui64>();
234-
watermarkUs = value->GetElement(1).Get<ui64>();
247+
OnWatermark(value->GetElement(0), value->GetElement(1));
248+
return;
235249
} else {
236250
Y_ENSURE(false, "Unexpected output schema size");
237251
}
@@ -246,23 +260,18 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
246260
}
247261

248262
FilteredOffsets.insert(Offset);
249-
if (watermarkUs) {
250-
WatermarksUs.push_back(*watermarkUs);
251-
252-
const auto watermark = WatermarksUs.empty() ? Nothing() : TMaybe<TInstant>{TInstant::MicroSeconds(WatermarksUs.back())};
253-
LOG_ROW_DISPATCHER_TRACE("OnData, row id: " << rowId << ", offset: " << Offset << ", watermark: " << watermark);
254-
255-
return;
256-
}
257263

258264
Y_DEFER {
259265
// Values allocated on parser allocator and should be released
260266
FilteredRow.assign(Columns.size(), NYql::NUdf::TUnboxedValue());
261267
};
262268

263269
for (size_t i = 0; const ui64 columnId : ColumnsIds) {
270+
auto& parsedData = Self.ParsedData[Self.ParserSchemaIndex[columnId]];
271+
Y_DEBUG_ABORT_UNLESS(parsedData.size() > rowId);
272+
264273
// All data was locked in parser, so copy is safe
265-
FilteredRow[i++] = Self.ParsedData[Self.ParserSchemaIndex[columnId]]->at(rowId);
274+
FilteredRow[i++] = parsedData[rowId];
266275
}
267276
DataPacker->AddWideItem(FilteredRow.data(), FilteredRow.size());
268277

@@ -272,7 +281,7 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
272281
}
273282

274283
void OnBatchFinish() override {
275-
if (NewNumberRows == NumberRows && NewDataPackerSize == DataPackerSize && WatermarksUs.empty()) {
284+
if (NewNumberRows == NumberRows && NewDataPackerSize == DataPackerSize && !Watermark) {
276285
return;
277286
}
278287
if (const auto nextOffset = Client->GetNextMessageOffset(); nextOffset && Offset < *nextOffset) {
@@ -282,11 +291,10 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
282291

283292
const auto numberRows = NewNumberRows - NumberRows;
284293
const auto rowSize = NewDataPackerSize - DataPackerSize;
285-
const auto watermark = WatermarksUs.empty() ? Nothing() : TMaybe<TInstant>{TInstant::MicroSeconds(WatermarksUs.back())};
286294

287-
LOG_ROW_DISPATCHER_TRACE("OnBatchFinish, offset: " << Offset << ", number rows: " << numberRows << ", row size: " << rowSize << ", watermark: " << watermark);
295+
LOG_ROW_DISPATCHER_TRACE("OnBatchFinish, offset: " << Offset << ", number rows: " << numberRows << ", row size: " << rowSize << ", watermark: " << Watermark);
288296

289-
Client->AddDataToClient(Offset, numberRows, rowSize, watermark);
297+
Client->AddDataToClient(Offset, numberRows, rowSize, Watermark);
290298

291299
NumberRows = NewNumberRows;
292300
DataPackerSize = NewDataPackerSize;
@@ -315,15 +323,18 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
315323
}
316324

317325
void FinishPacking() {
318-
if (!DataPacker->IsEmpty() || !WatermarksUs.empty()) {
326+
if (!DataPacker->IsEmpty() || !Watermark.Empty()) {
319327
LOG_ROW_DISPATCHER_TRACE("FinishPacking, batch size: " << DataPackerSize << ", number rows: " << FilteredOffsets.size());
320-
ClientData.emplace(NYql::MakeReadOnlyRope(DataPacker->Finish()), FilteredOffsets, WatermarksUs);
328+
if (FilteredOffsets.empty()) {
329+
FilteredOffsets.emplace(Offset);
330+
}
331+
ClientData.emplace(NYql::MakeReadOnlyRope(DataPacker->Finish()), std::move(FilteredOffsets), Watermark);
321332
NumberRows = 0;
322333
NewNumberRows = 0;
323334
DataPackerSize = 0;
324335
NewDataPackerSize = 0;
325336
FilteredOffsets.clear();
326-
WatermarksUs.clear();
337+
Watermark.Clear();
327338
}
328339
}
329340

@@ -345,7 +356,7 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
345356
TVector<NYql::NUdf::TUnboxedValue> FilteredRow; // Temporary value holder for DataPacket
346357
std::unique_ptr<NKikimr::NMiniKQL::TValuePackerTransport<true>> DataPacker;
347358
TSet<ui64> FilteredOffsets; // Offsets of current batch in DataPacker
348-
TVector<ui64> WatermarksUs;
359+
TMaybe<TInstant> Watermark;
349360
TQueue<TDataBatch> ClientData;
350361
};
351362

@@ -653,7 +664,7 @@ class TTopicFormatHandler : public NActors::TActor<TTopicFormatHandler>, public
653664

654665
// Parsed data
655666
const TVector<ui64>* Offsets;
656-
TVector<const TVector<NYql::NUdf::TUnboxedValue>*> ParsedData;
667+
TVector<std::span<NYql::NUdf::TUnboxedValue>> ParsedData;
657668
bool RefreshScheduled = false;
658669

659670
// Metrics

ydb/core/fq/libs/row_dispatcher/format_handler/format_handler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class IClientDataConsumer : public TThrRefBase {
3838
struct TDataBatch {
3939
TRope SerializedData;
4040
TSet<ui64> Offsets;
41-
TVector<ui64> WatermarksUs;
41+
TMaybe<TInstant> Watermark;
4242
};
4343

4444
class ITopicFormatHandler : public TNonCopyable {

0 commit comments

Comments
 (0)