Skip to content
Draft
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
ee56018
Initial plumbing for an ES|QL extract_snippets function
kderusso Jul 18, 2025
eb0a876
Add HighlighterExpressionEvaluator
kderusso Jul 29, 2025
8c0f312
Pair programming session
carlosdelest Jul 30, 2025
86dc82a
Create highlight query
kderusso Jul 30, 2025
4f4f157
Make extract snippets rewriteable
kderusso Aug 1, 2025
d68c2e8
Add comments from session with Carlos
kderusso Aug 4, 2025
0571100
Make translation aware and get further down the rewrite cycle (still …
kderusso Aug 4, 2025
9fe7654
Move building highlight query to extract snippets
kderusso Aug 4, 2025
8adea56
Cherry-pick: Initial incomplete work for creating the Highlighter in …
carlosdelest Aug 4, 2025
6be55b4
Hack in highlighter so it actually produces a response
kderusso Aug 6, 2025
60e3ce6
[CI] Auto commit changes from spotless
Aug 7, 2025
b6fb4f3
Change LuceneQueryEvaluator to use Blocks instead of Vectors to make …
carlosdelest Aug 11, 2025
f6a8079
Add rewritability
carlosdelest Aug 11, 2025
1ca0b58
Solve params via fold
carlosdelest Aug 11, 2025
34c10f5
Use SORT to push down the EVAL clause, so it's executed on local nodes
carlosdelest Aug 11, 2025
02cebe7
[CI] Auto commit changes from spotless
Aug 11, 2025
b923a2e
Workaround for rewrite
kderusso Aug 12, 2025
5b9347c
Make highlighters accessible
kderusso Aug 12, 2025
44b1bc4
[CI] Auto commit changes from spotless
Aug 12, 2025
82412d8
Return semantic highlight results
kderusso Aug 12, 2025
1bc3d16
Merge main into kderusso/esql-extract-snippets
kderusso Aug 12, 2025
d4ba21d
[CI] Auto commit changes from spotless
Aug 12, 2025
932864a
Cleanup
kderusso Aug 13, 2025
632df21
[CI] Auto commit changes from spotless
Aug 13, 2025
838b054
Move highlighters from EvalMapper to SearchContext
kderusso Aug 14, 2025
0b0487e
Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/…
kderusso Aug 14, 2025
eee88be
[CI] Auto commit changes from spotless
Aug 14, 2025
77b44d5
Cleanup how we pull field attributes in extract snippets
kderusso Aug 14, 2025
5ab3c56
Fix compilation error due to auto-commit suggestion
kderusso Aug 14, 2025
a6a0f11
Add queryBuilder to ExtractSnippets#info
kderusso Aug 18, 2025
9c7609c
Move construction of objects to ctor when possible
kderusso Aug 18, 2025
4a37634
Refactor highlighting logic into util class
kderusso Aug 18, 2025
675e78b
Fix EsqlNodeSubclassTests#testReplaceChildren
kderusso Aug 18, 2025
d5c9d91
Start adding CSV tests
kderusso Aug 18, 2025
bd369f7
Fix initialization error
kderusso Aug 19, 2025
ccda43d
Clean up duplication when creating highlighter
kderusso Aug 19, 2025
35120e6
Support default parameters when not specified
kderusso Aug 19, 2025
de46fef
Fix char encoding bug for text fields (not semantic_text)
kderusso Aug 19, 2025
ff3f3c1
Merge main into kderusso/esql-extract-snippets
kderusso Aug 20, 2025
5f20480
Truncate snippets that are longer than requested size
kderusso Aug 20, 2025
ae92c83
Fix most extractSnippets CSV tests, add some more test cases
kderusso Aug 20, 2025
48c2825
Remove changes to AnalyzerTests
kderusso Aug 21, 2025
80d1056
Spotless
kderusso Aug 21, 2025
ec3ac7a
Add preview = true
kderusso Aug 21, 2025
694bf6a
Add ExtractSnippetTests and associated generated documentation
kderusso Aug 21, 2025
0ef8fce
Add integration test for extract_snippets
kderusso Aug 21, 2025
e15f824
Merge main into kderusso/esql-extract-snippets
kderusso Aug 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity) {
this(nestedTopDocId, id, nestedIdentity, null);
}

private SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
public SearchHit(int nestedTopDocId, String id, NestedIdentity nestedIdentity, @Nullable RefCounted refCounted) {
this(
nestedTopDocId,
DEFAULT_SCORE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
*/
public class SearchModule {
private static volatile Map<String, Highlighter> staticHighlighters = Map.of();

public static final Setting<Integer> INDICES_MAX_CLAUSE_COUNT_SETTING = Setting.intSetting(
"indices.query.bool.max_clause_count",
4096,
Expand Down Expand Up @@ -921,6 +923,10 @@ private static Map<String, Highlighter> setupHighlighters(Settings settings, Lis
return unmodifiableMap(highlighters.getRegistry());
}

public static Map<String, Highlighter> getStaticHighlighters() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's tough that the highlighter code is so deeply ingrained into the fetch phase 😢

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - this was the best/cleanest solution that I could come up with, if you have better suggestions I'd be happy to talk about them!

return staticHighlighters;
}

private void registerScoreFunctions(List<SearchPlugin> plugins) {
// ScriptScoreFunctionBuilder has its own named writable because of a new script_score query
namedWriteables.add(
Expand Down Expand Up @@ -1060,6 +1066,9 @@ private void registerFetchSubPhases(List<SearchPlugin> plugins) {
registerFetchSubPhase(new HighlightPhase(highlighters));
registerFetchSubPhase(new FetchScorePhase());

// Store highlighters in a static map for other plugins to access
staticHighlighters = Map.copyOf(highlighters);

FetchPhaseConstructionContext context = new FetchPhaseConstructionContext(highlighters);
registerFromPlugin(plugins, p -> p.getFetchSubPhases(context), this::registerFetchSubPhase);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc

CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) {
IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings();
Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html")
Encoder encoder = "html".equals(fieldContext.field.fieldOptions().encoder())
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,16 +185,16 @@ public Map<String, Object> options() {
return options;
}

static class Builder {
public static class Builder {

private final FieldOptions fieldOptions = new FieldOptions();

Builder fragmentCharSize(int fragmentCharSize) {
public Builder fragmentCharSize(int fragmentCharSize) {
fieldOptions.fragmentCharSize = fragmentCharSize;
return this;
}

Builder numberOfFragments(int numberOfFragments) {
public Builder numberOfFragments(int numberOfFragments) {
fieldOptions.numberOfFragments = numberOfFragments;
return this;
}
Expand All @@ -209,17 +209,17 @@ Builder encoder(String encoder) {
return this;
}

Builder preTags(String[] preTags) {
public Builder preTags(String[] preTags) {
fieldOptions.preTags = preTags;
return this;
}

Builder postTags(String[] postTags) {
public Builder postTags(String[] postTags) {
fieldOptions.postTags = postTags;
return this;
}

Builder scoreOrdered(boolean scoreOrdered) {
public Builder scoreOrdered(boolean scoreOrdered) {
fieldOptions.scoreOrdered = scoreOrdered;
return this;
}
Expand All @@ -229,7 +229,7 @@ Builder highlightFilter(boolean highlightFilter) {
return this;
}

Builder requireFieldMatch(boolean requireFieldMatch) {
public Builder requireFieldMatch(boolean requireFieldMatch) {
fieldOptions.requireFieldMatch = requireFieldMatch;
return this;
}
Expand Down Expand Up @@ -269,7 +269,7 @@ Builder boundaryScannerLocale(Locale boundaryScannerLocale) {
return this;
}

Builder highlightQuery(Query highlightQuery) {
public Builder highlightQuery(Query highlightQuery) {
fieldOptions.highlightQuery = highlightQuery;
return this;
}
Expand All @@ -294,7 +294,7 @@ Builder options(Map<String, Object> options) {
return this;
}

FieldOptions build() {
public FieldOptions build() {
return fieldOptions;
}

Expand Down
1 change: 1 addition & 0 deletions x-pack/plugin/esql/compute/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
requires org.elasticsearch.geo;
requires org.elasticsearch.xcore;
requires hppc;
requires org.apache.lucene.highlighter;

exports org.elasticsearch.compute;
exports org.elasticsearch.compute.aggregation;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.compute.lucene;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

import static org.elasticsearch.core.RefCounted.ALWAYS_REFERENCED;

public class HighlighterExpressionEvaluator extends LuceneQueryEvaluator<BytesRefBlock.Builder>
implements
EvalOperator.ExpressionEvaluator {

private final String fieldName;
private final Integer numFragments;
private final Integer fragmentLength;
private final SearchContext searchContext;
private final Map<String, Highlighter> highlighters;

HighlighterExpressionEvaluator(
BlockFactory blockFactory,
ShardConfig[] shardConfigs,
String fieldName,
Integer numFragments,
Integer fragmentLength,
SearchContext searchContext,
Map<String, Highlighter> highlighters
) {
super(blockFactory, shardConfigs);
this.fieldName = fieldName;
this.numFragments = numFragments;
this.fragmentLength = fragmentLength;
this.searchContext = searchContext;
this.highlighters = highlighters;
}

@Override
protected ScoreMode scoreMode() {
return ScoreMode.COMPLETE;
}

@Override
protected Block createNoMatchBlock(BlockFactory blockFactory, int size) {
return blockFactory.newConstantNullBlock(size);
}

@Override
protected BytesRefBlock.Builder createBlockBuilder(BlockFactory blockFactory, int size) {
return blockFactory.newBytesRefBlockBuilder(size * numFragments);
}

@Override
protected void appendMatch(BytesRefBlock.Builder builder, Scorable scorer, int docId, LeafReaderContext leafReaderContext, Query query)
throws IOException {

// TODO: Can we build a custom highlighter directly here, so we don't have to rely on fetch phase classes?

// Create a source loader for highlighter use
SourceLoader sourceLoader = searchContext.newSourceLoader(null);
FetchContext fetchContext = new FetchContext(searchContext, sourceLoader);
MappedFieldType fieldType = searchContext.getSearchExecutionContext().getFieldType(fieldName);
SearchHit searchHit = new SearchHit(docId, null, null, ALWAYS_REFERENCED);
Source source = Source.lazy(lazyStoredSourceLoader(leafReaderContext, docId));
Highlighter highlighter = highlighters.getOrDefault(fieldType.getDefaultHighlighter(), new DefaultHighlighter());

// TODO: Consolidate these options with the ones built in the text similarity reranker
SearchHighlightContext.FieldOptions.Builder optionsBuilder = new SearchHighlightContext.FieldOptions.Builder();
optionsBuilder.numberOfFragments(numFragments != null ? numFragments : HighlightBuilder.DEFAULT_NUMBER_OF_FRAGMENTS);
optionsBuilder.fragmentCharSize(fragmentLength != null ? fragmentLength : HighlightBuilder.DEFAULT_FRAGMENT_CHAR_SIZE);
optionsBuilder.preTags(new String[] { "" });
optionsBuilder.postTags(new String[] { "" });
optionsBuilder.requireFieldMatch(false);
optionsBuilder.scoreOrdered(true);
optionsBuilder.highlightQuery(query);
SearchHighlightContext.Field field = new SearchHighlightContext.Field(fieldName, optionsBuilder.build());

FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext(searchHit, leafReaderContext, docId, Map.of(), source, null);
FieldHighlightContext highlightContext = new FieldHighlightContext(
fieldName,
field,
fieldType,
fetchContext,
hitContext,
query,
new HashMap<>()
);
HighlightField highlight = highlighter.highlight(highlightContext);

if (highlight != null) {
boolean multivalued = highlight.fragments().length > 1;
if (multivalued) {
builder.beginPositionEntry();
}
for (Text highlightText : highlight.fragments()) {
builder.appendBytesRef(new BytesRef(highlightText.bytes().bytes()));
}
if (multivalued) {
builder.endPositionEntry();
}
}
}

private static Supplier<Source> lazyStoredSourceLoader(LeafReaderContext ctx, int doc) {
return () -> {
StoredFieldLoader rootLoader = StoredFieldLoader.create(true, Collections.emptySet());
try {
LeafStoredFieldLoader leafRootLoader = rootLoader.getLoader(ctx, null);
leafRootLoader.advanceTo(doc);
return Source.fromBytes(leafRootLoader.source());
} catch (IOException e) {
throw new UncheckedIOException(e);
}
};
}

@Override
protected void appendNoMatch(BytesRefBlock.Builder builder) {
builder.appendNull();
}

@Override
public Block eval(Page page) {
return executeQuery(page);
}

public record Factory(
ShardConfig[] shardConfigs,
String fieldName,
Integer numFragments,
Integer fragmentSize,
SearchContext searchContext,
Map<String, Highlighter> highlighters
) implements EvalOperator.ExpressionEvaluator.Factory {
@Override
public EvalOperator.ExpressionEvaluator get(DriverContext context) {
return new HighlighterExpressionEvaluator(
context.blockFactory(),
shardConfigs,
fieldName,
numFragments,
fragmentSize,
searchContext,
highlighters
);
}
}
}
Loading