Skip to content

Commit e90fcd7

Browse files
Plugins can provide VectorsFormatProvider
Plugins can provide a VectorsFormatProvider that supplies a new KnnVectorsFormat for different VectorIndexTypes. If formats are provided by plugins, they are used instead of the standard ones.
1 parent d27b62f commit e90fcd7

File tree

26 files changed

+183
-44
lines changed

26 files changed

+183
-44
lines changed

server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
9898
if (mapperService != null) {
9999
Mapper mapper = mapperService.mappingLookup().getMapper(field);
100100
if (mapper instanceof DenseVectorFieldMapper vectorMapper) {
101-
return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat);
101+
return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat, mapperService.getIndexSettings());
102102
}
103103
}
104104
return knnVectorsFormat;

server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context
806806
DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder(
807807
fieldName,
808808
context.indexSettings().getIndexVersionCreated(),
809-
IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings())
809+
IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(context.indexSettings().getSettings()),
810+
context.getVectorFormatProviers()
810811
);
811812
builder.dimensions(mappers.size());
812813
DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext);

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.index.IndexSettings;
2020
import org.elasticsearch.index.analysis.IndexAnalyzers;
2121
import org.elasticsearch.index.mapper.MapperService.MergeReason;
22+
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
2223
import org.elasticsearch.xcontent.FilterXContentParserWrapper;
2324
import org.elasticsearch.xcontent.FlatteningXContentParser;
2425
import org.elasticsearch.xcontent.XContentBuilder;
@@ -299,6 +300,10 @@ public final MetadataFieldMapper getMetadataMapper(String mapperName) {
299300
return mappingLookup.getMapping().getMetadataMapperByName(mapperName);
300301
}
301302

303+
public final List<VectorsFormatProvider> getVectorFormatProviers() {
304+
return mappingParserContext.getVectorsFormatProviders();
305+
}
306+
302307
public final MappingParserContext dynamicTemplateParserContext(DateFormatter dateFormatter) {
303308
return mappingParserContext.createDynamicTemplateContext(dateFormatter);
304309
}

server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@
1111

1212
import org.elasticsearch.index.IndexVersion;
1313
import org.elasticsearch.index.IndexVersions;
14+
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
1415
import org.elasticsearch.plugins.FieldPredicate;
1516
import org.elasticsearch.plugins.MapperPlugin;
1617

1718
import java.util.Collections;
1819
import java.util.LinkedHashMap;
20+
import java.util.List;
1921
import java.util.Map;
2022
import java.util.function.Function;
2123

@@ -31,16 +33,19 @@ public final class MapperRegistry {
3133
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers6x;
3234
private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers5x;
3335
private final Function<String, FieldPredicate> fieldFilter;
36+
private final List<VectorsFormatProvider> vectorsFormatProviders;
3437

3538
public MapperRegistry(
3639
Map<String, Mapper.TypeParser> mapperParsers,
3740
Map<String, RuntimeField.Parser> runtimeFieldParsers,
3841
Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers,
39-
Function<String, FieldPredicate> fieldFilter
42+
Function<String, FieldPredicate> fieldFilter,
43+
List<VectorsFormatProvider> vectorsFormatProviders
4044
) {
4145
this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers));
4246
this.runtimeFieldParsers = runtimeFieldParsers;
4347
this.metadataMapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(metadataMapperParsers));
48+
this.vectorsFormatProviders = vectorsFormatProviders;
4449
Map<String, MetadataFieldMapper.TypeParser> metadata7x = new LinkedHashMap<>(metadataMapperParsers);
4550
metadata7x.remove(NestedPathFieldMapper.NAME);
4651
this.metadataMapperParsers7x = metadata7x;
@@ -72,6 +77,10 @@ public Map<String, RuntimeField.Parser> getRuntimeFieldParsers() {
7277
return runtimeFieldParsers;
7378
}
7479

80+
public List<VectorsFormatProvider> getVectorsFormatProviders() {
81+
return vectorsFormatProviders;
82+
}
83+
7584
/**
7685
* Return a map of the meta mappers that have been registered. The
7786
* returned map uses the name of the field as a key.

server/src/main/java/org/elasticsearch/index/mapper/MapperService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,8 @@ public MapperService(
245245
indexAnalyzers,
246246
indexSettings,
247247
idFieldMapper,
248-
bitSetProducer
248+
bitSetProducer,
249+
mapperRegistry.getVectorsFormatProviders()
249250
);
250251
this.documentParser = new DocumentParser(parserConfiguration, this.mappingParserContextSupplier.get());
251252
Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers = mapperRegistry.getMetadataMapperParsers(

server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
import org.elasticsearch.index.IndexSettings;
1818
import org.elasticsearch.index.IndexVersion;
1919
import org.elasticsearch.index.analysis.IndexAnalyzers;
20+
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
2021
import org.elasticsearch.index.query.SearchExecutionContext;
2122
import org.elasticsearch.index.similarity.SimilarityProvider;
2223
import org.elasticsearch.script.ScriptCompiler;
2324

25+
import java.util.List;
2426
import java.util.function.Function;
2527
import java.util.function.Supplier;
2628

@@ -41,6 +43,7 @@ public class MappingParserContext {
4143
private final IndexSettings indexSettings;
4244
private final IdFieldMapper idFieldMapper;
4345
private final Function<Query, BitSetProducer> bitSetProducer;
46+
private final List<VectorsFormatProvider> vectorsFormatProviders;
4447
private final long mappingObjectDepthLimit;
4548
private long mappingObjectDepth = 0;
4649

@@ -55,7 +58,8 @@ public MappingParserContext(
5558
IndexAnalyzers indexAnalyzers,
5659
IndexSettings indexSettings,
5760
IdFieldMapper idFieldMapper,
58-
Function<Query, BitSetProducer> bitSetProducer
61+
Function<Query, BitSetProducer> bitSetProducer,
62+
List<VectorsFormatProvider> vectorsFormatProviders
5963
) {
6064
this.similarityLookupService = similarityLookupService;
6165
this.typeParsers = typeParsers;
@@ -69,6 +73,7 @@ public MappingParserContext(
6973
this.idFieldMapper = idFieldMapper;
7074
this.mappingObjectDepthLimit = indexSettings.getMappingDepthLimit();
7175
this.bitSetProducer = bitSetProducer;
76+
this.vectorsFormatProviders = vectorsFormatProviders;
7277
}
7378

7479
public IndexAnalyzers getIndexAnalyzers() {
@@ -142,6 +147,10 @@ public BitSetProducer bitSetProducer(Query query) {
142147
return bitSetProducer.apply(query);
143148
}
144149

150+
public List<VectorsFormatProvider> getVectorsFormatProviders() {
151+
return vectorsFormatProviders;
152+
}
153+
145154
void incrementMappingObjectDepth() throws MapperParsingException {
146155
mappingObjectDepth++;
147156
if (mappingObjectDepth > mappingObjectDepthLimit) {
@@ -170,7 +179,8 @@ private static class MultiFieldParserContext extends MappingParserContext {
170179
in.indexAnalyzers,
171180
in.indexSettings,
172181
in.idFieldMapper,
173-
in.bitSetProducer
182+
in.bitSetProducer,
183+
in.vectorsFormatProviders
174184
);
175185
}
176186

@@ -200,7 +210,8 @@ private static class DynamicTemplateParserContext extends MappingParserContext {
200210
in.indexAnalyzers,
201211
in.indexSettings,
202212
in.idFieldMapper,
203-
in.bitSetProducer
213+
in.bitSetProducer,
214+
in.vectorsFormatProviders
204215
);
205216
this.dateFormatter = dateFormatter;
206217
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.elasticsearch.common.util.FeatureFlag;
4949
import org.elasticsearch.common.xcontent.support.XContentMapValues;
5050
import org.elasticsearch.features.NodeFeature;
51+
import org.elasticsearch.index.IndexSettings;
5152
import org.elasticsearch.index.IndexVersion;
5253
import org.elasticsearch.index.IndexVersions;
5354
import org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat;
@@ -121,9 +122,6 @@
121122
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER;
122123
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER;
123124

124-
/**
125-
* A {@link FieldMapper} for indexing a dense vector of floats.
126-
*/
127125
public class DenseVectorFieldMapper extends FieldMapper {
128126
public static final String COSINE_MAGNITUDE_FIELD_SUFFIX = "._magnitude";
129127
private static final float EPS = 1e-3f;
@@ -256,8 +254,14 @@ public static class Builder extends FieldMapper.Builder {
256254

257255
final IndexVersion indexVersionCreated;
258256
final boolean isSyntheticVector;
257+
private final List<VectorsFormatProvider> vectorsFormatProviders;
259258

260-
public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheticVector) {
259+
public Builder(
260+
String name,
261+
IndexVersion indexVersionCreated,
262+
boolean isSyntheticVector,
263+
List<VectorsFormatProvider> vectorsFormatProviders
264+
) {
261265
super(name);
262266
this.indexVersionCreated = indexVersionCreated;
263267
// This is defined as updatable because it can be updated once, from [null] to a valid dim size,
@@ -290,6 +294,7 @@ public Builder(String name, IndexVersion indexVersionCreated, boolean isSyntheti
290294
}
291295
});
292296
this.isSyntheticVector = isSyntheticVector;
297+
this.vectorsFormatProviders = vectorsFormatProviders;
293298
final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION);
294299
final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW);
295300
final boolean defaultBBQ8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW);
@@ -427,6 +432,7 @@ public Builder indexOptions(DenseVectorIndexOptions indexOptions) {
427432
}
428433

429434
@Override
435+
430436
public DenseVectorFieldMapper build(MapperBuilderContext context) {
431437
// Validate again here because the dimensions or element type could have been set programmatically,
432438
// which affects index option validity
@@ -448,7 +454,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) {
448454
builderParams(this, context),
449455
indexOptions.getValue(),
450456
indexVersionCreated,
451-
isSyntheticVectorFinal
457+
isSyntheticVectorFinal,
458+
vectorsFormatProviders
452459
);
453460
}
454461
}
@@ -2382,7 +2389,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
23822389
(n, c) -> new Builder(
23832390
n,
23842391
c.getIndexSettings().getIndexVersionCreated(),
2385-
INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
2392+
INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.get(c.getIndexSettings().getSettings()),
2393+
c.getVectorsFormatProviders()
23862394
),
23872395
notInMultiFields(CONTENT_TYPE)
23882396
);
@@ -2841,19 +2849,22 @@ public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValu
28412849
private final DenseVectorIndexOptions indexOptions;
28422850
private final IndexVersion indexCreatedVersion;
28432851
private final boolean isSyntheticVector;
2852+
private final List<VectorsFormatProvider> extraVectorsFormatProviders;
28442853

28452854
private DenseVectorFieldMapper(
28462855
String simpleName,
28472856
MappedFieldType mappedFieldType,
28482857
BuilderParams params,
28492858
DenseVectorIndexOptions indexOptions,
28502859
IndexVersion indexCreatedVersion,
2851-
boolean isSyntheticVector
2860+
boolean isSyntheticVector,
2861+
List<VectorsFormatProvider> vectorsFormatProviders
28522862
) {
28532863
super(simpleName, mappedFieldType, params);
28542864
this.indexOptions = indexOptions;
28552865
this.indexCreatedVersion = indexCreatedVersion;
28562866
this.isSyntheticVector = isSyntheticVector;
2867+
this.extraVectorsFormatProviders = vectorsFormatProviders;
28572868
}
28582869

28592870
@Override
@@ -2975,7 +2986,7 @@ protected String contentType() {
29752986

29762987
@Override
29772988
public FieldMapper.Builder getMergeBuilder() {
2978-
return new Builder(leafName(), indexCreatedVersion, isSyntheticVector).init(this);
2989+
return new Builder(leafName(), indexCreatedVersion, isSyntheticVector, extraVectorsFormatProviders).init(this);
29792990
}
29802991

29812992
private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) {
@@ -2998,12 +3009,22 @@ private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Objec
29983009
* @return the custom kNN vectors format that is configured for this field or
29993010
* {@code null} if the default format should be used.
30003011
*/
3001-
public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat) {
3012+
public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat, IndexSettings indexSettings) {
30023013
final KnnVectorsFormat format;
30033014
if (indexOptions == null) {
30043015
format = fieldType().elementType == ElementType.BIT ? new ES815HnswBitVectorsFormat() : defaultFormat;
30053016
} else {
3006-
format = indexOptions.getVectorsFormat(fieldType().elementType);
3017+
// if plugins provided alternative KnnVectorsFormat for this indexOptions, use it instead of standard
3018+
List<KnnVectorsFormat> extraKnnFormats = new ArrayList<>();
3019+
for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) {
3020+
KnnVectorsFormat extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions);
3021+
extraKnnFormats.add(extraKnnFormat);
3022+
}
3023+
if (extraKnnFormats.size() > 0) {
3024+
format = extraKnnFormats.get(0);
3025+
} else {
3026+
format = indexOptions.getVectorsFormat(fieldType().elementType);
3027+
}
30073028
}
30083029
// It's legal to reuse the same format name as this is the same on-disk format.
30093030
return new KnnVectorsFormat(format.getName()) {
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper.vectors;
11+
12+
import org.apache.lucene.codecs.KnnVectorsFormat;
13+
import org.elasticsearch.index.IndexSettings;
14+
15+
/**
16+
* A service provider interface for obtaining Lucene {@link KnnVectorsFormat} instances.
17+
* Plugins can implement this interface to provide custom vector formats.
18+
*/
19+
public interface VectorsFormatProvider {
20+
21+
/**
22+
* Returns a {@link KnnVectorsFormat} instance based on the provided index settings and vector index options.
23+
*
24+
* @param indexSettings The index settings.
25+
* @param indexOptions The dense vector index options.
26+
* @return A KnnVectorsFormat instance.
27+
*/
28+
KnnVectorsFormat getKnnVectorsFormat(IndexSettings indexSettings, DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions);
29+
}

server/src/main/java/org/elasticsearch/indices/IndicesModule.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper;
6969
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
7070
import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
71+
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
7172
import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction;
7273
import org.elasticsearch.index.seqno.RetentionLeaseSyncAction;
7374
import org.elasticsearch.index.seqno.RetentionLeaseSyncer;
@@ -80,6 +81,7 @@
8081
import org.elasticsearch.xcontent.NamedXContentRegistry;
8182
import org.elasticsearch.xcontent.ParseField;
8283

84+
import java.util.ArrayList;
8385
import java.util.Arrays;
8486
import java.util.Collections;
8587
import java.util.LinkedHashMap;
@@ -99,7 +101,8 @@ public IndicesModule(List<MapperPlugin> mapperPlugins) {
99101
getMappers(mapperPlugins),
100102
getRuntimeFields(mapperPlugins),
101103
getMetadataMappers(mapperPlugins),
102-
getFieldFilter(mapperPlugins)
104+
getFieldFilter(mapperPlugins),
105+
getVectorFormatProviders(mapperPlugins)
103106
);
104107
}
105108

@@ -221,6 +224,17 @@ public static Map<String, Mapper.TypeParser> getMappers(List<MapperPlugin> mappe
221224
return Collections.unmodifiableMap(mappers);
222225
}
223226

227+
private static List<VectorsFormatProvider> getVectorFormatProviders(List<MapperPlugin> mapperPlugins) {
228+
List<VectorsFormatProvider> vectorsFormatProviders = new ArrayList<>();
229+
for (MapperPlugin mapperPlugin : mapperPlugins) {
230+
VectorsFormatProvider vectorsFormatProvider = mapperPlugin.getVectorsFormatProvider();
231+
if (vectorsFormatProvider != null) {
232+
vectorsFormatProviders.add(vectorsFormatProvider);
233+
}
234+
}
235+
return Collections.unmodifiableList(vectorsFormatProviders);
236+
}
237+
224238
private static Map<String, RuntimeField.Parser> getRuntimeFields(List<MapperPlugin> mapperPlugins) {
225239
Map<String, RuntimeField.Parser> runtimeParsers = new LinkedHashMap<>();
226240
runtimeParsers.put(BooleanFieldMapper.CONTENT_TYPE, BooleanScriptFieldType.PARSER);

server/src/main/java/org/elasticsearch/plugins/MapperPlugin.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.index.mapper.Mapper;
1313
import org.elasticsearch.index.mapper.MetadataFieldMapper;
1414
import org.elasticsearch.index.mapper.RuntimeField;
15+
import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
1516

1617
import java.util.Collections;
1718
import java.util.Map;
@@ -65,6 +66,13 @@ default Function<String, FieldPredicate> getFieldFilter() {
6566
return NOOP_FIELD_FILTER;
6667
}
6768

69+
/**
70+
* Returns the {@link VectorsFormatProvider} implementation added by this plugin, or {@code null} if the plugin does not provide one.
71+
*/
72+
default VectorsFormatProvider getVectorsFormatProvider() {
73+
return null;
74+
}
75+
6876
/**
6977
* The default field filter applied, which doesn't filter anything. That means that by default get mappings, get index
7078
* get field mappings and field capabilities API will return every field that's present in the mappings.

0 commit comments

Comments
 (0)