diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml new file mode 100644 index 0000000000000..fe4e2f8cefd70 --- /dev/null +++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml @@ -0,0 +1,21 @@ +steps: + - label: "{{matrix.GRADLE_TASK}}" + command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} + timeout_in_minutes: 300 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-32 + buildDirectory: /dev/shm/bk + zones: us-central1-b,us-central1-c + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" + matrix: + setup: + GRADLE_TASK: + - checkPart1 + - checkPart2 + - checkPart3 + - checkPart4 + - checkPart5 + - checkPart6 diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml new file mode 100644 index 0000000000000..6011926d782bb --- /dev/null +++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml @@ -0,0 +1,14 @@ +steps: + - label: "Smoke test and update new cuVS snapshot" + command: .buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-16 + zones: us-central1-b,us-central1-c + diskSizeGb: 150 + - wait: ~ + - trigger: "elasticsearch-cuvs-run-tests" + build: + branch: "${BUILDKITE_BRANCH}" + async: true diff --git a/.buildkite/pipelines/pull-request/.defaults.yml b/.buildkite/pipelines/pull-request/.defaults.yml index 84d73cbd738a2..a0c82d9ecdded 100644 --- a/.buildkite/pipelines/pull-request/.defaults.yml +++ b/.buildkite/pipelines/pull-request/.defaults.yml @@ -1,5 +1,6 @@ config: - skip-labels: ">test-mute" + skip-labels: + - ">test-mute" excluded-regions: - ^docs/.* - ^x-pack/docs/.* diff --git 
a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml index 739deb2db92c4..961f6ae6d612e 100644 --- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml +++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml @@ -78,4 +78,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml new file mode 100644 index 0000000000000..40afa272ead7c --- /dev/null +++ b/.buildkite/pipelines/pull-request/gpu.yml @@ -0,0 +1,28 @@ +config: + allow-labels: test-gpu + skip-labels: + - ">test-mute" +steps: + - group: gpu-tests + steps: + - label: "{{matrix.GRADLE_TASK}} / gpu-tests" + key: "gpu-tests" + command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}} + timeout_in_minutes: 300 + agents: + provider: gcp + image: family/elasticsearch-ubuntu-2404-nvidia + machineType: g2-standard-32 + buildDirectory: /dev/shm/bk + zones: us-central1-b,us-central1-c + env: + GRADLE_TASK: "{{matrix.GRADLE_TASK}}" + matrix: + setup: + GRADLE_TASK: + - checkPart1 + - checkPart2 + - checkPart3 + - checkPart4 + - checkPart5 + - checkPart6 diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh new file mode 100755 index 0000000000000..241d5f78900e0 --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/configure.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +set -euo pipefail + +if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then + export JAVA_HOME="$HOME/.java/openjdk24" + export PATH="$JAVA_HOME/bin:$PATH" + + # Setup LD_LIBRARY_PATH, PATH + + export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}" + source /etc/profile.d/elastic-nvidia.sh +fi + +# Not running this before the tests results in an error when running the
tests +# No idea why... +nvidia-smi + +CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." && pwd)" + +CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/current-snapshot-version)}" +CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz" +CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE" + +CUVS_WORKSPACE="${CUVS_WORKSPACE:-$(mktemp -d)}"; cd "$CUVS_WORKSPACE" +CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION" + +curl -O "$CUVS_URL" +tar -xzf "$CUVS_ARCHIVE" + +CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout) + +LD_LIBRARY_PATH=$(echo "$LD_LIBRARY_PATH" | tr ':' '\n' | grep -v "libcuvs/linux-x64" | tr '\n' ':' | sed 's/:$//') +LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64:$LD_LIBRARY_PATH" +export LD_LIBRARY_PATH + +cd "$CUVS_DIR/cuvs-java/target" +mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" -DgroupId=com.nvidia.cuvs -DartifactId=elastic-cuvs-java -Dversion="$CUVS_VERSION" -Dpackaging=jar -DgeneratePom=true + +cd "$ELASTICSEARCH_REPO_DIR" +PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle +sed -i "s|implementation 'com.nvidia.cuvs:elastic-cuvs-java:.*'|implementation 'com.nvidia.cuvs:elastic-cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE" diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version new file mode 100644 index 0000000000000..3bb6b7db4687c --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version @@ -0,0 +1 @@ +fdb8bfb8 diff --git a/.buildkite/scripts/cuvs-snapshot/run-gradle.sh b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh new file mode 100755 index 0000000000000..4824981f5817f --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -euo pipefail + +source .buildkite/scripts/cuvs-snapshot/configure.sh + +cd "$WORKSPACE" + +.ci/scripts/run-gradle.sh "$@" diff --git
a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh new file mode 100755 index 0000000000000..17c83e2f5504c --- /dev/null +++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +set -euo pipefail + +SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version +BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}" + +if [[ -z "${CUVS_SNAPSHOT_VERSION:-}" ]]; then + echo "CUVS_SNAPSHOT_VERSION not set. Set this to update the current snapshot version." + exit 1 +fi + +if [[ "$CUVS_SNAPSHOT_VERSION" == "$(cat "$SNAPSHOT_VERSION_FILE")" ]]; then + echo "Current snapshot version already set to '$CUVS_SNAPSHOT_VERSION'. No need to update." + exit 0 +fi + +echo "--- Configuring libcuvs/cuvs-java" +source .buildkite/scripts/cuvs-snapshot/configure.sh + +if [[ "${SKIP_TESTING:-}" != "true" ]]; then + echo "--- Testing snapshot before updating" + ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S +fi + +echo "--- Updating snapshot" + +echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE" + +CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true + +gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" \ + -f branch="$BRANCH_TO_UPDATE" \ + -f message="Update cuvs snapshot version to $CUVS_SNAPSHOT_VERSION" \ + -f content="$(base64 -w 0 "$WORKSPACE/$SNAPSHOT_VERSION_FILE")" \ + -f sha="$CURRENT_SHA" diff --git a/docs/changelog/135545.yaml b/docs/changelog/135545.yaml new file mode 100644 index 0000000000000..bbd87fa047476 --- /dev/null +++ b/docs/changelog/135545.yaml @@ -0,0 +1,5 @@ +pr: 135545 +summary: Add GPUPlugin for indexing vectors on GPU +area: Vector Search +type: feature +issues: [] diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index
b4b17c450200f..5b5c9aea68ac3 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -1199,6 +1199,11 @@ + + + + + diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle index 41064d2bb3451..b0223791797dd 100644 --- a/qa/vector/build.gradle +++ b/qa/vector/build.gradle @@ -22,6 +22,12 @@ tasks.named("dependencyLicenses").configure { tasks.named('forbiddenApisMain').configure { enabled = false } +repositories { + mavenLocal() + maven { + url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots") + } +} dependencies { api "org.apache.lucene:lucene-core:${versions.lucene}" @@ -31,6 +37,7 @@ dependencies { implementation project(':libs:native') implementation project(':libs:logging') implementation project(':server') + implementation project(':x-pack:plugin:gpu') } /** * Task to run the KnnIndexTester with the provided parameters. diff --git a/qa/vector/src/main/java/module-info.java b/qa/vector/src/main/java/module-info.java index b6647aafeb01f..0bcb7bc98b651 100644 --- a/qa/vector/src/main/java/module-info.java +++ b/qa/vector/src/main/java/module-info.java @@ -18,4 +18,5 @@ requires org.elasticsearch.logging; requires java.management; requires jdk.management; + requires org.elasticsearch.gpu; } diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index 421375f038475..9e4dca46f0c18 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -39,6 +39,8 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; import java.io.IOException; import java.io.InputStream; @@ -76,7 
+78,8 @@ public class KnnIndexTester { enum IndexType { HNSW, FLAT, - IVF + IVF, + GPU_HNSW } enum MergePolicyType { @@ -90,6 +93,8 @@ private static String formatIndexPath(CmdLineArgs args) { List suffix = new ArrayList<>(); if (args.indexType() == IndexType.FLAT) { suffix.add("flat"); + } else if (args.indexType() == IndexType.GPU_HNSW) { + suffix.add("gpu_hnsw"); } else if (args.indexType() == IndexType.IVF) { suffix.add("ivf"); suffix.add(Integer.toString(args.ivfClusterSize())); @@ -107,6 +112,16 @@ static Codec createCodec(CmdLineArgs args) { final KnnVectorsFormat format; if (args.indexType() == IndexType.IVF) { format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER); + } else if (args.indexType() == IndexType.GPU_HNSW) { + if (args.quantizeBits() == 32) { + format = new ES92GpuHnswVectorsFormat(); + } else if (args.quantizeBits() == 7) { + format = new ES92GpuHnswSQVectorsFormat(); + } else { + throw new IllegalArgumentException( + "GPU HNSW index type only supports 7 or 32 bits quantization, but got: " + args.quantizeBits() + ); + } } else { if (args.quantizeBits() == 1) { if (args.indexType() == IndexType.FLAT) { diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 68dc8da3b7d15..cd375474797be 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -400,7 +400,8 @@ org.elasticsearch.settings.secure, org.elasticsearch.serverless.constants, org.elasticsearch.serverless.apifiltering, - org.elasticsearch.internal.security; + org.elasticsearch.internal.security, + org.elasticsearch.gpu; exports org.elasticsearch.telemetry.tracing; exports org.elasticsearch.telemetry; @@ -486,7 +487,7 @@ exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference; exports org.elasticsearch.lucene.util.automaton; exports org.elasticsearch.index.codec.perfield; - exports 
org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn; + exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu; exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn; exports org.elasticsearch.inference.telemetry; exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn; diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 8c0756f25286f..2ed1aa6c9f17f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -120,7 +120,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { if (mapperService != null) { Mapper mapper = mapperService.mappingLookup().getMapper(field); if (mapper instanceof DenseVectorFieldMapper vectorMapper) { - return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat); + return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat, mapperService.getIndexSettings()); } } return knnVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 56710d49b5a7a..5925e81091238 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -130,7 +130,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException ); } - static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { + public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter { final Lucene99ScalarQuantizedVectorsWriter 
delegate; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 4112f9108d3ee..5ca5761f7a33f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -800,7 +800,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder( fieldName, context.indexSettings().getIndexVersionCreated(), - IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings()) + IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings()), + context.getVectorFormatProviders() ); builder.dimensions(mappers.size()); DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 0b1a64713857a..b0e002d6c0aff 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -20,6 +20,7 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.mapper.MapperService.MergeReason; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.xcontent.FilterXContentParserWrapper; import org.elasticsearch.xcontent.FlatteningXContentParser; import org.elasticsearch.xcontent.XContentBuilder; @@ -305,6 +306,10 @@ public final MetadataFieldMapper getMetadataMapper(String mapperName) { return mappingLookup.getMapping().getMetadataMapperByName(mapperName); } + public final List getVectorFormatProviders() { 
+ return mappingParserContext.getVectorsFormatProviders(); + } + public final MappingParserContext dynamicTemplateParserContext(DateFormatter dateFormatter) { return mappingParserContext.createDynamicTemplateContext(dateFormatter); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java index 03026655cdca4..b545397e400f5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java @@ -11,11 +11,13 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.plugins.FieldPredicate; import org.elasticsearch.plugins.MapperPlugin; import java.util.Collections; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.function.Function; @@ -32,14 +34,16 @@ public final class MapperRegistry { private final Map metadataMapperParsers5x; private final Function fieldFilter; private final RootObjectMapperNamespaceValidator namespaceValidator; + private final List vectorsFormatProviders; public MapperRegistry( Map mapperParsers, Map runtimeFieldParsers, Map metadataMapperParsers, - Function fieldFilter + Function fieldFilter, + List vectorsFormatProviders ) { - this(mapperParsers, runtimeFieldParsers, metadataMapperParsers, fieldFilter, null); + this(mapperParsers, runtimeFieldParsers, metadataMapperParsers, fieldFilter, vectorsFormatProviders, null); } public MapperRegistry( @@ -47,6 +51,7 @@ public MapperRegistry( Map runtimeFieldParsers, Map metadataMapperParsers, Function fieldFilter, + List vectorsFormatProviders, RootObjectMapperNamespaceValidator namespaceValidator ) { this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers)); @@ -62,6 +67,7 @@ public MapperRegistry( 
this.metadataMapperParsers5x = metadata5x; this.fieldFilter = fieldFilter; this.namespaceValidator = namespaceValidator; + this.vectorsFormatProviders = vectorsFormatProviders == null ? List.of() : vectorsFormatProviders; } /** @@ -88,6 +94,10 @@ public RootObjectMapperNamespaceValidator getNamespaceValidator() { return namespaceValidator; } + public List getVectorsFormatProviders() { + return vectorsFormatProviders; + } + /** * Return a map of the meta mappers that have been registered. The * returned map uses the name of the field as a key. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index e0a4aca3d83f0..af817f5827b17 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -246,6 +246,7 @@ public MapperService( indexSettings, idFieldMapper, bitSetProducer, + mapperRegistry.getVectorsFormatProviders(), mapperRegistry.getNamespaceValidator() ); this.documentParser = new DocumentParser(parserConfiguration, this.mappingParserContextSupplier.get()); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java index 5e5488c5f9acd..b6ca0ed2259a7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java @@ -17,10 +17,12 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.script.ScriptCompiler; +import java.util.List; import java.util.function.Function; import
java.util.function.Supplier; @@ -43,6 +45,7 @@ public class MappingParserContext { private final Function bitSetProducer; private final long mappingObjectDepthLimit; private long mappingObjectDepth = 0; + private final List vectorsFormatProviders; private final RootObjectMapperNamespaceValidator namespaceValidator; public MappingParserContext( @@ -57,7 +60,9 @@ public MappingParserContext( IndexSettings indexSettings, IdFieldMapper idFieldMapper, Function bitSetProducer, + List vectorsFormatProviders, RootObjectMapperNamespaceValidator namespaceValidator + ) { this.similarityLookupService = similarityLookupService; this.typeParsers = typeParsers; @@ -71,6 +76,7 @@ public MappingParserContext( this.idFieldMapper = idFieldMapper; this.mappingObjectDepthLimit = indexSettings.getMappingDepthLimit(); this.bitSetProducer = bitSetProducer; + this.vectorsFormatProviders = vectorsFormatProviders; this.namespaceValidator = namespaceValidator; } @@ -85,7 +91,8 @@ public MappingParserContext( IndexAnalyzers indexAnalyzers, IndexSettings indexSettings, IdFieldMapper idFieldMapper, - Function bitSetProducer + Function bitSetProducer, + List vectorsFormatProviders ) { this( similarityLookupService, @@ -99,6 +106,7 @@ public MappingParserContext( indexSettings, idFieldMapper, bitSetProducer, + vectorsFormatProviders, null ); } @@ -178,6 +186,10 @@ public BitSetProducer bitSetProducer(Query query) { return bitSetProducer.apply(query); } + public List getVectorsFormatProviders() { + return vectorsFormatProviders; + } + void incrementMappingObjectDepth() throws MapperParsingException { mappingObjectDepth++; if (mappingObjectDepth > mappingObjectDepthLimit) { @@ -207,6 +219,7 @@ private static class MultiFieldParserContext extends MappingParserContext { in.indexSettings, in.idFieldMapper, in.bitSetProducer, + in.vectorsFormatProviders, in.namespaceValidator ); } @@ -238,6 +251,7 @@ private static class DynamicTemplateParserContext extends MappingParserContext { in.indexSettings, 
in.idFieldMapper, in.bitSetProducer, + in.vectorsFormatProviders, in.namespaceValidator ); this.dateFormatter = dateFormatter; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 7b8c9934f8104..ea31f18c39dae 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -44,6 +44,7 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat; @@ -251,10 +252,17 @@ public static class Builder extends FieldMapper.Builder { final IndexVersion indexVersionCreated; final boolean isExcludeSourceVectors; + private final List vectorsFormatProviders; - public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) { + public Builder( + String name, + IndexVersion indexVersionCreated, + boolean isExcludeSourceVectors, + List vectorsFormatProviders + ) { super(name); this.indexVersionCreated = indexVersionCreated; + this.vectorsFormatProviders = vectorsFormatProviders; // This is defined as updatable because it can be updated once, from [null] to a valid dim size, // by a dynamic mapping update. Once it has been set, however, the value cannot be changed. 
this.dims = new Parameter<>("dims", true, () -> null, (n, c, o) -> { @@ -443,7 +451,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { builderParams(this, context), indexOptions.getValue(), indexVersionCreated, - isExcludeSourceVectorsFinal + isExcludeSourceVectorsFinal, + vectorsFormatProviders ); } } @@ -1887,6 +1896,18 @@ public boolean isFlat() { return false; } + public int m() { + return m; + } + + public int efConstruction() { + return efConstruction; + } + + public Float confidenceInterval() { + return confidenceInterval; + } + @Override public String toString() { return "{type=" @@ -1922,7 +1943,7 @@ public boolean updatableTo(DenseVectorIndexOptions update) { } } - static class HnswIndexOptions extends DenseVectorIndexOptions { + public static class HnswIndexOptions extends DenseVectorIndexOptions { private final int m; private final int efConstruction; @@ -1983,6 +2004,14 @@ public boolean isFlat() { return false; } + public int m() { + return m; + } + + public int efConstruction() { + return efConstruction; + } + @Override public String toString() { return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + "}"; @@ -2203,7 +2232,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws (n, c) -> new Builder( n, c.getIndexSettings().getIndexVersionCreated(), - INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()) + INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()), + c.getVectorsFormatProviders() ), notInMultiFields(CONTENT_TYPE) ); @@ -2608,11 +2638,11 @@ private Query createKnnFloatQuery( return knnQuery; } - VectorSimilarity getSimilarity() { + public VectorSimilarity getSimilarity() { return similarity; } - int getVectorDimensions() { + public int getVectorDimensions() { return dims; } @@ -2666,6 +2696,7 @@ public List fetchValues(Source source, int doc, List ignoredValu private final DenseVectorIndexOptions 
indexOptions; private final IndexVersion indexCreatedVersion; private final boolean isExcludeSourceVectors; + private final List extraVectorsFormatProviders; private DenseVectorFieldMapper( String simpleName, @@ -2673,12 +2704,14 @@ private DenseVectorFieldMapper( BuilderParams params, DenseVectorIndexOptions indexOptions, IndexVersion indexCreatedVersion, - boolean isExcludeSourceVectorsFinal + boolean isExcludeSourceVectorsFinal, + List vectorsFormatProviders ) { super(simpleName, mappedFieldType, params); this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; this.isExcludeSourceVectors = isExcludeSourceVectorsFinal; + this.extraVectorsFormatProviders = vectorsFormatProviders; } @Override @@ -2800,7 +2833,7 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors).init(this); + return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors, extraVectorsFormatProviders).init(this); } private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) { @@ -2823,12 +2856,20 @@ private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Objec * @return the custom kNN vectors format that is configured for this field or * {@code null} if the default format should be used. */ - public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat) { + public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat, IndexSettings indexSettings) { final KnnVectorsFormat format; if (indexOptions == null) { format = fieldType().element.elementType() == ElementType.BIT ? 
new ES815HnswBitVectorsFormat() : defaultFormat; } else { - format = indexOptions.getVectorsFormat(fieldType().element.elementType()); + // if plugins provided alternative KnnVectorsFormat for this indexOptions, use it instead of standard + KnnVectorsFormat extraKnnFormat = null; + for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) { + extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions); + if (extraKnnFormat != null) { + break; + } + } + format = extraKnnFormat != null ? extraKnnFormat : indexOptions.getVectorsFormat(fieldType().element.elementType()); } // It's legal to reuse the same format name as this is the same on-disk format. return new KnnVectorsFormat(format.getName()) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java new file mode 100644 index 0000000000000..4bc338e6680ec --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper.vectors; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.elasticsearch.index.IndexSettings; + +/** + * A service provider interface for obtaining Lucene {@link KnnVectorsFormat} instances. 
+ * Plugins can implement this interface to provide custom vector formats + */ +public interface VectorsFormatProvider { + + /** + * Returns a {@link KnnVectorsFormat} instance based on the provided index settings and vector index options. + * May return {@code null} if the provider does not support the format for the given index settings or vector index options. + * + * @param indexSettings The index settings. + * @param indexOptions The dense vector index options. + * @return A KnnVectorsFormat instance. + */ + KnnVectorsFormat getKnnVectorsFormat(IndexSettings indexSettings, DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions); +} diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java index 98734e373ba17..ad1608bfdff27 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java @@ -69,6 +69,7 @@ import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction; import org.elasticsearch.index.seqno.RetentionLeaseSyncAction; import org.elasticsearch.index.seqno.RetentionLeaseSyncer; @@ -78,9 +79,11 @@ import org.elasticsearch.injection.guice.AbstractModule; import org.elasticsearch.plugins.FieldPredicate; import org.elasticsearch.plugins.MapperPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; @@ -95,18 +98,23 @@ public class IndicesModule 
extends AbstractModule { private final MapperRegistry mapperRegistry; - public IndicesModule(List mapperPlugins, RootObjectMapperNamespaceValidator namespaceValidator) { + public IndicesModule( + List mapperPlugins, + List vectorFormatProviderPlugins, + RootObjectMapperNamespaceValidator namespaceValidator + ) { this.mapperRegistry = new MapperRegistry( getMappers(mapperPlugins), getRuntimeFields(mapperPlugins), getMetadataMappers(mapperPlugins), getFieldFilter(mapperPlugins), + getVectorFormatProviders(vectorFormatProviderPlugins), namespaceValidator ); } public IndicesModule(List mapperPlugins) { - this(mapperPlugins, null); + this(mapperPlugins, Collections.emptyList(), null); } public static List getNamedWriteables() { @@ -227,6 +235,19 @@ public static Map getMappers(List mappe return Collections.unmodifiableMap(mappers); } + private static List getVectorFormatProviders( + List vectorFormatProviderPlugins + ) { + List vectorsFormatProviders = new ArrayList<>(); + for (InternalVectorFormatProviderPlugin plugin : vectorFormatProviderPlugins) { + VectorsFormatProvider vectorsFormatProvider = plugin.getVectorsFormatProvider(); + if (vectorsFormatProvider != null) { + vectorsFormatProviders.add(vectorsFormatProvider); + } + } + return Collections.unmodifiableList(vectorsFormatProviders); + } + private static Map getRuntimeFields(List mapperPlugins) { Map runtimeParsers = new LinkedHashMap<>(); runtimeParsers.put(BooleanFieldMapper.CONTENT_TYPE, BooleanScriptFieldType.PARSER); diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 548ee6f4da22e..7a598475fc456 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -185,6 +185,7 @@ import org.elasticsearch.plugins.TelemetryPlugin; import org.elasticsearch.plugins.internal.DocumentParsingProvider; import 
org.elasticsearch.plugins.internal.DocumentParsingProviderPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.plugins.internal.LoggingDataProvider; import org.elasticsearch.plugins.internal.ReloadAwarePlugin; import org.elasticsearch.plugins.internal.RestExtension; @@ -822,7 +823,11 @@ private void construct( )::onNewInfo ); - IndicesModule indicesModule = new IndicesModule(pluginsService.filterPlugins(MapperPlugin.class).toList(), namespaceValidator); + IndicesModule indicesModule = new IndicesModule( + pluginsService.filterPlugins(MapperPlugin.class).toList(), + pluginsService.filterPlugins(InternalVectorFormatProviderPlugin.class).toList(), + namespaceValidator + ); modules.add(indicesModule); modules.add(new GatewayModule()); diff --git a/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java new file mode 100644 index 0000000000000..84b3d964fd2ba --- /dev/null +++ b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.plugins.internal; + +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; + +public interface InternalVectorFormatProviderPlugin { + + /** + * Returns {VectorFormatProvider} implementations added by this plugin. 
+ */ + default VectorsFormatProvider getVectorsFormatProvider() { + return null; + } +} diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java index 074c495e53db2..42513896f84fd 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java @@ -299,7 +299,13 @@ private IndexMetadataVerifier getIndexMetadataVerifier() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER), + new MapperRegistry( + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + MapperPlugin.NOOP_FIELD_FILTER, + null + ), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java index 691ca7682f30c..ea18734190d7f 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java @@ -857,7 +857,13 @@ public void testIndexMapperDynamic() { Settings.EMPTY, null, xContentRegistry(), - new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER), + new MapperRegistry( + Collections.emptyMap(), + Collections.emptyMap(), + Collections.emptyMap(), + MapperPlugin.NOOP_FIELD_FILTER, + null + ), IndexScopedSettings.DEFAULT_SCOPED_SETTINGS, null, MapperMetrics.NOOP @@ -915,4 +921,5 @@ public void testSame() { } assertTrue(IndexSettings.same(settings, differentOtherSettingBuilder.build())); } + } diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java 
b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index a2ff440facaf0..331d84d9e4f61 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -128,7 +128,8 @@ private CodecService createCodecService() throws IOException { Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), - MapperPlugin.NOOP_FIELD_FILTER + MapperPlugin.NOOP_FIELD_FILTER, + null ); BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(settings, BitsetFilterCache.Listener.NOOP); MapperService service = new MapperService( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java index 6ab4432c1efd3..ef3607476fb47 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java @@ -65,7 +65,8 @@ private static MappingParser createMappingParser(Settings settings, IndexVersion indexAnalyzers, indexSettings, indexSettings.getMode().idFieldMapperWithoutFieldData(), - bitsetFilterCache::getBitSetProducer + bitsetFilterCache::getBitSetProducer, + null ); Map metadataMapperParsers = mapperRegistry.getMetadataMapperParsers( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java index a161bcbc5d6d2..154f7d774bc9a 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java @@ -272,7 +272,8 @@ private static TestMapper fromMapping( mapperService.getIndexSettings().getMode().idFieldMapperWithoutFieldData(), query -> { throw new UnsupportedOperationException(); - } + }, + null ); if (fromDynamicTemplate) { pc = 
pc.createDynamicTemplateContext(null); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java index 9a30e7d696b68..5b0f823ac1e17 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java @@ -106,7 +106,8 @@ public void testMultiFieldWithinMultiField() throws IOException { ProvidedIdFieldMapper.NO_FIELD_DATA, query -> { throw new UnsupportedOperationException(); - } + }, + null ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index e3a3a43dc591e..eed68d4c3ac0c 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -2176,7 +2176,7 @@ public void testValidateOnBuild() { int dimensions = randomIntBetween(64, 1024); // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options - DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false).elementType( + DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false, List.of()).elementType( ElementType.FLOAT ).dimensions(dimensions).build(context); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index b56b66767c7d7..75d7e8609bf56 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -63,13 +63,39 @@ private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() { return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false)); } - private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() { + private static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() { return randomFrom( new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000)), new DenseVectorFieldMapper.FlatIndexOptions() ); } + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomFlatIndexOptions() { + return randomFrom( + new DenseVectorFieldMapper.FlatIndexOptions(), + new DenseVectorFieldMapper.Int8FlatIndexOptions( + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ), + new DenseVectorFieldMapper.Int4FlatIndexOptions( + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ) + ); + } + + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomGpuSupportedIndexOptions() { + return randomFrom( + new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 3199)), + new DenseVectorFieldMapper.Int8HnswIndexOptions( + randomIntBetween(1, 100), + randomIntBetween(1, 3199), + randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), + randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + ) + ); + } + public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsAll() { List options = new ArrayList<>( Arrays.asList( diff --git a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java 
b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java index 9bb56c71f4324..9c7f9a6d1e35e 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java @@ -630,7 +630,7 @@ private static MapperService createMapperServiceWithNamespaceValidator( RootObjectMapperNamespaceValidator namespaceValidator ) { IndexAnalyzers indexAnalyzers = IndexAnalyzers.of(singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, null))); - IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), namespaceValidator); + IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), Collections.emptyList(), namespaceValidator); MapperRegistry mapperRegistry = indicesModule.getMapperRegistry(); Supplier searchExecutionContextSupplier = () -> { throw new UnsupportedOperationException(); }; MapperService mapperService = mock(MapperService.class); @@ -650,6 +650,7 @@ private static MapperService createMapperServiceWithNamespaceValidator( query -> { throw new UnsupportedOperationException(); }, + null, namespaceValidator ) ); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 4261e5845f3a8..8ccab79c1c6ae 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -63,6 +63,7 @@ import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.TelemetryPlugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.script.ScriptContext; @@ 
-312,6 +313,10 @@ public MapperService build() { SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of()); MapperRegistry mapperRegistry = new IndicesModule( plugins.stream().filter(p -> p instanceof MapperPlugin).map(p -> (MapperPlugin) p).collect(toList()), + plugins.stream() + .filter(p -> p instanceof InternalVectorFormatProviderPlugin) + .map(p -> (InternalVectorFormatProviderPlugin) p) + .collect(toList()), namespaceValidator ).getMapperRegistry(); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java index 799f0da58f827..f11e428f1274c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java @@ -68,7 +68,8 @@ private TestDocumentParserContext(MappingLookup mappingLookup, SourceToParse sou null, query -> { throw new UnsupportedOperationException(); - } + }, + null ), source, mappingLookup.getMapping().getRoot(), diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 0d89d40d982c5..f199fcaabd29b 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -1410,7 +1410,8 @@ private static class MockParserContext extends MappingParserContext { null, query -> { throw new UnsupportedOperationException(); - } + }, + null ); } diff --git a/x-pack/plugin/gpu/README b/x-pack/plugin/gpu/README new file mode 100644 index 0000000000000..a56d18aef7ec5 --- /dev/null +++ b/x-pack/plugin/gpu/README @@ -0,0 +1,51 @@ + + +For local development on mac, where cuVS is not easily 
built one can +minimally get an IDEA environment with compile-time support by building +a minimal cuvs-java "api" jar. Test cannot be run. + +1. Clone cuvs + git clone https://github.com/rapidsai/cuvs/; cd cuvs + git checkout branch-25.08 + cd java/cuvs-java + +2. Remove the implementation compile target from the pom.xml, e.g. + +$ git diff +diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml +index 15e0193a..446f2b61 100644 +--- a/java/cuvs-java/pom.xml ++++ b/java/cuvs-java/pom.xml +@@ -123,20 +123,6 @@ + + + +- +- compile-java-22 +- compile +- +- compile +- +- +- 22 +- +- ${project.basedir}/src/main/java22 +- +- true +- +- + + + + +3. Build and install into local maven repository + + export JAVA_HOME=/Users/chegar/binaries/jdk-22.0.2.jdk/Contents/Home/ + cd java/cuvs-java + mvn install + Installs into maven local repository, e.g: + /Users/chegar/.m2/repository/com/nvidia/cuvs/cuvs-java/25.08.0/ + + Might need to modify gradle metadata sha in gradle/verification-metadata.xml + + diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle new file mode 100644 index 0000000000000..3b9330371fc47 --- /dev/null +++ b/x-pack/plugin/gpu/build.gradle @@ -0,0 +1,36 @@ +apply plugin: 'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.internal-cluster-test' +apply plugin: 'elasticsearch.internal-yaml-rest-test' +apply plugin: 'elasticsearch.mrjar' + +esplugin { + name = 'gpu' + description = 'A plugin for doing vector search in GPU' + classname = 'org.elasticsearch.xpack.gpu.GPUPlugin' + extendedPlugins = ['x-pack-core'] +} +base { + archivesName = 'x-pack-gpu' +} + +repositories { + maven { + url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots") + } +} + +dependencies { + compileOnly project(path: xpackModule('core')) + compileOnly project(':server') + implementation('com.nvidia.cuvs:cuvs-java:25.10.0') { + changing = true // Ensure that we get updates even when the version number doesn't change. 
We can remove this once things stabilize + } + testImplementation(testArtifact(project(xpackModule('core')))) + testImplementation(testArtifact(project(':server'))) + yamlRestTestImplementation(project(xpackModule('gpu'))) + clusterModules project(xpackModule('gpu')) +} + +artifacts { + restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test")) +} diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt new file mode 100644 index 0000000000000..1a89b9054d669 --- /dev/null +++ b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2020 NVIDIA Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java new file mode 100644 index 0000000000000..d0f8e85ef6070 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.plugin.gpu; + +import com.nvidia.cuvs.BruteForceIndex; +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CuVSDeviceMatrix; +import com.nvidia.cuvs.CuVSHostMatrix; +import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.HnswIndex; +import com.nvidia.cuvs.TieredIndex; +import com.nvidia.cuvs.spi.CuVSProvider; + +import java.lang.invoke.MethodHandle; +import java.nio.file.Path; + +class CuVSProviderDelegate implements CuVSProvider { + private final CuVSProvider delegate; + + CuVSProviderDelegate(CuVSProvider delegate) { + this.delegate = delegate; + } + + @Override + public CuVSResources newCuVSResources(Path path) throws Throwable { + return delegate.newCuVSResources(path); + } + + @Override + public CuVSMatrix.Builder newHostMatrixBuilder(long l, long l1, CuVSMatrix.DataType dataType) { + return delegate.newHostMatrixBuilder(l, l1, dataType); + } + + @Override + public CuVSMatrix.Builder newDeviceMatrixBuilder( + CuVSResources cuVSResources, + long l, + long l1, + CuVSMatrix.DataType dataType + ) { + return delegate.newDeviceMatrixBuilder(cuVSResources, l, l1, dataType); + } + + @Override + public CuVSMatrix.Builder newDeviceMatrixBuilder( + CuVSResources cuVSResources, + long l, + long l1, + int i, + int i1, + CuVSMatrix.DataType dataType + ) { + return delegate.newDeviceMatrixBuilder(cuVSResources, l, l1, i, i1, dataType); + } + + @Override + public MethodHandle newNativeMatrixBuilder() { + return delegate.newNativeMatrixBuilder(); + } + + @Override + public CuVSMatrix newMatrixFromArray(float[][] floats) { + return delegate.newMatrixFromArray(floats); + } + + @Override + public CuVSMatrix newMatrixFromArray(int[][] ints) { + return delegate.newMatrixFromArray(ints); + } + + @Override + public CuVSMatrix newMatrixFromArray(byte[][] bytes) { + return delegate.newMatrixFromArray(bytes); + } + + @Override + public BruteForceIndex.Builder 
newBruteForceIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newBruteForceIndexBuilder(cuVSResources); + } + + @Override + public CagraIndex.Builder newCagraIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newCagraIndexBuilder(cuVSResources); + } + + @Override + public HnswIndex.Builder newHnswIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newHnswIndexBuilder(cuVSResources); + } + + @Override + public TieredIndex.Builder newTieredIndexBuilder(CuVSResources cuVSResources) throws UnsupportedOperationException { + return delegate.newTieredIndexBuilder(cuVSResources); + } + + @Override + public CagraIndex mergeCagraIndexes(CagraIndex[] cagraIndices) throws Throwable { + return delegate.mergeCagraIndexes(cagraIndices); + } + + @Override + public GPUInfoProvider gpuInfoProvider() { + return delegate.gpuInfoProvider(); + } +} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java new file mode 100644 index 0000000000000..b00d8d83143a9 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java @@ -0,0 +1,225 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.plugin.gpu; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.vectors.KnnSearchBuilder; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.elasticsearch.xpack.gpu.GPUSupport; +import org.junit.Assert; +import org.junit.BeforeClass; + +import java.util.Collection; +import java.util.List; +import java.util.Locale; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; + +@LuceneTestCase.SuppressCodecs("*") // use our custom codec +public class GPUIndexIT extends ESIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return List.of(GPUPlugin.class); + } + + @BeforeClass + public static void checkGPUSupport() { + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + } + + public void testBasic() { + String indexName = "index1"; + final int dims = randomIntBetween(4, 128); + final int[] numDocs = new int[] { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) }; + createIndex(indexName, dims, false); + int totalDocs = 0; + for (int i = 0; i < numDocs.length; i++) { + indexDocs(indexName, numDocs[i], dims, i * 100); + totalDocs += numDocs[i]; + } + refresh(); + assertSearch(indexName, randomFloatVector(dims), totalDocs); + } + + @AwaitsFix(bugUrl = "Fix sorted index") + public void testSortedIndexReturnsSameResultsAsUnsorted() { + String indexName1 = "index_unsorted"; + String indexName2 = "index_sorted"; + final int dims = randomIntBetween(4, 128); 
+ createIndex(indexName1, dims, false); + createIndex(indexName2, dims, true); + + final int[] numDocs = new int[] { randomIntBetween(50, 100), randomIntBetween(50, 100) }; + for (int i = 0; i < numDocs.length; i++) { + BulkRequestBuilder bulkRequest1 = client().prepareBulk(); + BulkRequestBuilder bulkRequest2 = client().prepareBulk(); + for (int j = 0; j < numDocs[i]; j++) { + String id = String.valueOf(i * 100 + j); + String keywordValue = String.valueOf(numDocs[i] - j); + float[] vector = randomFloatVector(dims); + bulkRequest1.add(prepareIndex(indexName1).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue)); + bulkRequest2.add(prepareIndex(indexName2).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue)); + } + BulkResponse bulkResponse1 = bulkRequest1.get(); + assertFalse("Bulk request failed: " + bulkResponse1.buildFailureMessage(), bulkResponse1.hasFailures()); + BulkResponse bulkResponse2 = bulkRequest2.get(); + assertFalse("Bulk request failed: " + bulkResponse2.buildFailureMessage(), bulkResponse2.hasFailures()); + } + refresh(); + + float[] queryVector = randomFloatVector(dims); + int k = 10; + int numCandidates = k * 10; + + var searchResponse1 = prepareSearch(indexName1).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) + .get(); + + var searchResponse2 = prepareSearch(indexName2).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) + .get(); + + try { + SearchHit[] hits1 = searchResponse1.getHits().getHits(); + SearchHit[] hits2 = searchResponse2.getHits().getHits(); + Assert.assertEquals(hits1.length, hits2.length); + for (int i = 0; i < hits1.length; i++) { + Assert.assertEquals(hits1[i].getId(), hits2[i].getId()); + 
Assert.assertEquals(hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue()); + Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.001f); + } + } finally { + searchResponse1.decRef(); + searchResponse2.decRef(); + } + + // Force merge and search again + assertNoFailures(indicesAdmin().prepareForceMerge(indexName1).get()); + assertNoFailures(indicesAdmin().prepareForceMerge(indexName2).get()); + ensureGreen(); + + var searchResponse3 = prepareSearch(indexName1).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) + .get(); + + var searchResponse4 = prepareSearch(indexName2).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))) + .get(); + + try { + SearchHit[] hits3 = searchResponse3.getHits().getHits(); + SearchHit[] hits4 = searchResponse4.getHits().getHits(); + Assert.assertEquals(hits3.length, hits4.length); + for (int i = 0; i < hits3.length; i++) { + Assert.assertEquals(hits3[i].getId(), hits4[i].getId()); + Assert.assertEquals(hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue()); + Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.01f); + } + } finally { + searchResponse3.decRef(); + searchResponse4.decRef(); + } + } + + public void testSearchWithoutGPU() { + String indexName = "index1"; + final int dims = randomIntBetween(4, 128); + final int numDocs = randomIntBetween(1, 500); + createIndex(indexName, dims, false); + ensureGreen(); + + indexDocs(indexName, numDocs, dims, 0); + refresh(); + + // update settings to disable GPU usage + Settings.Builder settingsBuilder = Settings.builder().put("index.vectors.indexing.use_gpu", false); + 
assertAcked(client().admin().indices().prepareUpdateSettings(indexName).setSettings(settingsBuilder.build())); + ensureGreen(); + assertSearch(indexName, randomFloatVector(dims), numDocs); + } + + private void createIndex(String indexName, int dims, boolean sorted) { + var settings = Settings.builder().put(indexSettings()); + settings.put("index.number_of_shards", 1); + settings.put("index.vectors.indexing.use_gpu", true); + if (sorted) { + settings.put("index.sort.field", "my_keyword"); + } + + String type = randomFrom("hnsw", "int8_hnsw"); + String mapping = String.format(Locale.ROOT, """ + { + "properties": { + "my_vector": { + "type": "dense_vector", + "dims": %d, + "similarity": "l2_norm", + "index_options": { + "type": "%s" + } + }, + "my_keyword": { + "type": "keyword" + } + } + } + """, dims, type); + assertAcked(prepareCreate(indexName).setSettings(settings.build()).setMapping(mapping)); + ensureGreen(); + } + + private void indexDocs(String indexName, int numDocs, int dims, int startDoc) { + BulkRequestBuilder bulkRequest = client().prepareBulk(); + for (int i = 0; i < numDocs; i++) { + String id = String.valueOf(startDoc + i); + String keywordValue = String.valueOf(numDocs - i); + var indexRequest = prepareIndex(indexName).setId(id) + .setSource("my_vector", randomFloatVector(dims), "my_keyword", keywordValue); + bulkRequest.add(indexRequest); + } + BulkResponse bulkResponse = bulkRequest.get(); + assertFalse("Bulk request failed: " + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures()); + } + + private void assertSearch(String indexName, float[] queryVector, int totalDocs) { + int k = Math.min(randomIntBetween(1, 20), totalDocs); + int numCandidates = k * 10; + assertNoFailuresAndResponse( + prepareSearch(indexName).setSize(k) + .setFetchSource(false) + .addFetchField("my_keyword") + .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))), + response -> assertEquals("Expected k hits to be 
returned", k, response.getHits().getHits().length) + ); + } + + private static float[] randomFloatVector(int dims) { + float[] vector = new float[dims]; + for (int i = 0; i < dims; i++) { + vector[i] = randomFloat(); + } + return vector; + } +} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java new file mode 100644 index 0000000000000..65d8daf14d31e --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java @@ -0,0 +1,283 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.plugin.gpu; + +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.CuVSResourcesInfo; +import com.nvidia.cuvs.GPUInfo; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; +import com.nvidia.cuvs.spi.CuVSServiceProvider; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.gpu.GPUPlugin; +import org.junit.After; + +import java.util.Collection; +import java.util.List; +import java.util.function.Function; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.startsWith; + +public class 
GPUPluginInitializationIT extends ESIntegTestCase { + + private static final Function SUPPORTED_GPU_PROVIDER = + p -> new TestCuVSServiceProvider.TestGPUInfoProvider( + List.of( + new GPUInfo( + 0, + "TestGPU", + 8 * 1024 * 1024 * 1024L, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR, + true, + true + ) + ) + ); + + private static final Function NO_GPU_PROVIDER = p -> new TestCuVSServiceProvider.TestGPUInfoProvider( + List.of() + ); + + @Override + protected Collection> nodePlugins() { + return List.of(GPUPlugin.class); + } + + public static class TestCuVSServiceProvider extends CuVSServiceProvider { + + static final Function BUILTIN_GPU_INFO_PROVIDER = CuVSProvider::gpuInfoProvider; + static Function mockedGPUInfoProvider = BUILTIN_GPU_INFO_PROVIDER; + + @Override + public CuVSProvider get(CuVSProvider builtin) { + return new CuVSProviderDelegate(builtin) { + @Override + public GPUInfoProvider gpuInfoProvider() { + return mockedGPUInfoProvider.apply(builtin); + } + }; + } + + private static class TestGPUInfoProvider implements GPUInfoProvider { + private final List gpuList; + + private TestGPUInfoProvider(List gpuList) { + this.gpuList = gpuList; + } + + @Override + public List availableGPUs() { + return gpuList; + } + + @Override + public List compatibleGPUs() { + return gpuList; + } + + @Override + public CuVSResourcesInfo getCurrentInfo(CuVSResources cuVSResources) { + return null; + } + } + } + + @After + public void disableMock() { + TestCuVSServiceProvider.mockedGPUInfoProvider = TestCuVSServiceProvider.BUILTIN_GPU_INFO_PROVIDER; + } + + public void testFFOff() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(null, null); + assertNull(format); + } + + public 
void testFFOffIndexSettingNotSupported() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + IllegalArgumentException exception = expectThrows( + IllegalArgumentException.class, + () -> createIndex( + "index1", + Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build() + ) + ); + assertThat(exception.getMessage(), containsString("unknown setting [index.vectors.indexing.use_gpu]")); + } + + public void testFFOffGPUFormatNull() { + assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.EMPTY); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + public void testIndexSettingOnIndexTypeSupportedGPUSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNotNull(format); + } + + public void testIndexSettingOnIndexTypeNotSupportedThrows() { + assumeTrue("GPU_FORMAT feature flag 
enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat(ex.getMessage(), startsWith("[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of")); + } + + public void testIndexSettingOnGPUNotSupportedThrows() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat( + ex.getMessage(), + equalTo("[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.") + ); + } + + public void testIndexSettingOnGPUSupportThrowsRethrows() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + // Mocks a cuvs-java UnsupportedProvider + TestCuVSServiceProvider.mockedGPUInfoProvider = p -> { throw new 
UnsupportedOperationException("cuvs-java UnsupportedProvider"); }; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions)); + assertThat( + ex.getMessage(), + equalTo("[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.") + ); + } + + public void testIndexSettingAutoIndexTypeSupportedGPUSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNotNull(format); + } + + public void testIndexSettingAutoGPUNotSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", 
Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + public void testIndexSettingAutoIndexTypeNotSupported() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + public void testIndexSettingOff() { + assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled()); + TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER; + + GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class); + VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider(); + + createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.FALSE).build()); + IndexSettings settings = getIndexSettings(); + final var indexOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions(); + + var format = vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions); + assertNull(format); + } + + private IndexSettings getIndexSettings() { + ensureGreen("index1"); + IndexSettings settings = null; + for (IndicesService service : 
internalCluster().getInstances(IndicesService.class)) { + IndexService indexService = service.indexService(resolveIndex("index1")); + if (indexService != null) { + settings = indexService.getIndexSettings(); + break; + } + } + assertNotNull(settings); + return settings; + } +} diff --git a/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider new file mode 100644 index 0000000000000..d5d524f49aa81 --- /dev/null +++ b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider @@ -0,0 +1,8 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. +# + +org.elasticsearch.plugin.gpu.GPUPluginInitializationIT$TestCuVSServiceProvider diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java new file mode 100644 index 0000000000000..dcada289c1376 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/module-info.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; + +/** Provides GPU-accelerated support for vector indexing. 
*/ +module org.elasticsearch.gpu { + requires org.elasticsearch.logging; + requires org.apache.lucene.core; + requires org.elasticsearch.xcontent; + requires org.elasticsearch.server; + requires org.elasticsearch.base; + requires com.nvidia.cuvs; + + exports org.elasticsearch.xpack.gpu.codec; + + provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures; + provides org.apache.lucene.codecs.KnnVectorsFormat with ES92GpuHnswVectorsFormat, ES92GpuHnswSQVectorsFormat; +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java new file mode 100644 index 0000000000000..cf9ed7b7e5a46 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +public class GPUFeatures implements FeatureSpecification { + + public static final NodeFeature VECTORS_INDEXING_USE_GPU = new NodeFeature("vectors.indexing.use_gpu"); + + @Override + public Set getFeatures() { + return Set.of(); + } + + @Override + public Set getTestFeatures() { + return Set.of(VECTORS_INDEXING_USE_GPU); + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java new file mode 100644 index 0000000000000..62190bc0fb752 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.gpu; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.util.hnsw.HnswGraphBuilder; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat; +import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat; + +import java.util.List; + +public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin { + + public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_vectors_indexing"); + + /** + * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting. + */ + public enum GpuMode { + TRUE, + FALSE, + AUTO + } + + /** + * Setting to control whether to use GPU for vectors indexing. + * Currently only applicable for index_options.type: hnsw. + * + * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' index type. + * If set to true, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available, + * an exception will be thrown. + * If set to false, GPU will not be used for vectors indexing. 
+ */ + public static final Setting VECTORS_INDEXING_USE_GPU_SETTING = Setting.enumSetting( + GpuMode.class, + "index.vectors.indexing.use_gpu", + GpuMode.AUTO, + Setting.Property.IndexScope, + Setting.Property.Dynamic + ); + + @Override + public List> getSettings() { + if (GPU_FORMAT.isEnabled()) { + return List.of(VECTORS_INDEXING_USE_GPU_SETTING); + } else { + return List.of(); + } + } + + @Override + public VectorsFormatProvider getVectorsFormatProvider() { + return (indexSettings, indexOptions) -> { + if (GPU_FORMAT.isEnabled()) { + GpuMode gpuMode = indexSettings.getValue(VECTORS_INDEXING_USE_GPU_SETTING); + if (gpuMode == GpuMode.TRUE) { + if (vectorIndexTypeSupported(indexOptions.getType()) == false) { + throw new IllegalArgumentException( + "[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of [" + indexOptions.getType() + "]." + ); + } + if (GPUSupport.isSupported(true) == false) { + throw new IllegalArgumentException( + "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node." 
+ ); + } + return getVectorsFormat(indexOptions); + } + if (gpuMode == GpuMode.AUTO && vectorIndexTypeSupported(indexOptions.getType()) && GPUSupport.isSupported(false)) { + return getVectorsFormat(indexOptions); + } + } + return null; + }; + } + + private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) { + return type == DenseVectorFieldMapper.VectorIndexType.HNSW || type == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW; + } + + private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions) { + if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) { + DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions; + int efConstruction = hnswIndexOptions.efConstruction(); + if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { + efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + } + return new ES92GpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction); + } else if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW) { + DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswIndexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) indexOptions; + int efConstruction = int8HnswIndexOptions.efConstruction(); + if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) { + efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128 + } + return new ES92GpuHnswSQVectorsFormat( + int8HnswIndexOptions.m(), + efConstruction, + int8HnswIndexOptions.confidenceInterval(), + 7, + false + ); + } else { + throw new IllegalArgumentException( + "GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]" + ); + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java 
b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java new file mode 100644 index 0000000000000..c21bda894790a --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java @@ -0,0 +1,119 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu; + +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +public class GPUSupport { + + private static final Logger LOG = LogManager.getLogger(GPUSupport.class); + + // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API + private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L; + + /** Tells whether the platform supports cuvs. 
*/ + public static boolean isSupported(boolean logError) { + try { + var gpuInfoProvider = CuVSProvider.provider().gpuInfoProvider(); + var availableGPUs = gpuInfoProvider.availableGPUs(); + if (availableGPUs.isEmpty()) { + if (logError) { + LOG.warn("No GPU found"); + } + return false; + } + + for (var gpu : availableGPUs) { + if (gpu.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + || (gpu.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR + && gpu.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR)) { + if (logError) { + LOG.warn( + "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])", + gpu.name(), + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR, + GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR, + gpu.computeCapabilityMajor(), + gpu.computeCapabilityMinor() + ); + } + } else if (gpu.totalDeviceMemoryInBytes() < MIN_DEVICE_MEMORY_IN_BYTES) { + if (logError) { + LOG.warn( + "GPU [{}] does not have minimum memory required (required: [{}], found: [{}])", + gpu.name(), + MIN_DEVICE_MEMORY_IN_BYTES, + gpu.totalDeviceMemoryInBytes() + ); + } + } else { + if (logError) { + LOG.info("Found compatible GPU [{}] (id: [{}])", gpu.name(), gpu.gpuId()); + } + return true; + } + } + + } catch (UnsupportedOperationException uoe) { + if (logError) { + final String msg; + if (uoe.getMessage() == null) { + msg = Strings.format( + "runtime Java version [%d], OS [%s], arch [%s]", + Runtime.version().feature(), + System.getProperty("os.name"), + System.getProperty("os.arch") + ); + } else { + msg = uoe.getMessage(); + } + LOG.warn("GPU based vector indexing is not supported on this platform; " + msg); + } + } catch (Throwable t) { + if (logError) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs resources", t); + } + } + return false; + } + + /** Returns a resources if supported, otherwise null. 
*/ + public static CuVSResources cuVSResourcesOrNull(boolean logError) { + try { + var resources = CuVSResources.create(); + return resources; + } catch (UnsupportedOperationException uoe) { + if (logError) { + String msg = ""; + if (uoe.getMessage() == null) { + msg = "Runtime Java version: " + Runtime.version().feature(); + } else { + msg = ": " + uoe.getMessage(); + } + LOG.warn("GPU based vector indexing is not supported on this platform or java version; " + msg); + } + } catch (Throwable t) { + if (logError) { + if (t instanceof ExceptionInInitializerError ex) { + t = ex.getCause(); + } + LOG.warn("Exception occurred during creation of cuvs resources", t); + } + } + return null; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java new file mode 100644 index 0000000000000..44240a848268b --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java @@ -0,0 +1,270 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.GPUInfoProvider; +import com.nvidia.cuvs.spi.CuVSProvider; + +import org.elasticsearch.core.Strings; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.gpu.GPUSupport; + +import java.nio.file.Path; +import java.util.Objects; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * A manager of {@link com.nvidia.cuvs.CuVSResources}. There is one manager per GPU. + * + *

All access to GPU resources is mediated through a manager. A manager helps coordinate usage threads to: + *

    + *
  • ensure single-threaded access to any particular resource at a time
  • + *
  • Control the total number of concurrent operations that may be performed on a GPU
  • + *
  • Pool resources, to avoid frequent creation and destruction, which are expensive operations.
  • + *
+ * + *

Fundamentally, a resource is used in compute and memory bound operations. The former occurs prior to the latter, e.g. + * index build (compute), followed by a copy/process of the newly built index (memory). The manager allows the resource + * user to indicate that compute is complete before releasing the resources. This can help improve parallelism of compute + * on the GPU - allowing the next compute operation to proceed before releasing the resources. + * + */ +public interface CuVSResourceManager { + + /** + * Acquires a resource from the manager. + * + *

A manager can use the given parameters, numVectors and dims, to estimate the potential + * effect on GPU memory and compute usage to determine whether to give out + * another resource or wait for a resources to be returned before giving out another. + */ + // numVectors and dims are currently unused, but could be used along with GPU metadata, + // memory, generation, etc, when acquiring for 10M x 1536 dims, or 100,000 x 128 dims, + // to give out a resources or not. + ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException; + + /** Marks the resources as finished with regard to compute. */ + void finishedComputation(ManagedCuVSResources resources); + + /** Returns the given resource to the manager. */ + void release(ManagedCuVSResources resources); + + /** Shuts down the manager, releasing all open resources. */ + void shutdown(); + + /** Returns the system-wide pooling manager. */ + static CuVSResourceManager pooling() { + return PoolingCuVSResourceManager.Holder.INSTANCE; + } + + /** + * A manager that maintains a pool of resources. 
+ */ + class PoolingCuVSResourceManager implements CuVSResourceManager { + + static final Logger logger = LogManager.getLogger(CuVSResourceManager.class); + + /** A multiplier on input data to account for intermediate and output data size required while processing it */ + static final double GPU_COMPUTATION_MEMORY_FACTOR = 2.0; + static final int MAX_RESOURCES = 4; + + static class Holder { + static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager( + MAX_RESOURCES, + CuVSProvider.provider().gpuInfoProvider() + ); + } + + private final ManagedCuVSResources[] pool; + private final int capacity; + private final GPUInfoProvider gpuInfoProvider; + private int createdCount; + + ReentrantLock lock = new ReentrantLock(); + Condition enoughResourcesCondition = lock.newCondition(); + + public PoolingCuVSResourceManager(int capacity, GPUInfoProvider gpuInfoProvider) { + if (capacity < 1 || capacity > MAX_RESOURCES) { + throw new IllegalArgumentException("Resource count must be between 1 and " + MAX_RESOURCES); + } + this.capacity = capacity; + this.gpuInfoProvider = gpuInfoProvider; + this.pool = new ManagedCuVSResources[MAX_RESOURCES]; + } + + private ManagedCuVSResources getResourceFromPool() { + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + if (res.locked == false) { + return res; + } + } + if (createdCount < capacity) { + var res = new ManagedCuVSResources(Objects.requireNonNull(createNew())); + pool[createdCount++] = res; + return res; + } + return null; + } + + private int numLockedResources() { + int lockedResources = 0; + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + if (res.locked) { + lockedResources++; + } + } + return lockedResources; + } + + @Override + public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException { + try { + lock.lock(); + + boolean allConditionsMet = false; + ManagedCuVSResources res = null; + while (allConditionsMet == false) 
{ + res = getResourceFromPool(); + + final boolean enoughMemory; + if (res != null) { + long requiredMemoryInBytes = estimateRequiredMemory(numVectors, dims, dataType); + logger.debug( + "Estimated memory for [{}] vectors, [{}] dims of type [{}] is [{} B]", + numVectors, + dims, + dataType.name(), + requiredMemoryInBytes + ); + + // Check immutable constraints + long totalDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).totalDeviceMemoryInBytes(); + if (requiredMemoryInBytes > totalDeviceMemoryInBytes) { + String message = Strings.format( + "Requested GPU memory for [%d] vectors, [%d] dims is greater than the GPU total memory [%d B]", + numVectors, + dims, + totalDeviceMemoryInBytes + ); + logger.error(message); + throw new IllegalArgumentException(message); + } + + // If no resource in the pool is locked, short circuit to avoid livelock + if (numLockedResources() == 0) { + logger.debug("No resources currently locked, proceeding"); + break; + } + + // Check resources availability + long freeDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).freeDeviceMemoryInBytes(); + enoughMemory = requiredMemoryInBytes <= freeDeviceMemoryInBytes; + logger.debug("Free device memory [{} B], enoughMemory[{}]", freeDeviceMemoryInBytes, enoughMemory); + } else { + logger.debug("No resources available in pool"); + enoughMemory = false; + } + // TODO: add enoughComputation / enoughComputationCondition here + allConditionsMet = enoughMemory; // && enoughComputation + if (allConditionsMet == false) { + enoughResourcesCondition.await(); + } + } + res.locked = true; + return res; + } finally { + lock.unlock(); + } + } + + private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) { + int elementTypeBytes = switch (dataType) { + case FLOAT -> Float.BYTES; + case INT, UINT -> Integer.BYTES; + case BYTE -> Byte.BYTES; + }; + return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes); + } + + // visible for testing + 
protected CuVSResources createNew() { + return GPUSupport.cuVSResourcesOrNull(true); + } + + @Override + public void finishedComputation(ManagedCuVSResources resources) { + logger.debug("Computation finished"); + // currently does nothing, but could allow acquire to return possibly blocked resources + // enoughResourcesCondition.signalAll() + } + + @Override + public void release(ManagedCuVSResources resources) { + logger.debug("Releasing resources to pool"); + try { + lock.lock(); + assert resources.locked; + resources.locked = false; + enoughResourcesCondition.signalAll(); + } finally { + lock.unlock(); + } + } + + @Override + public void shutdown() { + for (int i = 0; i < createdCount; ++i) { + var res = pool[i]; + assert res != null; + res.delegate.close(); + } + } + } + + /** A managed resource. Cannot be closed. */ + final class ManagedCuVSResources implements CuVSResources { + + final CuVSResources delegate; + boolean locked = false; + + ManagedCuVSResources(CuVSResources resources) { + this.delegate = resources; + } + + @Override + public ScopedAccess access() { + return delegate.access(); + } + + @Override + public int deviceId() { + return delegate.deviceId(); + } + + @Override + public void close() { + throw new UnsupportedOperationException("this resource is managed, cannot be closed by clients"); + } + + @Override + public Path tempDirectory() { + return null; + } + + @Override + public String toString() { + return "ManagedCuVSResources[delegate=" + delegate + "]"; + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java new file mode 100644 index 0000000000000..3a9fcb2c68cd8 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSMatrix; + +import org.apache.lucene.store.MemorySegmentAccessInput; + +import java.io.IOException; + +public interface DatasetUtils { + + static DatasetUtils getInstance() { + return DatasetUtilsImpl.getInstance(); + } + + /** Returns a Dataset over the vectors of type {@code dataType} in the input. */ + CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException; + + /** Returns a Dataset over an input slice */ + CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType) + throws IOException; +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java new file mode 100644 index 0000000000000..0dfb0960cebbe --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.spi.CuVSProvider; + +import org.apache.lucene.store.MemorySegmentAccessInput; + +import java.io.IOException; +import java.lang.foreign.MemorySegment; +import java.lang.invoke.MethodHandle; + +public class DatasetUtilsImpl implements DatasetUtils { + + private static final DatasetUtils INSTANCE = new DatasetUtilsImpl(); + + private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder(); + + static DatasetUtils getInstance() { + return INSTANCE; + } + + static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) { + try { + return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType); + } catch (Throwable e) { + if (e instanceof Error err) { + throw err; + } else if (e instanceof RuntimeException re) { + throw re; + } else { + throw new RuntimeException(e); + } + } + } + + private DatasetUtilsImpl() {} + + @Override + public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException { + if (numVectors < 0 || dims < 0) { + throwIllegalArgumentException(numVectors, dims); + } + return createCuVSMatrix(input, 0L, input.length(), numVectors, dims, dataType); + } + + @Override + public CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType) + throws IOException { + if (pos < 0 || len < 0) { + throw new IllegalArgumentException("pos and len must be positive"); + } + return createCuVSMatrix(input, pos, len, numVectors, dims, dataType); + } + + private static CuVSMatrix createCuVSMatrix( + MemorySegmentAccessInput input, + long pos, + long len, + int numVectors, + int dims, + CuVSMatrix.DataType dataType + ) throws IOException { + MemorySegment ms = input.segmentSliceOrNull(pos, len); + assert ms 
!= null; // TODO: this can be null if larger than 16GB or ... + final int byteSize = dataType == CuVSMatrix.DataType.FLOAT ? Float.BYTES : Byte.BYTES; + if (((long) numVectors * dims * byteSize) > ms.byteSize()) { + throwIllegalArgumentException(ms, numVectors, dims); + } + return fromMemorySegment(ms, numVectors, dims, dataType); + } + + static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) { + var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + " dims"; + throw new IllegalArgumentException(s); + } + + static void throwIllegalArgumentException(int numVectors, int dims) { + String s; + if (numVectors < 0) { + s = "negative number of vectors: " + numVectors; + } else { + s = "negative vector dims: " + dims; + } + throw new IllegalArgumentException(s); + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java new file mode 100644 index 0000000000000..b62766fb39c3a --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java @@ -0,0 +1,97 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; + +import java.io.IOException; +import java.util.function.Supplier; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_MAX_CONN; + +/** + * Codec format for GPU-accelerated scalar quantized HNSW vector indexes. + * HNSW graph is built on GPU, while scalar quantization and search is performed on CPU. 
+ */ +public class ES92GpuHnswSQVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "Lucene99HnswVectorsFormat"; + static final int MAXIMUM_MAX_CONN = 512; + static final int MAXIMUM_BEAM_WIDTH = 3200; + private final int maxConn; + private final int beamWidth; + + /** The format for storing, reading, merging vectors on disk */ + private final FlatVectorsFormat flatVectorsFormat; + private final Supplier cuVSResourceManagerSupplier; + + public ES92GpuHnswSQVectorsFormat() { + this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false); + } + + public ES92GpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { + super(NAME); + this.cuVSResourceManagerSupplier = CuVSResourceManager::pooling; + if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { + throw new IllegalArgumentException( + "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn + ); + } + if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { + throw new IllegalArgumentException( + "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth + ); + } + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.flatVectorsFormat = new ES814ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress); + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new ES92GpuHnswVectorsWriter( + cuVSResourceManagerSupplier.get(), + state, + maxConn, + beamWidth, + flatVectorsFormat.fieldsWriter(state) + ); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return NAME + + "(name=" + + NAME + + ", maxConn=" + + maxConn + + 
", beamWidth=" + + beamWidth + + ", flatVectorFormat=" + + flatVectorsFormat + + ")"; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java new file mode 100644 index 0000000000000..8761b9e12f22a --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java @@ -0,0 +1,102 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; +import java.util.function.Supplier; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + +/** + * Codec format for GPU-accelerated vector indexes. This format is designed to + * leverage GPU processing capabilities for vector search operations. 
+ */ +public class ES92GpuHnswVectorsFormat extends KnnVectorsFormat { + public static final String NAME = "Lucene99HnswVectorsFormat"; + public static final int VERSION_GROUPVARINT = 1; + + static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta"; + static final String LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex"; + static final String LUCENE99_HNSW_META_EXTENSION = "vem"; + static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex"; + static final int LUCENE99_VERSION_CURRENT = VERSION_GROUPVARINT; + + static final int DEFAULT_MAX_CONN = 16; // graph degree + public static final int DEFAULT_BEAM_WIDTH = 128; // intermediate graph degree + static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; + + private static final FlatVectorsFormat flatVectorsFormat = new Lucene99FlatVectorsFormat( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer() + ); + + // How many nodes each node in the graph is connected to in the final graph + private final int maxConn; + // Intermediate graph degree, the number of connections for each node before pruning + private final int beamWidth; + private final Supplier cuVSResourceManagerSupplier; + + public ES92GpuHnswVectorsFormat() { + this(CuVSResourceManager::pooling, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); + } + + public ES92GpuHnswVectorsFormat(int maxConn, int beamWidth) { + this(CuVSResourceManager::pooling, maxConn, beamWidth); + }; + + public ES92GpuHnswVectorsFormat(Supplier cuVSResourceManagerSupplier, int maxConn, int beamWidth) { + super(NAME); + this.cuVSResourceManagerSupplier = cuVSResourceManagerSupplier; + this.maxConn = maxConn; + this.beamWidth = beamWidth; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new ES92GpuHnswVectorsWriter( + cuVSResourceManagerSupplier.get(), + state, + maxConn, + beamWidth, + flatVectorsFormat.fieldsWriter(state) + ); + } + + @Override + public KnnVectorsReader 
fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return NAME + + "(name=" + + NAME + + ", maxConn=" + + maxConn + + ", beamWidth=" + + beamWidth + + ", flatVectorFormat=" + + flatVectorsFormat.getName() + + ")"; + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java new file mode 100644 index 0000000000000..f848f715f913b --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java @@ -0,0 +1,683 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CagraIndex; +import com.nvidia.cuvs.CagraIndexParams; +import com.nvidia.cuvs.CuVSMatrix; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnFieldVectorsWriter; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; +import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.DocsWithFieldSet; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterIndexInput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MemorySegmentAccessInput; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.hnsw.HnswGraph; +import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator; +import org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.apache.lucene.util.quantization.ScalarQuantizer; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import 
java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; +import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_VERSION_CURRENT; +import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD; + +/** + * Writer that builds an Nvidia Carga Graph on GPU and then writes it into the Lucene99 HNSW format, + * so that it can be searched on CPU with Lucene99HNSWVectorReader. 
+ */ +final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter { + private static final Logger logger = LogManager.getLogger(ES92GpuHnswVectorsWriter.class); + private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ES92GpuHnswVectorsWriter.class); + private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16; + + private final CuVSResourceManager cuVSResourceManager; + private final SegmentWriteState segmentWriteState; + private final IndexOutput meta, vectorIndex; + private final int M; + private final int beamWidth; + private final FlatVectorsWriter flatVectorWriter; + + private final List fields = new ArrayList<>(); + private boolean finished; + private final CuVSMatrix.DataType dataType; + + ES92GpuHnswVectorsWriter( + CuVSResourceManager cuVSResourceManager, + SegmentWriteState state, + int M, + int beamWidth, + FlatVectorsWriter flatVectorWriter + ) throws IOException { + assert cuVSResourceManager != null : "CuVSResources must not be null"; + this.cuVSResourceManager = cuVSResourceManager; + this.M = M; + this.beamWidth = beamWidth; + this.flatVectorWriter = flatVectorWriter; + if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) { + dataType = CuVSMatrix.DataType.BYTE; + } else { + assert flatVectorWriter instanceof Lucene99FlatVectorsWriter; + dataType = CuVSMatrix.DataType.FLOAT; + } + this.segmentWriteState = state; + String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, LUCENE99_HNSW_META_EXTENSION); + String indexDataFileName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + LUCENE99_HNSW_VECTOR_INDEX_EXTENSION + ); + boolean success = false; + try { + meta = state.directory.createOutput(metaFileName, state.context); + vectorIndex = state.directory.createOutput(indexDataFileName, state.context); + CodecUtil.writeIndexHeader( + meta, + LUCENE99_HNSW_META_CODEC_NAME, + 
LUCENE99_VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + CodecUtil.writeIndexHeader( + vectorIndex, + LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME, + LUCENE99_VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + success = true; + } finally { + if (success == false) { + org.elasticsearch.core.IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { + if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32) == false) { + throw new IllegalArgumentException( + "Field [" + fieldInfo.name + "] must have FLOAT32 encoding, got: " + fieldInfo.getVectorEncoding() + ); + } + @SuppressWarnings("unchecked") + FlatFieldVectorsWriter flatFieldWriter = (FlatFieldVectorsWriter) flatVectorWriter.addField(fieldInfo); + FieldWriter newField = new FieldWriter(flatFieldWriter, fieldInfo); + fields.add(newField); + return newField; + } + + /** + * Flushes vector data and associated data to disk. + *

+ * This method and the private helpers it calls only need to support FLOAT32. + * For FlatFieldVectorWriter we only need to support float[] during flush: during indexing users provide floats[], and pass floats to + * FlatFieldVectorWriter, even when we have a BYTE dataType (i.e. an "int8_hnsw" type). + * During merging, we use quantized data, so we need to support byte[] too (see {@link ES92GpuHnswVectorsWriter#mergeOneField}), + * but not here. + * That's how our other current formats work: use floats during indexing, and quantized data to build graph during merging. + *

+ */ + @Override + // TODO: fix sorted index case + public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { + flatVectorWriter.flush(maxDoc, sortMap); + try { + flushFieldsWithoutMemoryMappedFile(sortMap); + } catch (Throwable t) { + throw new IOException("Failed to flush GPU index: ", t); + } + } + + private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException, InterruptedException { + // No tmp file written, or the file cannot be mmapped + for (FieldWriter field : fields) { + var fieldInfo = field.fieldInfo; + + var numVectors = field.flatFieldVectorsWriter.getVectors().size(); + if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (logger.isDebugEnabled()) { + logger.debug( + "Skip building carga index; vectors length {} < {} (min for GPU)", + numVectors, + MIN_NUM_VECTORS_FOR_GPU_BUILD + ); + } + // Will not be indexed on the GPU + flushFieldWithMockGraph(fieldInfo, numVectors, sortMap); + } else { + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT); + try { + var builder = CuVSMatrix.deviceBuilder( + cuVSResources, + numVectors, + fieldInfo.getVectorDimension(), + CuVSMatrix.DataType.FLOAT + ); + for (var vector : field.flatFieldVectorsWriter.getVectors()) { + builder.addVector(vector); + } + try (var dataset = builder.build()) { + flushFieldWithGpuGraph(cuVSResources, fieldInfo, dataset, sortMap); + } + } finally { + cuVSResourceManager.release(cuVSResources); + } + } + } + } + + private void flushFieldWithMockGraph(FieldInfo fieldInfo, int numVectors, Sorter.DocMap sortMap) throws IOException { + if (sortMap == null) { + generateMockGraphAndWriteMeta(fieldInfo, numVectors); + } else { + // TODO: use sortMap + generateMockGraphAndWriteMeta(fieldInfo, numVectors); + } + } + + private void flushFieldWithGpuGraph( + CuVSResourceManager.ManagedCuVSResources resources, + FieldInfo fieldInfo, + CuVSMatrix dataset, + Sorter.DocMap sortMap + ) throws 
IOException { + if (sortMap == null) { + generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + } else { + // TODO: use sortMap + generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset); + } + } + + @Override + public void finish() throws IOException { + if (finished) { + throw new IllegalStateException("already finished"); + } + finished = true; + flatVectorWriter.finish(); + + if (meta != null) { + // write end of fields marker + meta.writeInt(-1); + CodecUtil.writeFooter(meta); + } + if (vectorIndex != null) { + CodecUtil.writeFooter(vectorIndex); + } + } + + @Override + public long ramBytesUsed() { + long total = SHALLOW_RAM_BYTES_USED; + for (FieldWriter field : fields) { + // the field tracks the delegate field usage + total += field.ramBytesUsed(); + } + return total; + } + + private void generateGpuGraphAndWriteMeta( + CuVSResourceManager.ManagedCuVSResources cuVSResources, + FieldInfo fieldInfo, + CuVSMatrix dataset + ) throws IOException { + try { + assert dataset.size() >= MIN_NUM_VECTORS_FOR_GPU_BUILD; + + long vectorIndexOffset = vectorIndex.getFilePointer(); + int[][] graphLevelNodeOffsets = new int[1][]; + final HnswGraph graph; + try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) { + assert index != null : "GPU index should be built for field: " + fieldInfo.name; + graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); + } + long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, (int) dataset.size(), graph, graphLevelNodeOffsets); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); + } + } + + private void generateMockGraphAndWriteMeta(FieldInfo fieldInfo, int datasetSize) throws IOException { + try { + long vectorIndexOffset = vectorIndex.getFilePointer(); + int[][] graphLevelNodeOffsets = new int[1][]; + final HnswGraph graph = 
writeMockGraph(datasetSize, graphLevelNodeOffsets); + long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; + writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetSize, graph, graphLevelNodeOffsets); + } catch (IOException e) { + throw e; + } catch (Throwable t) { + throw new IOException("Failed to write GPU index: ", t); + } + } + + private CagraIndex buildGPUIndex( + CuVSResourceManager.ManagedCuVSResources cuVSResources, + VectorSimilarityFunction similarityFunction, + CuVSMatrix dataset + ) throws Throwable { + CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { + case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded; + case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct; + case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded; + }; + + // TODO: expose cagra index params for algorithm, NNDescentNumIterations + CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use? 
+ .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT) + .withGraphDegree(M) + .withIntermediateGraphDegree(beamWidth) + .withMetric(distanceType) + .build(); + + long startTime = System.nanoTime(); + var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params); + var index = indexBuilder.build(); + cuVSResourceManager.finishedComputation(cuVSResources); + if (logger.isDebugEnabled()) { + logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size()); + } + return index; + } + + private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException { + long startTime = System.nanoTime(); + + int maxElementCount = (int) cagraGraph.size(); + int maxGraphDegree = (int) cagraGraph.columns(); + int[] neighbors = new int[maxGraphDegree]; + + levelNodeOffsets[0] = new int[maxElementCount]; + // write the cagra graph to the Lucene vectorIndex file + int[] scratch = new int[maxGraphDegree]; + for (int node = 0; node < maxElementCount; node++) { + cagraGraph.getRow(node).toArray(neighbors); + + // write to the Lucene vectorIndex file + long offsetStart = vectorIndex.getFilePointer(); + Arrays.sort(neighbors); + int actualSize = 0; + if (maxGraphDegree > 0) { + scratch[0] = neighbors[0]; + actualSize = 1; + } + for (int i = 1; i < maxGraphDegree; i++) { + assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount; + if (neighbors[i - 1] == neighbors[i]) { + continue; + } + scratch[actualSize++] = neighbors[i] - neighbors[i - 1]; + } + // Write the size after duplicates are removed + vectorIndex.writeVInt(actualSize); + vectorIndex.writeGroupVInts(scratch, actualSize); + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + if (logger.isDebugEnabled()) { + logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - 
startTime) / 1_000_000.0); + } + return createMockGraph(maxElementCount, maxGraphDegree); + } + + // create a mock graph where every node is connected to every other node + private HnswGraph writeMockGraph(int elementCount, int[][] levelNodeOffsets) throws IOException { + if (elementCount == 0) { + return null; + } + int nodeDegree = elementCount - 1; + levelNodeOffsets[0] = new int[elementCount]; + + int[] neighbors = new int[nodeDegree]; + int[] scratch = new int[nodeDegree]; + for (int node = 0; node < elementCount; node++) { + if (nodeDegree > 0) { + for (int j = 0; j < nodeDegree; j++) { + neighbors[j] = j < node ? j : j + 1; // skip self + } + scratch[0] = neighbors[0]; + for (int i = 1; i < nodeDegree; i++) { + scratch[i] = neighbors[i] - neighbors[i - 1]; + } + } + + long offsetStart = vectorIndex.getFilePointer(); + vectorIndex.writeVInt(nodeDegree); + vectorIndex.writeGroupVInts(scratch, nodeDegree); + levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); + } + return createMockGraph(elementCount, nodeDegree); + } + + private static HnswGraph createMockGraph(int elementCount, int graphDegree) { + return new HnswGraph() { + @Override + public int nextNeighbor() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public void seek(int level, int target) { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public int size() { + return elementCount; + } + + @Override + public int numLevels() { + return 1; + } + + @Override + public int maxConn() { + return graphDegree; + } + + @Override + public int entryNode() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public int neighborCount() { + throw new UnsupportedOperationException("Not supported on a mock graph"); + } + + @Override + public NodesIterator getNodesOnLevel(int level) { + return new ArrayNodesIterator(size()); + } + }; + } + + 
@SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + private static void deleteFilesIgnoringExceptions(Directory dir, String fileName) { + org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, fileName); + } + + // TODO check with deleted documents + @Override + // fix sorted index case + public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + flatVectorWriter.mergeOneField(fieldInfo, mergeState); + final int numVectors; + String tempRawVectorsFileName = null; + boolean success = false; + // save merged vector values to a temp file + try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) { + tempRawVectorsFileName = out.getName(); + if (dataType == CuVSMatrix.DataType.BYTE) { + numVectors = writeByteVectorValues(out, getMergedByteVectorValues(fieldInfo, mergeState)); + } else { + numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)); + } + CodecUtil.writeFooter(out); + success = true; + } finally { + if (success == false && tempRawVectorsFileName != null) { + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + } + } + try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) { + var input = FilterIndexInput.unwrapOnlyTest(in); + + if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) { + if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) { + // Direct access to mmapped file + final var dataset = DatasetUtils.getInstance() + .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType); + + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); + try { + generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); + } finally { + dataset.close(); + 
cuVSResourceManager.release(cuVSResources); + } + } else { + logger.debug( + () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]" + ); + + var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType); + try { + // Read vector-by-vector + var builder = CuVSMatrix.deviceBuilder(cuVSResources, numVectors, fieldInfo.getVectorDimension(), dataType); + + // During merging, we use quantized data, so we need to support byte[] too. + // That's how our current formats work: use floats during indexing, and quantized data to build a graph + // during merging. + if (dataType == CuVSMatrix.DataType.FLOAT) { + float[] vector = new float[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readFloats(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + } else { + assert dataType == CuVSMatrix.DataType.BYTE; + byte[] vector = new byte[fieldInfo.getVectorDimension()]; + for (int i = 0; i < numVectors; ++i) { + input.readBytes(vector, 0, fieldInfo.getVectorDimension()); + builder.addVector(vector); + } + } + try (var dataset = builder.build()) { + generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset); + } + } finally { + cuVSResourceManager.release(cuVSResources); + } + } + } else { + // we don't really need real value for vectors here, + // we just build a mock graph where every node is connected to every other node + generateMockGraphAndWriteMeta(fieldInfo, numVectors); + } + } catch (Throwable t) { + throw new IOException("Failed to merge GPU index: ", t); + } finally { + deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName); + } + } + + private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + // TODO: expose confidence interval from the format + final byte bits = 7; + final Float confidenceInterval = null; + ScalarQuantizer 
quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits); + return MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer); + } + + private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException { + int numVectors = 0; + byte[] vector; + final KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + numVectors++; + vector = vectorValues.vectorValue(iterator.index()); + out.writeBytes(vector, vector.length); + } + return numVectors; + } + + private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues) + throws IOException { + int numVectors = 0; + final ByteBuffer buffer = ByteBuffer.allocate(fieldInfo.getVectorDimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN); + final KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator(); + for (int docV = iterator.nextDoc(); docV != NO_MORE_DOCS; docV = iterator.nextDoc()) { + numVectors++; + float[] vector = floatVectorValues.vectorValue(iterator.index()); + buffer.asFloatBuffer().put(vector); + out.writeBytes(buffer.array(), buffer.array().length); + } + return numVectors; + } + + private void writeMeta( + FieldInfo field, + long vectorIndexOffset, + long vectorIndexLength, + int count, + HnswGraph graph, + int[][] graphLevelNodeOffsets + ) throws IOException { + meta.writeInt(field.number); + meta.writeInt(field.getVectorEncoding().ordinal()); + meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction())); + meta.writeVLong(vectorIndexOffset); + meta.writeVLong(vectorIndexLength); + meta.writeVInt(field.getVectorDimension()); + meta.writeInt(count); + // write graph nodes on each level + if (graph == null) { + meta.writeVInt(M); + meta.writeVInt(0); + } else { + meta.writeVInt(graph.maxConn()); + 
meta.writeVInt(graph.numLevels()); + long valueCount = 0; + + for (int level = 0; level < graph.numLevels(); level++) { + NodesIterator nodesOnLevel = graph.getNodesOnLevel(level); + valueCount += nodesOnLevel.size(); + if (level > 0) { + int[] nol = new int[nodesOnLevel.size()]; + int numberConsumed = nodesOnLevel.consume(nol); + Arrays.sort(nol); + assert numberConsumed == nodesOnLevel.size(); + meta.writeVInt(nol.length); // number of nodes on a level + for (int i = nodesOnLevel.size() - 1; i > 0; --i) { + nol[i] -= nol[i - 1]; + } + for (int n : nol) { + assert n >= 0 : "delta encoding for nodes failed; expected nodes to be sorted"; + meta.writeVInt(n); + } + } else { + assert nodesOnLevel.size() == count : "Level 0 expects to have all nodes"; + } + } + long start = vectorIndex.getFilePointer(); + meta.writeLong(start); + meta.writeVInt(LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT); + final DirectMonotonicWriter memoryOffsetsWriter = DirectMonotonicWriter.getInstance( + meta, + vectorIndex, + valueCount, + LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT + ); + long cumulativeOffsetSum = 0; + for (int[] levelOffsets : graphLevelNodeOffsets) { + for (int v : levelOffsets) { + memoryOffsetsWriter.add(cumulativeOffsetSum); + cumulativeOffsetSum += v; + } + } + memoryOffsetsWriter.finish(); + meta.writeLong(vectorIndex.getFilePointer() - start); + } + } + + @Override + public void close() throws IOException { + IOUtils.close(meta, vectorIndex, flatVectorWriter); + } + + static int distFuncToOrd(VectorSimilarityFunction func) { + for (int i = 0; i < SIMILARITY_FUNCTIONS.size(); i++) { + if (SIMILARITY_FUNCTIONS.get(i).equals(func)) { + return (byte) i; + } + } + throw new IllegalArgumentException("invalid distance function: " + func); + } + + private static class FieldWriter extends KnnFieldVectorsWriter { + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); + + private final FieldInfo fieldInfo; + private int lastDocID = 
-1; + private final FlatFieldVectorsWriter flatFieldVectorsWriter; + + FieldWriter(FlatFieldVectorsWriter flatFieldVectorsWriter, FieldInfo fieldInfo) { + this.fieldInfo = fieldInfo; + this.flatFieldVectorsWriter = Objects.requireNonNull(flatFieldVectorsWriter); + } + + @Override + public void addValue(int docID, float[] vectorValue) throws IOException { + if (docID == lastDocID) { + throw new IllegalArgumentException( + "VectorValuesField \"" + + fieldInfo.name + + "\" appears more than once in this document (only one value is allowed per field)" + ); + } + flatFieldVectorsWriter.addValue(docID, vectorValue); + lastDocID = docID; + } + + public DocsWithFieldSet getDocsWithFieldSet() { + return flatFieldVectorsWriter.getDocsWithFieldSet(); + } + + @Override + public float[] copyValue(float[] vectorValue) { + throw new UnsupportedOperationException(); + } + + @Override + public long ramBytesUsed() { + return SHALLOW_SIZE + flatFieldVectorsWriter.ramBytesUsed(); + } + } +} diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java new file mode 100644 index 0000000000000..4d3d5013dd381 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java @@ -0,0 +1,372 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2025 Elasticsearch B.V. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.index.DocIDMerger; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.VectorScorer; +import org.apache.lucene.util.VectorUtil; +import org.apache.lucene.util.quantization.QuantizedByteVectorValues; +import org.apache.lucene.util.quantization.QuantizedVectorsReader; +import org.apache.lucene.util.quantization.ScalarQuantizer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues; + +/** + * A copy from Lucene99ScalarQuantizedVectorsWriter to access mergeQuantizedByteVectorValues + * during segment merge. 
+ */ +class MergedQuantizedVectorValues extends QuantizedByteVectorValues { + private static final float REQUANTIZATION_LIMIT = 0.2f; + + private final List subs; + private final DocIDMerger docIdMerger; + private final int size; + private QuantizedByteVectorValueSub current; + + private MergedQuantizedVectorValues(List subs, MergeState mergeState) throws IOException { + this.subs = subs; + docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); + int totalSize = 0; + for (QuantizedByteVectorValueSub sub : subs) { + totalSize += sub.values.size(); + } + size = totalSize; + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + return current.values.vectorValue(current.index()); + } + + @Override + public DocIndexIterator iterator() { + return new MergedQuantizedVectorValues.CompositeIterator(); + } + + @Override + public int size() { + return size; + } + + @Override + public int dimension() { + return subs.get(0).values.dimension(); + } + + @Override + public float getScoreCorrectionConstant(int ord) throws IOException { + return current.values.getScoreCorrectionConstant(current.index()); + } + + private class CompositeIterator extends DocIndexIterator { + private int docId; + private int ord; + + CompositeIterator() { + docId = -1; + ord = -1; + } + + @Override + public int index() { + return ord; + } + + @Override + public int docID() { + return docId; + } + + @Override + public int nextDoc() throws IOException { + current = docIdMerger.next(); + if (current == null) { + docId = NO_MORE_DOCS; + ord = NO_MORE_DOCS; + } else { + docId = current.mappedDocID; + ++ord; + } + return docId; + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long cost() { + return size; + } + } + + private static QuantizedVectorsReader getQuantizedKnnVectorsReader(KnnVectorsReader vectorsReader, String fieldName) { + if (vectorsReader instanceof 
PerFieldKnnVectorsFormat.FieldsReader candidateReader) { + vectorsReader = candidateReader.getFieldReader(fieldName); + } + if (vectorsReader instanceof QuantizedVectorsReader reader) { + return reader; + } + return null; + } + + static MergedQuantizedVectorValues mergeQuantizedByteVectorValues( + FieldInfo fieldInfo, + MergeState mergeState, + ScalarQuantizer scalarQuantizer + ) throws IOException { + assert fieldInfo != null && fieldInfo.hasVectorValues(); + + List subs = new ArrayList<>(); + for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { + if (hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) { + QuantizedVectorsReader reader = getQuantizedKnnVectorsReader(mergeState.knnVectorsReaders[i], fieldInfo.name); + assert scalarQuantizer != null; + final QuantizedByteVectorValueSub sub; + // Either our quantization parameters are way different than the merged ones + // Or we have never been quantized. + if (reader == null || reader.getQuantizationState(fieldInfo.name) == null + // For smaller `bits` values, we should always recalculate the quantiles + // TODO: this is very conservative, could we reuse information for even int4 + // quantization? 
+ || scalarQuantizer.getBits() <= 4 + || shouldRequantize(reader.getQuantizationState(fieldInfo.name), scalarQuantizer)) { + FloatVectorValues toQuantize = mergeState.knnVectorsReaders[i].getFloatVectorValues(fieldInfo.name); + if (fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE) { + toQuantize = new NormalizedFloatVectorValues(toQuantize); + } + sub = new QuantizedByteVectorValueSub( + mergeState.docMaps[i], + new QuantizedFloatVectorValues(toQuantize, fieldInfo.getVectorSimilarityFunction(), scalarQuantizer) + ); + } else { + sub = new QuantizedByteVectorValueSub( + mergeState.docMaps[i], + new OffsetCorrectedQuantizedByteVectorValues( + reader.getQuantizedVectorValues(fieldInfo.name), + fieldInfo.getVectorSimilarityFunction(), + scalarQuantizer, + reader.getQuantizationState(fieldInfo.name) + ) + ); + } + subs.add(sub); + } + } + return new MergedQuantizedVectorValues(subs, mergeState); + } + + private static boolean shouldRequantize(ScalarQuantizer existingQuantiles, ScalarQuantizer newQuantiles) { + float tol = REQUANTIZATION_LIMIT * (newQuantiles.getUpperQuantile() - newQuantiles.getLowerQuantile()) / 128f; + if (Math.abs(existingQuantiles.getUpperQuantile() - newQuantiles.getUpperQuantile()) > tol) { + return true; + } + return Math.abs(existingQuantiles.getLowerQuantile() - newQuantiles.getLowerQuantile()) > tol; + } + + private static class QuantizedByteVectorValueSub extends DocIDMerger.Sub { + private final QuantizedByteVectorValues values; + private final KnnVectorValues.DocIndexIterator iterator; + + QuantizedByteVectorValueSub(MergeState.DocMap docMap, QuantizedByteVectorValues values) { + super(docMap); + this.values = values; + iterator = values.iterator(); + assert iterator.docID() == -1; + } + + @Override + public int nextDoc() throws IOException { + return iterator.nextDoc(); + } + + public int index() { + return iterator.index(); + } + } + + private static class QuantizedFloatVectorValues extends 
QuantizedByteVectorValues { + private final FloatVectorValues values; + private final ScalarQuantizer quantizer; + private final byte[] quantizedVector; + private int lastOrd = -1; + private float offsetValue = 0f; + + private final VectorSimilarityFunction vectorSimilarityFunction; + + QuantizedFloatVectorValues(FloatVectorValues values, VectorSimilarityFunction vectorSimilarityFunction, ScalarQuantizer quantizer) { + this.values = values; + this.quantizer = quantizer; + this.quantizedVector = new byte[values.dimension()]; + this.vectorSimilarityFunction = vectorSimilarityFunction; + } + + @Override + public float getScoreCorrectionConstant(int ord) { + if (ord != lastOrd) { + throw new IllegalStateException( + "attempt to retrieve score correction for different ord " + ord + " than the quantization was done for: " + lastOrd + ); + } + return offsetValue; + } + + @Override + public int dimension() { + return values.dimension(); + } + + @Override + public int size() { + return values.size(); + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + if (ord != lastOrd) { + offsetValue = quantize(ord); + lastOrd = ord; + } + return quantizedVector; + } + + @Override + public VectorScorer scorer(float[] target) throws IOException { + throw new UnsupportedOperationException(); + } + + private float quantize(int ord) throws IOException { + return quantizer.quantize(values.vectorValue(ord), quantizedVector, vectorSimilarityFunction); + } + + @Override + public int ordToDoc(int ord) { + return values.ordToDoc(ord); + } + + @Override + public DocIndexIterator iterator() { + return values.iterator(); + } + } + + private static final class NormalizedFloatVectorValues extends FloatVectorValues { + private final FloatVectorValues values; + private final float[] normalizedVector; + + NormalizedFloatVectorValues(FloatVectorValues values) { + this.values = values; + this.normalizedVector = new float[values.dimension()]; + } + + @Override + public int 
dimension() { + return values.dimension(); + } + + @Override + public int size() { + return values.size(); + } + + @Override + public int ordToDoc(int ord) { + return values.ordToDoc(ord); + } + + @Override + public float[] vectorValue(int ord) throws IOException { + System.arraycopy(values.vectorValue(ord), 0, normalizedVector, 0, normalizedVector.length); + VectorUtil.l2normalize(normalizedVector); + return normalizedVector; + } + + @Override + public DocIndexIterator iterator() { + return values.iterator(); + } + + @Override + public NormalizedFloatVectorValues copy() throws IOException { + return new NormalizedFloatVectorValues(values.copy()); + } + } + + private static final class OffsetCorrectedQuantizedByteVectorValues extends QuantizedByteVectorValues { + private final QuantizedByteVectorValues in; + private final VectorSimilarityFunction vectorSimilarityFunction; + private final ScalarQuantizer scalarQuantizer, oldScalarQuantizer; + + OffsetCorrectedQuantizedByteVectorValues( + QuantizedByteVectorValues in, + VectorSimilarityFunction vectorSimilarityFunction, + ScalarQuantizer scalarQuantizer, + ScalarQuantizer oldScalarQuantizer + ) { + this.in = in; + this.vectorSimilarityFunction = vectorSimilarityFunction; + this.scalarQuantizer = scalarQuantizer; + this.oldScalarQuantizer = oldScalarQuantizer; + } + + @Override + public float getScoreCorrectionConstant(int ord) throws IOException { + return scalarQuantizer.recalculateCorrectiveOffset(in.vectorValue(ord), oldScalarQuantizer, vectorSimilarityFunction); + } + + @Override + public int dimension() { + return in.dimension(); + } + + @Override + public int size() { + return in.size(); + } + + @Override + public byte[] vectorValue(int ord) throws IOException { + return in.vectorValue(ord); + } + + @Override + public int ordToDoc(int ord) { + return in.ordToDoc(ord); + } + + @Override + public DocIndexIterator iterator() { + return in.iterator(); + } + } +} diff --git 
a/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml new file mode 100644 index 0000000000000..d0c571b8538b2 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml @@ -0,0 +1,2 @@ +com.nvidia.cuvs: + - load_native_libraries diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat new file mode 100644 index 0000000000000..7aa308150b6de --- /dev/null +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -0,0 +1,3 @@ + +org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat +org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification new file mode 100644 index 0000000000000..63e111db1dd79 --- /dev/null +++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -0,0 +1,8 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. 
+# + +org.elasticsearch.xpack.gpu.GPUFeatures diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java new file mode 100644 index 0000000000000..b466f37cbe9c9 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java @@ -0,0 +1,235 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSMatrix; +import com.nvidia.cuvs.CuVSResources; +import com.nvidia.cuvs.CuVSResourcesInfo; +import com.nvidia.cuvs.GPUInfo; +import com.nvidia.cuvs.GPUInfoProvider; + +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.test.ESTestCase; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.LongSupplier; + +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; + +public class CuVSResourceManagerTests extends ESTestCase { + + private static final Logger log = LogManager.getLogger(CuVSResourceManagerTests.class); + + public static final long TOTAL_DEVICE_MEMORY_IN_BYTES = 256L * 1024 * 1024; + + public void testBasic() throws InterruptedException { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + assertThat(res1.toString(), 
containsString("id=0")); + assertThat(res2.toString(), containsString("id=1")); + mgr.release(res1); + mgr.release(res2); + res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + assertThat(res1.toString(), containsString("id=0")); + assertThat(res2.toString(), containsString("id=1")); + mgr.release(res1); + mgr.release(res2); + mgr.shutdown(); + } + + public void testBlocking() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + + AtomicReference holder = new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res3 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + holder.set(res3); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + Thread.sleep(1_000); + assertNull(holder.get()); + mgr.release(randomFrom(res1, res2)); + t.join(); + assertThat(holder.get().toString(), anyOf(containsString("id=0"), containsString("id=1"))); + mgr.shutdown(); + } + + public void testBlockingOnInsufficientMemory() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT); + + AtomicReference holder = new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res2 = mgr.acquire((16 * 1024) + 1, 1024, CuVSMatrix.DataType.FLOAT); + holder.set(res2); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + Thread.sleep(1_000); + assertNull(holder.get()); + mgr.release(res1); + t.join(); + assertThat(holder.get().toString(), anyOf(containsString("id=0"), containsString("id=1"))); + mgr.shutdown(); + } + + public void testNotBlockingOnSufficientMemory() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT); + + AtomicReference holder = 
new AtomicReference<>(); + Thread t = new Thread(() -> { + try { + var res2 = mgr.acquire((16 * 1024) - 1, 1024, CuVSMatrix.DataType.FLOAT); + holder.set(res2); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + }); + t.start(); + t.join(5_000); + assertNotNull(holder.get()); + assertThat(holder.get().toString(), not(equalTo(res1.toString()))); + mgr.shutdown(); + } + + public void testManagedResIsNotClosable() throws Exception { + var mgr = new MockPoolingCuVSResourceManager(1); + var res = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + assertThrows(UnsupportedOperationException.class, res::close); + mgr.release(res); + mgr.shutdown(); + } + + public void testDoubleRelease() throws InterruptedException { + var mgr = new MockPoolingCuVSResourceManager(2); + var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT); + mgr.release(res1); + mgr.release(res2); + assertThrows(AssertionError.class, () -> mgr.release(randomFrom(res1, res2))); + mgr.shutdown(); + } + + static class MockPoolingCuVSResourceManager extends CuVSResourceManager.PoolingCuVSResourceManager { + + private final AtomicInteger idGenerator = new AtomicInteger(); + private final List allocations; + + MockPoolingCuVSResourceManager(int capacity) { + this(capacity, new ArrayList<>()); + } + + private MockPoolingCuVSResourceManager(int capacity, List allocationList) { + super(capacity, new MockGPUInfoProvider(() -> freeMemoryFunction(allocationList))); + this.allocations = allocationList; + } + + private static long freeMemoryFunction(List allocations) { + return TOTAL_DEVICE_MEMORY_IN_BYTES - allocations.stream().mapToLong(x -> x).sum(); + } + + @Override + protected CuVSResources createNew() { + return new MockCuVSResources(idGenerator.getAndIncrement()); + } + + @Override + public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException { + var res = 
super.acquire(numVectors, dims, dataType); + long memory = (long) (numVectors * dims * Float.BYTES + * CuVSResourceManager.PoolingCuVSResourceManager.GPU_COMPUTATION_MEMORY_FACTOR); + allocations.add(memory); + log.info("Added [{}]", memory); + return res; + } + + @Override + public void release(ManagedCuVSResources resources) { + if (allocations.isEmpty() == false) { + var x = allocations.removeLast(); + log.info("Removed [{}]", x); + } + super.release(resources); + } + } + + static class MockCuVSResources implements CuVSResources { + + final int id; + + MockCuVSResources(int id) { + this.id = id; + } + + @Override + public ScopedAccess access() { + throw new UnsupportedOperationException(); + } + + @Override + public int deviceId() { + return 0; + } + + @Override + public void close() {} + + @Override + public Path tempDirectory() { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "MockCuVSResources[id=" + id + "]"; + } + } + + private static class MockGPUInfoProvider implements GPUInfoProvider { + private final LongSupplier freeMemorySupplier; + + MockGPUInfoProvider(LongSupplier freeMemorySupplier) { + this.freeMemorySupplier = freeMemorySupplier; + } + + @Override + public List availableGPUs() { + throw new UnsupportedOperationException(); + } + + @Override + public List compatibleGPUs() { + throw new UnsupportedOperationException(); + } + + @Override + public CuVSResourcesInfo getCurrentInfo(CuVSResources cuVSResources) { + return new CuVSResourcesInfo(freeMemorySupplier.getAsLong(), TOTAL_DEVICE_MEMORY_IN_BYTES); + } + } +} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java new file mode 100644 index 0000000000000..6c43843dbd830 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java @@ -0,0 +1,78 @@ +/* + * 
Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.gpu.codec; + +import com.nvidia.cuvs.CuVSMatrix; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.store.MemorySegmentAccessInput; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.gpu.GPUSupport; +import org.junit.Before; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; + +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; + +public class DatasetUtilsTests extends ESTestCase { + + DatasetUtils datasetUtils; + + @Before + public void setup() { // TODO: abstract out setup in to common GPUTestcase + assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 22); + assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); + datasetUtils = DatasetUtils.getInstance(); + } + + static final ValueLayout.OfFloat JAVA_FLOAT_LE = ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + public void testBasic() throws Exception { + try (Directory dir = new MMapDirectory(createTempDir("testBasic"))) { + int numVecs = randomIntBetween(1, 100); + int dims = randomIntBetween(128, 2049); + + try (var out = dir.createOutput("vector.data", IOContext.DEFAULT)) { + var ba = new byte[dims * Float.BYTES]; + var seg = MemorySegment.ofArray(ba); + for (int v = 0; v < numVecs; v++) { + var src = MemorySegment.ofArray(randomVector(dims)); + MemorySegment.copy(src, JAVA_FLOAT_UNALIGNED, 0L, seg, JAVA_FLOAT_LE, 0L, numVecs); + out.writeBytes(ba, 0, ba.length); + } + } + try ( + var in = dir.openInput("vector.data", 
IOContext.DEFAULT); + var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims, CuVSMatrix.DataType.FLOAT) + ) { + assertEquals(numVecs, dataset.size()); + assertEquals(dims, dataset.columns()); + } + } + } + + static final Class IAE = IllegalArgumentException.class; + + public void testIllegal() { + MemorySegmentAccessInput in = null; // TODO: make this non-null + expectThrows(IAE, () -> datasetUtils.fromInput(in, -1, 1, CuVSMatrix.DataType.FLOAT)); + expectThrows(IAE, () -> datasetUtils.fromInput(in, 1, -1, CuVSMatrix.DataType.FLOAT)); + } + + float[] randomVector(int dims) { + float[] fa = new float[dims]; + for (int i = 0; i < dims; ++i) { + fa[i] = random().nextFloat(); + } + return fa; + } +} diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java new file mode 100644 index 0000000000000..f1c13b15795c5 --- /dev/null +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
 */
package org.elasticsearch.xpack.gpu.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.xpack.gpu.GPUSupport;
import org.junit.BeforeClass;

/**
 * Runs the Lucene {@link BaseKnnVectorsFormatTestCase} suite against
 * {@link ES92GpuHnswSQVectorsFormat}. Byte-vector tests are overridden as
 * no-ops since only float32 encoding is exercised here.
 */
// CuVS prints tons of logs to stdout
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310")
public class ES92GpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase {

    static {
        LogConfigurator.loadLog4jPlugins();
        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
    }

    static Codec codec;

    @BeforeClass
    public static void beforeClass() {
        // Skip the whole suite when no compatible cuVS/GPU runtime is available.
        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
        codec = TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswSQVectorsFormat());
    }

    @Override
    protected Codec getCodec() {
        return codec;
    }

    @Override
    protected VectorSimilarityFunction randomSimilarity() {
        // Any similarity may be picked; the format must handle all of them.
        return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)];
    }

    @Override
    protected VectorEncoding randomVectorEncoding() {
        // Pin the encoding: byte vectors are not supported by this format.
        return VectorEncoding.FLOAT32;
    }

    @Override
    public void testRandomBytes() {
        // No bytes support
    }

    @Override
    public void testSortedIndexBytes() {
        // No bytes support
    }

    @Override
    public void testByteVectorScorerIteration() {
        // No bytes support
    }

    @Override
    public void testEmptyByteVectorData() {
        // No bytes support
    }

    @Override
    public void testMergingWithDifferentByteKnnFields() {
        // No bytes support
    }

    @Override
    public void testMismatchedFields() {
        // No bytes support
    }
}
diff --git
a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java
new file mode 100644
index 0000000000000..e7ce310d15d9b
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java
@@ -0,0 +1,81 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
package org.elasticsearch.xpack.gpu.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.xpack.gpu.GPUSupport;
import org.junit.BeforeClass;

/**
 * Runs the Lucene {@link BaseKnnVectorsFormatTestCase} suite against
 * {@link ES92GpuHnswVectorsFormat}. Byte-vector tests are overridden as
 * no-ops since only float32 encoding is exercised here.
 */
// CuVS prints tons of logs to stdout
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310")
public class ES92GpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase {

    static {
        LogConfigurator.loadLog4jPlugins();
        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
    }

    static Codec codec;

    @BeforeClass
    public static void beforeClass() {
        // Skip the whole suite when no compatible cuVS/GPU runtime is available.
        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
        codec = TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswVectorsFormat());
    }

    @Override
    protected Codec getCodec() {
        return codec;
    }

    @Override
    protected VectorSimilarityFunction randomSimilarity() {
        // Any similarity may be picked; the format must handle all of them.
        return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)];
    }

    @Override
    protected VectorEncoding randomVectorEncoding() {
        // Pin the encoding: byte vectors are not supported by this format.
        return VectorEncoding.FLOAT32;
    }

    @Override
    public void testRandomBytes() throws Exception {
        // No bytes support
    }

    @Override
    public void testSortedIndexBytes() throws Exception {
        // No bytes support
    }

    @Override
    public void testByteVectorScorerIteration() throws Exception {
        // No bytes support
    }

    @Override
    public void testEmptyByteVectorData() throws Exception {
        // No bytes support
    }

    @Override
    public void testMergingWithDifferentByteKnnFields() throws Exception {
        // No bytes support
    }

    @Override
    public void testMismatchedFields() throws Exception {
        // No bytes support
    }

}
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java
new file mode 100644
index 0000000000000..2648691d03eec
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java
@@ -0,0 +1,84 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.gpu.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.codec.CodecService;
import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec;
import org.elasticsearch.index.codec.PerFieldMapperCodec;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTests;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.xpack.gpu.GPUPlugin;
import org.elasticsearch.xpack.gpu.GPUSupport;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import static org.hamcrest.Matchers.instanceOf;

/**
 * Runs {@link DenseVectorFieldMapperTests} with the {@link GPUPlugin}
 * installed and verifies which KNN vectors format the codec service selects
 * for a dense_vector field.
 */
public class GPUDenseVectorFieldMapperTests extends DenseVectorFieldMapperTests {

    @BeforeClass
    public static void setup() {
        // Skip the whole suite when no compatible cuVS/GPU runtime is available.
        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
    }

    @Override
    protected Collection getPlugins() {
        var plugin = new GPUPlugin();
        return Collections.singletonList(plugin);
    }

    @Override
    public void testKnnVectorsFormat() throws IOException {
        // TODO improve test with custom parameters
        KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw");
        String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, "
            + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)";
        assertEquals(expectedStr, knnVectorsFormat.toString());
    }

    @Override
    public void testKnnQuantizedHNSWVectorsFormat() throws IOException {
        // TODO improve the test with custom parameters
        KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw");
        // Prefix match only: the quantized-format toString has a variable tail.
        String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, "
            + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat";
        assertTrue(knnVectorsFormat.toString().startsWith(expectedStr));
    }

    // Builds a mapping with the given index_options type and returns the
    // KnnVectorsFormat the default codec resolves for the "field" mapper.
    private KnnVectorsFormat getKnnVectorsFormat(String indexOptionsType) throws IOException {
        final int dims = randomIntBetween(128, 4096);
        MapperService mapperService = createMapperService(fieldMapping(b -> {
            b.field("type", "dense_vector");
            b.field("dims", dims);
            b.field("index", true);
            b.field("similarity", "dot_product");
            b.startObject("index_options");
            b.field("type", indexOptionsType);
            b.endObject();
        }));
        CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE);
        Codec codec = codecService.codec("default");
        if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) {
            assertThat(codec, instanceOf(PerFieldMapperCodec.class));
            return ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field");
        } else {
            // Legacy path: unwrap the field-infos-deduplicating delegate first.
            if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) {
                codec = deduplicateFieldInfosCodec.delegate();
            }
            assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class));
            return ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field");
        }
    }
}
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
new file mode 100644
index 0000000000000..c4e7e936b0111
--- /dev/null
+++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
@@ -0,0 +1,54 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
package org.elasticsearch.xpack.gpu;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
import org.junit.BeforeClass;
import org.junit.ClassRule;

/**
 * Runs the YAML REST test suite against a single-node cluster with the gpu
 * module installed. The whole suite is skipped when cuVS is not supported.
 */
public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {

    @BeforeClass
    public static void setup() {
        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
    }

    @ClassRule
    public static ElasticsearchCluster cluster = createCluster();

    // Single-node, trial-license cluster with security disabled. LD_LIBRARY_PATH
    // is forwarded to the node — presumably so it can locate the native cuVS
    // libraries; confirm against the gpu module's loading logic.
    private static ElasticsearchCluster createCluster() {
        var builder = ElasticsearchCluster.local()
            .nodes(1)
            .module("gpu")
            .setting("xpack.license.self_generated.type", "trial")
            .setting("xpack.security.enabled", "false");

        var libraryPath = System.getenv("LD_LIBRARY_PATH");
        if (libraryPath != null) {
            builder.environment("LD_LIBRARY_PATH", libraryPath);
        }
        return builder.build();
    }

    public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) {
        super(testCandidate);
    }

    @ParametersFactory
    public static Iterable parameters() throws Exception {
        return ESClientYamlSuiteTestCase.createParameters();
    }

    @Override
    protected String getTestRestCluster() {
        return cluster.getHttpAddresses();
    }
}
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml
new file mode 100644
index 0000000000000..28cce941f0916
--- /dev/null
+++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml
@@ -0,0 +1,139 @@
---
"Test GPU vector operations":

  - requires:
      cluster_features: [ "vectors.indexing.use_gpu" ]
      reason: "A cluster should have a GPU plugin to run these tests"

  # creating an index is successful even if the GPU is not available
+ - do: + indices.create: + index: my_vectors + body: + mappings: + properties: + embedding: + type: dense_vector + dims: 24 + similarity: l2_norm + index_options: + type: hnsw + settings: + index.number_of_shards: 1 + index.vectors.indexing.use_gpu: true + - match: { error: null } + + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "1" + - text: "First document" + embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + - index: + _id: "2" + - text: "Second document" + embedding: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] + - index: + _id: "3" + - text: "Third document" + embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3] + - match: { errors: false } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "4" + - text: "Fourth document" + embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] + - index: + _id: "5" + - text: "Fifth document" + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] + - index: + _id: "7" + - text: "Seventh document" + embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 + + - match: { 
hits.hits.0._id: "7" } + - match: { hits.hits.1._id: "6" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - delete: + _id: "1" + - delete: + _id: "7" + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "5" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16] + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + k: 2 + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "2" } + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + k: 2 + - match: { hits.hits.0._id: "5" } + - match: { hits.hits.1._id: "4" } diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml new file mode 100644 index 0000000000000..e0a6f42409b66 --- /dev/null +++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml @@ -0,0 +1,148 @@ +--- +"Test GPU vector operations": + + - requires: + cluster_features: [ "vectors.indexing.use_gpu" ] + reason: "A cluster should have a GPU plugin to run these tests" + + # creating an index is 
successful even if the GPU is not available + - do: + indices.create: + index: my_vectors + body: + mappings: + properties: + embedding: + type: dense_vector + dims: 24 + similarity: l2_norm + index_options: + type: int8_hnsw + settings: + index.number_of_shards: 1 + index.vectors.indexing.use_gpu: true + index.refresh_interval: -1 # disable automatic refresh to ensure documents are indexed together + - match: { error: null } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "1" + - text: "First document" + embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + - index: + _id: "2" + - text: "Second document" + embedding: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2] + - index: + _id: "3" + - text: "Third document" + embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3] + - index: + _id: "4" + - text: "Fourth document" + embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4] + - index: + _id: "5" + - text: "Fifth document" + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] + - index: + _id: "6" + - text: "Sixth document" + embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6] + - index: + _id: "7" + - text: "Seventh document" + embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7] + - index: + _id : "8" + - text: "Eighth document" + embedding: [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8] + - index: + _id: "9" + - text: 
"Ninth document" + embedding: [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9] + - index: + _id: "10" + - text: "Tenth document" + embedding: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + - match: { errors: false } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + k: 2 + - match: { hits.hits.0._id: "10" } + - match: { hits.hits.1._id: "9" } + + - do: + bulk: + index: my_vectors + refresh: true + body: + - delete: + _id: "1" + - delete: + _id: "10" + - match: { errors: false } + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + k: 2 + - match: { hits.hits.0._id: "9" } + - match: { hits.hits.1._id: "8" } + + + - do: + bulk: + index: my_vectors + refresh: true + body: + - index: + _id: "2" + - text: "Second document" + embedding: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + + - do: + indices.forcemerge: + index: my_vectors + max_num_segments: 1 + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ] + k: 2 + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "9" } + + - do: + search: + index: my_vectors + body: + knn: + field: embedding + query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1] + k: 2 + - 
match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "4" } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index b7c01ce817b32..525d343826075 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -1265,7 +1265,8 @@ private static Mapper.Builder createEmbeddingsField( DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder( CHUNKED_EMBEDDINGS_FIELD, indexVersionCreated, - false + false, + List.of() ); configureDenseVectorMapperBuilder(indexVersionCreated, denseVectorMapperBuilder, modelSettings, indexOptions);