diff --git a/.buildkite/pipelines/cuvs-snapshot/run-tests.yml b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml
new file mode 100644
index 0000000000000..fe4e2f8cefd70
--- /dev/null
+++ b/.buildkite/pipelines/cuvs-snapshot/run-tests.yml
@@ -0,0 +1,28 @@
+# Runs every Gradle check part on the nvidia agent image through the cuVS snapshot wrapper script.
+steps:
+  - label: "{{matrix.GRADLE_TASK}}"
+    command: >-
+      .buildkite/scripts/cuvs-snapshot/run-gradle.sh
+      -Dbwc.checkout.align=true
+      -Dorg.elasticsearch.build.cache.push=true
+      -Dignore.tests.seed
+      -Dscan.capture-file-fingerprints
+      {{matrix.GRADLE_TASK}}
+    timeout_in_minutes: 300
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2404-nvidia
+      machineType: g2-standard-32
+      buildDirectory: /dev/shm/bk
+      zones: us-central1-b,us-central1-c
+    env:
+      GRADLE_TASK: "{{matrix.GRADLE_TASK}}"
+    matrix:
+      setup:
+        GRADLE_TASK:
+          - checkPart1
+          - checkPart2
+          - checkPart3
+          - checkPart4
+          - checkPart5
+          - checkPart6
diff --git a/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml
new file mode 100644
index 0000000000000..6011926d782bb
--- /dev/null
+++ b/.buildkite/pipelines/cuvs-snapshot/update-snapshot.yml
@@ -0,0 +1,17 @@
+# Smoke-tests a new cuVS snapshot and updates the current version, then triggers the test pipeline.
+steps:
+  - label: "Smoke test and update new cuVS snapshot"
+    command: .buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh
+    agents:
+      provider: gcp
+      image: family/elasticsearch-ubuntu-2404-nvidia
+      machineType: g2-standard-16
+      zones: us-central1-b,us-central1-c
+      diskSizeGb: 150
+  # Do not trigger the test pipeline until the update step above has finished.
+  - wait: null
+  # async: this build continues without waiting for the triggered build.
+  - trigger: "elasticsearch-cuvs-run-tests"
+    build:
+      branch: "${BUILDKITE_BRANCH}"
+    async: true
diff --git a/.buildkite/pipelines/pull-request/.defaults.yml b/.buildkite/pipelines/pull-request/.defaults.yml
index 84d73cbd738a2..a0c82d9ecdded 100644
--- a/.buildkite/pipelines/pull-request/.defaults.yml
+++ b/.buildkite/pipelines/pull-request/.defaults.yml
@@ -1,5 +1,6 @@
 config:
-  skip-labels: ">test-mute"
+  skip-labels:
+    - ">test-mute"
   excluded-regions:
     - ^docs/.*
     - ^x-pack/docs/.*
diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml
index 739deb2db92c4..961f6ae6d612e 100644
--- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml
+++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml
@@ -78,4 +78,3 @@ steps:
           image: family/elasticsearch-ubuntu-2004
           machineType: n1-standard-32
           buildDirectory: /dev/shm/bk
-
diff --git a/.buildkite/pipelines/pull-request/gpu.yml b/.buildkite/pipelines/pull-request/gpu.yml
new file mode 100644
index 0000000000000..40afa272ead7c
--- /dev/null
+++ b/.buildkite/pipelines/pull-request/gpu.yml
@@ -0,0 +1,30 @@
+# Pull-request pipeline that runs the Gradle check parts through the cuVS snapshot wrapper script.
+config:
+  allow-labels: test-gpu
+  skip-labels:
+    - ">test-mute"
+steps:
+  - group: gpu-tests
+    steps:
+      - label: "{{matrix.GRADLE_TASK}} / gpu-tests"
+        # Step keys must be unique within a build, so this step gets its own key.
+        key: "gpu-tests"
+        command: .buildkite/scripts/cuvs-snapshot/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints {{matrix.GRADLE_TASK}}
+        timeout_in_minutes: 300
+        agents:
+          provider: gcp
+          image: family/elasticsearch-ubuntu-2404-nvidia
+          machineType: g2-standard-32
+          buildDirectory: /dev/shm/bk
+          zones: us-central1-b,us-central1-c
+        env:
+          GRADLE_TASK: "{{matrix.GRADLE_TASK}}"
+        matrix:
+          setup:
+            GRADLE_TASK:
+              - checkPart1
+              - checkPart2
+              - checkPart3
+              - checkPart4
+              - checkPart5
+              - checkPart6
diff --git a/.buildkite/scripts/cuvs-snapshot/configure.sh b/.buildkite/scripts/cuvs-snapshot/configure.sh
new file mode 100755
index 0000000000000..241d5f78900e0
--- /dev/null
+++ b/.buildkite/scripts/cuvs-snapshot/configure.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Downloads the pinned cuVS snapshot archive, installs its cuvs-java jar into the local Maven
+# repository, and rewrites the GPU plugin's dependency version to match. It is sourced by the
+# sibling scripts, so the exported variables and the final working directory carry over to them.
+
+set -euo pipefail
+
+if [[ -f /etc/profile.d/elastic-nvidia.sh ]]; then
+  export JAVA_HOME="$HOME/.java/openjdk24"
+  export PATH="$JAVA_HOME/bin:$PATH"
+
+  # Setup LD_LIBRARY_PATH, PATH
+
+  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:-}"
+  source /etc/profile.d/elastic-nvidia.sh
+fi
+
+# Not running this before the tests results in an error when running the tests
+# No idea why...
+nvidia-smi
+
+CURRENT_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ELASTICSEARCH_REPO_DIR="$(cd "$CURRENT_SCRIPT_DIR/../../.." && pwd)"
+
+CUVS_SNAPSHOT_VERSION="${CUVS_SNAPSHOT_VERSION:-$(cat "$CURRENT_SCRIPT_DIR"/current-snapshot-version)}"
+CUVS_ARCHIVE="cuvs-$CUVS_SNAPSHOT_VERSION.tar.gz"
+CUVS_URL="https://storage.googleapis.com/elasticsearch-cuvs-snapshots/$CUVS_ARCHIVE"
+
+# Work inside the workspace directory. The cd has to happen in this shell: a cd wrapped in $(...)
+# runs in a subshell, prints nothing, and leaves both the variable empty and the cwd unchanged.
+CUVS_WORKSPACE="${CUVS_WORKSPACE:-$(mktemp -d)}"
+mkdir -p "$CUVS_WORKSPACE"
+cd "$CUVS_WORKSPACE"
+CUVS_DIR="$(pwd)/cuvs-$CUVS_SNAPSHOT_VERSION"
+
+# -f makes curl fail on an HTTP error instead of saving the error page as the archive.
+curl -fO "$CUVS_URL"
+tar -xzf "$CUVS_ARCHIVE"
+
+CUVS_VERSION=$(cd "$CUVS_DIR/cuvs-java/target" && mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
+
+# Drop existing libcuvs entries, then put this snapshot first. The default expansion keeps `set -u`
+# from aborting when the variable is unset, `|| true` keeps grep's "no lines left" exit status from
+# aborting under pipefail, and the `:+` expansion avoids a trailing ':' when nothing else remains.
+LD_LIBRARY_PATH=$(echo "${LD_LIBRARY_PATH:-}" | tr ':' '\n' | { grep -v "libcuvs/linux-x64" || true; } | tr '\n' ':' | sed 's/:$//')
+LD_LIBRARY_PATH="$CUVS_DIR/libcuvs/linux-x64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+export LD_LIBRARY_PATH
+
+cd "$CUVS_DIR/cuvs-java/target"
+mvn install:install-file -Dfile="cuvs-java-$CUVS_VERSION.jar" -DartifactId=elastic-cuvs-java -DgeneratePom=true
+
+cd "$ELASTICSEARCH_REPO_DIR"
+PLUGIN_GRADLE_FILE=x-pack/plugin/gpu/build.gradle
+sed -i "s|implementation 'com.nvidia.cuvs:elastic-cuvs-java:.*'|implementation 'com.nvidia.cuvs:elastic-cuvs-java:$CUVS_VERSION'|" "$PLUGIN_GRADLE_FILE"
diff --git a/.buildkite/scripts/cuvs-snapshot/current-snapshot-version b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version
new file mode 100644
index 0000000000000..3bb6b7db4687c
--- /dev/null
+++ b/.buildkite/scripts/cuvs-snapshot/current-snapshot-version
@@ -0,0 +1 @@
+fdb8bfb8
diff --git a/.buildkite/scripts/cuvs-snapshot/run-gradle.sh b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh
new file mode 100755
index 0000000000000..4824981f5817f
--- /dev/null
+++ b/.buildkite/scripts/cuvs-snapshot/run-gradle.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Wrapper around .ci/scripts/run-gradle.sh that first configures the cuVS snapshot.
+# All arguments are forwarded to the wrapped script unchanged.
+
+set -euo pipefail
+
+# Sourced (not executed) so the variables it exports are visible to the Gradle run below.
+. .buildkite/scripts/cuvs-snapshot/configure.sh
+
+cd "$WORKSPACE"
+
+.ci/scripts/run-gradle.sh "$@"
diff --git a/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh
new file mode 100755
index 0000000000000..17c83e2f5504c
--- /dev/null
+++ b/.buildkite/scripts/cuvs-snapshot/update-current-snapshot-version.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Smoke-tests a new cuVS snapshot and records it as the current snapshot version on BRANCH_TO_UPDATE
+# through the GitHub contents API. Requires CUVS_SNAPSHOT_VERSION; set SKIP_TESTING=true to skip the
+# smoke test. All paths are relative, so this expects to be started from the repository root.
+
+set -euo pipefail
+
+SNAPSHOT_VERSION_FILE=.buildkite/scripts/cuvs-snapshot/current-snapshot-version
+BRANCH_TO_UPDATE="${BRANCH_TO_UPDATE:-${BUILDKITE_BRANCH:-cuvs-snapshot}}"
+
+if [[ -z "${CUVS_SNAPSHOT_VERSION:-}" ]]; then
+  echo "CUVS_SNAPSHOT_VERSION not set. Set this to update the current snapshot version."
+  exit 1
+fi
+
+if [[ "$CUVS_SNAPSHOT_VERSION" == "$(cat "$SNAPSHOT_VERSION_FILE")" ]]; then
+  echo "Current snapshot version already set to '$CUVS_SNAPSHOT_VERSION'. No need to update."
+  exit 0
+fi
+
+echo "--- Configuring libcuvs/cuvs-java"
+source .buildkite/scripts/cuvs-snapshot/configure.sh
+
+if [[ "${SKIP_TESTING:-}" != "true" ]]; then
+  echo "--- Testing snapshot before updating"
+  ./gradlew -Druntime.java=24 :x-pack:plugin:gpu:yamlRestTest -S
+fi
+
+echo "--- Updating snapshot"
+
+echo "$CUVS_SNAPSHOT_VERSION" > "$SNAPSHOT_VERSION_FILE"
+
+# Look up the file's blob SHA on the target branch. Best effort: if the lookup fails the update is
+# attempted without a sha.
+CURRENT_SHA="$(gh api "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE?ref=$BRANCH_TO_UPDATE" | jq -r .sha)" || true
+
+# The file is read back through the same relative path it was written to above, and the commit
+# message names the snapshot version that was just written to it.
+PUT_ARGS=(
+  -f branch="$BRANCH_TO_UPDATE"
+  -f message="Update cuvs snapshot version to $CUVS_SNAPSHOT_VERSION"
+  -f content="$(base64 -w 0 "$SNAPSHOT_VERSION_FILE")"
+)
+
+# Only send a sha when the lookup produced one; an empty or literal "null" value is not usable.
+if [[ -n "$CURRENT_SHA" && "$CURRENT_SHA" != "null" ]]; then
+  PUT_ARGS+=(-f sha="$CURRENT_SHA")
+fi
+
+gh api -X PUT "/repos/elastic/elasticsearch/contents/$SNAPSHOT_VERSION_FILE" "${PUT_ARGS[@]}"
diff --git a/docs/changelog/135545.yaml b/docs/changelog/135545.yaml
new file mode 100644
index 0000000000000..bbd87fa047476
--- /dev/null
+++ b/docs/changelog/135545.yaml
@@ -0,0 +1,5 @@
+pr: 135545
+summary: Add GPUPlugin for indexing vectors on GPU
+area: Vector Search
+type: feature
+issues: []
diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index b4b17c450200f..5b5c9aea68ac3 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -1199,6 +1199,11 @@
             <sha256 value="64fab42f17bf8e0efb193dd34da716ef7abb7515234036119df1776b808dc066" origin="Generated by Gradle"/>
          </artifact>
       </component>
+      <component group="com.nvidia.cuvs" name="cuvs-java" version="25.10.0">
+         <artifact name="cuvs-java-25.10.0.jar">
+            <sha256 value="2d4cca3b6b6c7c4d3c1a2f57c00cb55f7a45699536ad356699f32bcea7714539" origin="Generated by Gradle"/>
+         </artifact>
+      </component>
       <component group="com.perforce" name="p4java" version="2015.2.1365273">
          <artifact name="p4java-2015.2.1365273.jar">
             <sha256 value="fbcf286c5863a658b400ec2595ee13c5d49c656f735923088509e3f976ea421e" origin="Generated by Gradle"/>
diff --git a/qa/vector/build.gradle b/qa/vector/build.gradle
index 41064d2bb3451..b0223791797dd 100644
--- a/qa/vector/build.gradle
+++ b/qa/vector/build.gradle
@@ -22,6 +22,12 @@ tasks.named("dependencyLicenses").configure {
 tasks.named('forbiddenApisMain').configure {
   enabled = false
 }
+repositories {
+  mavenLocal()
+  maven {
+    url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots")
+  }
+}
 
 dependencies {
   api "org.apache.lucene:lucene-core:${versions.lucene}"
@@ -31,6 +37,7 @@ dependencies {
   implementation project(':libs:native')
   implementation project(':libs:logging')
   implementation project(':server')
+  implementation project(':x-pack:plugin:gpu')
 }
 /**
  * Task to run the KnnIndexTester with the provided parameters.
diff --git a/qa/vector/src/main/java/module-info.java b/qa/vector/src/main/java/module-info.java
index b6647aafeb01f..0bcb7bc98b651 100644
--- a/qa/vector/src/main/java/module-info.java
+++ b/qa/vector/src/main/java/module-info.java
@@ -18,4 +18,5 @@
     requires org.elasticsearch.logging;
     requires java.management;
     requires jdk.management;
+    requires org.elasticsearch.gpu;
 }
diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java
index 421375f038475..9e4dca46f0c18 100644
--- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java
+++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java
@@ -39,6 +39,8 @@
 import org.elasticsearch.xcontent.XContentParser;
 import org.elasticsearch.xcontent.XContentParserConfiguration;
 import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat;
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -76,7 +78,8 @@ public class KnnIndexTester {
     enum IndexType {
         HNSW,
         FLAT,
-        IVF
+        IVF,
+        GPU_HNSW
     }
 
     enum MergePolicyType {
@@ -90,6 +93,8 @@ private static String formatIndexPath(CmdLineArgs args) {
         List<String> suffix = new ArrayList<>();
         if (args.indexType() == IndexType.FLAT) {
             suffix.add("flat");
+        } else if (args.indexType() == IndexType.GPU_HNSW) {
+            suffix.add("gpu_hnsw");
         } else if (args.indexType() == IndexType.IVF) {
             suffix.add("ivf");
             suffix.add(Integer.toString(args.ivfClusterSize()));
@@ -107,6 +112,16 @@ static Codec createCodec(CmdLineArgs args) {
         final KnnVectorsFormat format;
         if (args.indexType() == IndexType.IVF) {
             format = new ES920DiskBBQVectorsFormat(args.ivfClusterSize(), ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
+        } else if (args.indexType() == IndexType.GPU_HNSW) {
+            if (args.quantizeBits() == 32) {
+                format = new ES92GpuHnswVectorsFormat();
+            } else if (args.quantizeBits() == 7) {
+                format = new ES92GpuHnswSQVectorsFormat();
+            } else {
+                throw new IllegalArgumentException(
+                    "GPU HNSW index type only supports 7 or 32 bits quantization, but got: " + args.quantizeBits()
+                );
+            }
         } else {
             if (args.quantizeBits() == 1) {
                 if (args.indexType() == IndexType.FLAT) {
diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java
index 68dc8da3b7d15..cd375474797be 100644
--- a/server/src/main/java/module-info.java
+++ b/server/src/main/java/module-info.java
@@ -400,7 +400,8 @@
             org.elasticsearch.settings.secure,
             org.elasticsearch.serverless.constants,
             org.elasticsearch.serverless.apifiltering,
-            org.elasticsearch.internal.security;
+            org.elasticsearch.internal.security,
+            org.elasticsearch.gpu;
 
     exports org.elasticsearch.telemetry.tracing;
     exports org.elasticsearch.telemetry;
@@ -486,7 +487,7 @@
     exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference;
     exports org.elasticsearch.lucene.util.automaton;
     exports org.elasticsearch.index.codec.perfield;
-    exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
+    exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn, org.elasticsearch.gpu;
     exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
     exports org.elasticsearch.inference.telemetry;
     exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn;
diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
index 8c0756f25286f..2ed1aa6c9f17f 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
@@ -120,7 +120,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
         if (mapperService != null) {
             Mapper mapper = mapperService.mappingLookup().getMapper(field);
             if (mapper instanceof DenseVectorFieldMapper vectorMapper) {
-                return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat);
+                return vectorMapper.getKnnVectorsFormatForField(knnVectorsFormat, mapperService.getIndexSettings());
             }
         }
         return knnVectorsFormat;
diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java
index 56710d49b5a7a..5925e81091238 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java
@@ -130,7 +130,7 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException
         );
     }
 
-    static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter {
+    public static final class ES814ScalarQuantizedVectorsWriter extends FlatVectorsWriter {
 
         final Lucene99ScalarQuantizedVectorsWriter delegate;
 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
index 4112f9108d3ee..5ca5761f7a33f 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -800,7 +800,8 @@ private static void postProcessDynamicArrayMapping(DocumentParserContext context
             DenseVectorFieldMapper.Builder builder = new DenseVectorFieldMapper.Builder(
                 fieldName,
                 context.indexSettings().getIndexVersionCreated(),
-                IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings())
+                IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(context.indexSettings().getSettings()),
+                context.getVectorFormatProviders()
             );
             builder.dimensions(mappers.size());
             DenseVectorFieldMapper denseVectorFieldMapper = builder.build(builderContext);
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
index 0b1a64713857a..b0e002d6c0aff 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java
@@ -20,6 +20,7 @@
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
 import org.elasticsearch.index.mapper.MapperService.MergeReason;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
 import org.elasticsearch.xcontent.FilterXContentParserWrapper;
 import org.elasticsearch.xcontent.FlatteningXContentParser;
 import org.elasticsearch.xcontent.XContentBuilder;
@@ -305,6 +306,10 @@ public final MetadataFieldMapper getMetadataMapper(String mapperName) {
         return mappingLookup.getMapping().getMetadataMapperByName(mapperName);
     }
 
+    public final List<VectorsFormatProvider> getVectorFormatProviders() {
+        return mappingParserContext.getVectorsFormatProviders();
+    }
+
     public final MappingParserContext dynamicTemplateParserContext(DateFormatter dateFormatter) {
         return mappingParserContext.createDynamicTemplateContext(dateFormatter);
     }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java
index 03026655cdca4..b545397e400f5 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperRegistry.java
@@ -11,11 +11,13 @@
 
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
 import org.elasticsearch.plugins.FieldPredicate;
 import org.elasticsearch.plugins.MapperPlugin;
 
 import java.util.Collections;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
 
@@ -32,14 +34,16 @@ public final class MapperRegistry {
     private final Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers5x;
     private final Function<String, FieldPredicate> fieldFilter;
     private final RootObjectMapperNamespaceValidator namespaceValidator;
+    private final List<VectorsFormatProvider> vectorsFormatProviders;
 
     public MapperRegistry(
         Map<String, Mapper.TypeParser> mapperParsers,
         Map<String, RuntimeField.Parser> runtimeFieldParsers,
         Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers,
-        Function<String, FieldPredicate> fieldFilter
+        Function<String, FieldPredicate> fieldFilter,
+        List<VectorsFormatProvider> vectorsFormatProviders
     ) {
-        this(mapperParsers, runtimeFieldParsers, metadataMapperParsers, fieldFilter, null);
+        this(mapperParsers, runtimeFieldParsers, metadataMapperParsers, fieldFilter, vectorsFormatProviders, null);
     }
 
     public MapperRegistry(
@@ -47,6 +51,7 @@ public MapperRegistry(
         Map<String, RuntimeField.Parser> runtimeFieldParsers,
         Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers,
         Function<String, FieldPredicate> fieldFilter,
+        List<VectorsFormatProvider> vectorsFormatProviders,
         RootObjectMapperNamespaceValidator namespaceValidator
     ) {
         this.mapperParsers = Collections.unmodifiableMap(new LinkedHashMap<>(mapperParsers));
@@ -62,6 +67,7 @@ public MapperRegistry(
         this.metadataMapperParsers5x = metadata5x;
         this.fieldFilter = fieldFilter;
         this.namespaceValidator = namespaceValidator;
+        this.vectorsFormatProviders = vectorsFormatProviders;
     }
 
     /**
@@ -88,6 +94,10 @@ public RootObjectMapperNamespaceValidator getNamespaceValidator() {
         return namespaceValidator;
     }
 
+    public List<VectorsFormatProvider> getVectorsFormatProviders() {
+        return vectorsFormatProviders;
+    }
+
     /**
      * Return a map of the meta mappers that have been registered. The
      * returned map uses the name of the field as a key.
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java
index e0a4aca3d83f0..af817f5827b17 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java
@@ -246,6 +246,7 @@ public MapperService(
             indexSettings,
             idFieldMapper,
             bitSetProducer,
+            mapperRegistry.getVectorsFormatProviders(),
             mapperRegistry.getNamespaceValidator()
         );
         this.documentParser = new DocumentParser(parserConfiguration, this.mappingParserContextSupplier.get());
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java
index 5e5488c5f9acd..b6ca0ed2259a7 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingParserContext.java
@@ -17,10 +17,12 @@
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 import org.elasticsearch.script.ScriptCompiler;
 
+import java.util.List;
 import java.util.function.Function;
 import java.util.function.Supplier;
 
@@ -43,6 +45,7 @@ public class MappingParserContext {
     private final Function<Query, BitSetProducer> bitSetProducer;
     private final long mappingObjectDepthLimit;
     private long mappingObjectDepth = 0;
+    private final List<VectorsFormatProvider> vectorsFormatProviders;
     private final RootObjectMapperNamespaceValidator namespaceValidator;
 
     public MappingParserContext(
@@ -57,7 +60,9 @@ public MappingParserContext(
         IndexSettings indexSettings,
         IdFieldMapper idFieldMapper,
         Function<Query, BitSetProducer> bitSetProducer,
+        List<VectorsFormatProvider> vectorsFormatProviders,
         RootObjectMapperNamespaceValidator namespaceValidator
+
     ) {
         this.similarityLookupService = similarityLookupService;
         this.typeParsers = typeParsers;
@@ -71,6 +76,7 @@ public MappingParserContext(
         this.idFieldMapper = idFieldMapper;
         this.mappingObjectDepthLimit = indexSettings.getMappingDepthLimit();
         this.bitSetProducer = bitSetProducer;
+        this.vectorsFormatProviders = vectorsFormatProviders;
         this.namespaceValidator = namespaceValidator;
     }
 
@@ -85,7 +91,8 @@ public MappingParserContext(
         IndexAnalyzers indexAnalyzers,
         IndexSettings indexSettings,
         IdFieldMapper idFieldMapper,
-        Function<Query, BitSetProducer> bitSetProducer
+        Function<Query, BitSetProducer> bitSetProducer,
+        List<VectorsFormatProvider> vectorsFormatProviders
     ) {
         this(
             similarityLookupService,
@@ -99,6 +106,7 @@ public MappingParserContext(
             indexSettings,
             idFieldMapper,
             bitSetProducer,
+            vectorsFormatProviders,
             null
         );
     }
@@ -178,6 +186,10 @@ public BitSetProducer bitSetProducer(Query query) {
         return bitSetProducer.apply(query);
     }
 
+    public List<VectorsFormatProvider> getVectorsFormatProviders() {
+        return vectorsFormatProviders;
+    }
+
     void incrementMappingObjectDepth() throws MapperParsingException {
         mappingObjectDepth++;
         if (mappingObjectDepth > mappingObjectDepthLimit) {
@@ -207,6 +219,7 @@ private static class MultiFieldParserContext extends MappingParserContext {
                 in.indexSettings,
                 in.idFieldMapper,
                 in.bitSetProducer,
+                in.vectorsFormatProviders,
                 in.namespaceValidator
             );
         }
@@ -238,6 +251,7 @@ private static class DynamicTemplateParserContext extends MappingParserContext {
                 in.indexSettings,
                 in.idFieldMapper,
                 in.bitSetProducer,
+                in.vectorsFormatProviders,
                 in.namespaceValidator
             );
             this.dateFormatter = dateFormatter;
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
index 7b8c9934f8104..ea31f18c39dae 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java
@@ -44,6 +44,7 @@
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.features.NodeFeature;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.codec.vectors.ES813FlatVectorFormat;
@@ -251,10 +252,17 @@ public static class Builder extends FieldMapper.Builder {
 
         final IndexVersion indexVersionCreated;
         final boolean isExcludeSourceVectors;
+        private final List<VectorsFormatProvider> vectorsFormatProviders;
 
-        public Builder(String name, IndexVersion indexVersionCreated, boolean isExcludeSourceVectors) {
+        public Builder(
+            String name,
+            IndexVersion indexVersionCreated,
+            boolean isExcludeSourceVectors,
+            List<VectorsFormatProvider> vectorsFormatProviders
+        ) {
             super(name);
             this.indexVersionCreated = indexVersionCreated;
+            this.vectorsFormatProviders = vectorsFormatProviders;
             // This is defined as updatable because it can be updated once, from [null] to a valid dim size,
             // by a dynamic mapping update. Once it has been set, however, the value cannot be changed.
             this.dims = new Parameter<>("dims", true, () -> null, (n, c, o) -> {
@@ -443,7 +451,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) {
                 builderParams(this, context),
                 indexOptions.getValue(),
                 indexVersionCreated,
-                isExcludeSourceVectorsFinal
+                isExcludeSourceVectorsFinal,
+                vectorsFormatProviders
             );
         }
     }
@@ -1887,6 +1896,18 @@ public boolean isFlat() {
             return false;
         }
 
+        public int m() {
+            return m;
+        }
+
+        public int efConstruction() {
+            return efConstruction;
+        }
+
+        public Float confidenceInterval() {
+            return confidenceInterval;
+        }
+
         @Override
         public String toString() {
             return "{type="
@@ -1922,7 +1943,7 @@ public boolean updatableTo(DenseVectorIndexOptions update) {
         }
     }
 
-    static class HnswIndexOptions extends DenseVectorIndexOptions {
+    public static class HnswIndexOptions extends DenseVectorIndexOptions {
         private final int m;
         private final int efConstruction;
 
@@ -1983,6 +2004,14 @@ public boolean isFlat() {
             return false;
         }
 
+        public int m() {
+            return m;
+        }
+
+        public int efConstruction() {
+            return efConstruction;
+        }
+
         @Override
         public String toString() {
             return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + "}";
@@ -2203,7 +2232,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
         (n, c) -> new Builder(
             n,
             c.getIndexSettings().getIndexVersionCreated(),
-            INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings())
+            INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(c.getIndexSettings().getSettings()),
+            c.getVectorsFormatProviders()
         ),
         notInMultiFields(CONTENT_TYPE)
     );
@@ -2608,11 +2638,11 @@ private Query createKnnFloatQuery(
             return knnQuery;
         }
 
-        VectorSimilarity getSimilarity() {
+        public VectorSimilarity getSimilarity() {
             return similarity;
         }
 
-        int getVectorDimensions() {
+        public int getVectorDimensions() {
             return dims;
         }
 
@@ -2666,6 +2696,7 @@ public List<Object> fetchValues(Source source, int doc, List<Object> ignoredValu
     private final DenseVectorIndexOptions indexOptions;
     private final IndexVersion indexCreatedVersion;
     private final boolean isExcludeSourceVectors;
+    private final List<VectorsFormatProvider> extraVectorsFormatProviders;
 
     private DenseVectorFieldMapper(
         String simpleName,
@@ -2673,12 +2704,14 @@ private DenseVectorFieldMapper(
         BuilderParams params,
         DenseVectorIndexOptions indexOptions,
         IndexVersion indexCreatedVersion,
-        boolean isExcludeSourceVectorsFinal
+        boolean isExcludeSourceVectorsFinal,
+        List<VectorsFormatProvider> vectorsFormatProviders
     ) {
         super(simpleName, mappedFieldType, params);
         this.indexOptions = indexOptions;
         this.indexCreatedVersion = indexCreatedVersion;
         this.isExcludeSourceVectors = isExcludeSourceVectorsFinal;
+        this.extraVectorsFormatProviders = vectorsFormatProviders;
     }
 
     @Override
@@ -2800,7 +2833,7 @@ protected String contentType() {
 
     @Override
     public FieldMapper.Builder getMergeBuilder() {
-        return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors).init(this);
+        return new Builder(leafName(), indexCreatedVersion, isExcludeSourceVectors, extraVectorsFormatProviders).init(this);
     }
 
     private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Object propNode, IndexVersion indexVersion) {
@@ -2823,12 +2856,20 @@ private static DenseVectorIndexOptions parseIndexOptions(String fieldName, Objec
      * @return the custom kNN vectors format that is configured for this field or
      * {@code null} if the default format should be used.
      */
-    public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat) {
+    public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat, IndexSettings indexSettings) {
         final KnnVectorsFormat format;
         if (indexOptions == null) {
             format = fieldType().element.elementType() == ElementType.BIT ? new ES815HnswBitVectorsFormat() : defaultFormat;
         } else {
-            format = indexOptions.getVectorsFormat(fieldType().element.elementType());
+            // use the first plugin-provided KnnVectorsFormat for these index options, if any, instead of the standard one
+            KnnVectorsFormat extraKnnFormat = null;
+            for (VectorsFormatProvider vectorsFormatProvider : extraVectorsFormatProviders) {
+                extraKnnFormat = vectorsFormatProvider.getKnnVectorsFormat(indexSettings, indexOptions);
+                if (extraKnnFormat != null) {
+                    break;
+                }
+            }
+            format = extraKnnFormat != null ? extraKnnFormat : indexOptions.getVectorsFormat(fieldType().element.elementType());
         }
         // It's legal to reuse the same format name as this is the same on-disk format.
         return new KnnVectorsFormat(format.getName()) {
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java
new file mode 100644
index 0000000000000..4bc338e6680ec
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/VectorsFormatProvider.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.mapper.vectors;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.elasticsearch.index.IndexSettings;
+
+/**
+ * A service provider interface for obtaining Lucene {@link KnnVectorsFormat} instances.
+ * Plugins can implement this interface to provide custom vector formats.
+ */
+public interface VectorsFormatProvider {
+
+    /**
+     * Returns a {@link KnnVectorsFormat} instance based on the provided index settings and vector index options.
+     * May return {@code null} if the provider does not support the format for the given index settings or vector index options.
+     *
+     * @param indexSettings The index settings.
+     * @param indexOptions The dense vector index options.
+     * @return A KnnVectorsFormat instance, or {@code null} if this provider does not support the given settings or options.
+     */
+    KnnVectorsFormat getKnnVectorsFormat(IndexSettings indexSettings, DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions);
+}
diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
index 98734e373ba17..ad1608bfdff27 100644
--- a/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
+++ b/server/src/main/java/org/elasticsearch/indices/IndicesModule.java
@@ -69,6 +69,7 @@
 import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
 import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
 import org.elasticsearch.index.seqno.RetentionLeaseBackgroundSyncAction;
 import org.elasticsearch.index.seqno.RetentionLeaseSyncAction;
 import org.elasticsearch.index.seqno.RetentionLeaseSyncer;
@@ -78,9 +79,11 @@
 import org.elasticsearch.injection.guice.AbstractModule;
 import org.elasticsearch.plugins.FieldPredicate;
 import org.elasticsearch.plugins.MapperPlugin;
+import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin;
 import org.elasticsearch.xcontent.NamedXContentRegistry;
 import org.elasticsearch.xcontent.ParseField;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
@@ -95,18 +98,23 @@
 public class IndicesModule extends AbstractModule {
     private final MapperRegistry mapperRegistry;
 
-    public IndicesModule(List<MapperPlugin> mapperPlugins, RootObjectMapperNamespaceValidator namespaceValidator) {
+    public IndicesModule(
+        List<MapperPlugin> mapperPlugins,
+        List<InternalVectorFormatProviderPlugin> vectorFormatProviderPlugins,
+        RootObjectMapperNamespaceValidator namespaceValidator
+    ) {
         this.mapperRegistry = new MapperRegistry(
             getMappers(mapperPlugins),
             getRuntimeFields(mapperPlugins),
             getMetadataMappers(mapperPlugins),
             getFieldFilter(mapperPlugins),
+            getVectorFormatProviders(vectorFormatProviderPlugins),
             namespaceValidator
         );
     }
 
     public IndicesModule(List<MapperPlugin> mapperPlugins) {
-        this(mapperPlugins, null);
+        this(mapperPlugins, Collections.emptyList(), null);
     }
 
     public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
@@ -227,6 +235,19 @@ public static Map<String, Mapper.TypeParser> getMappers(List<MapperPlugin> mappe
         return Collections.unmodifiableMap(mappers);
     }
 
+    private static List<VectorsFormatProvider> getVectorFormatProviders(
+        List<InternalVectorFormatProviderPlugin> vectorFormatProviderPlugins
+    ) {
+        List<VectorsFormatProvider> vectorsFormatProviders = new ArrayList<>();
+        for (InternalVectorFormatProviderPlugin plugin : vectorFormatProviderPlugins) {
+            VectorsFormatProvider vectorsFormatProvider = plugin.getVectorsFormatProvider();
+            if (vectorsFormatProvider != null) {
+                vectorsFormatProviders.add(vectorsFormatProvider);
+            }
+        }
+        return Collections.unmodifiableList(vectorsFormatProviders);
+    }
+
     private static Map<String, RuntimeField.Parser> getRuntimeFields(List<MapperPlugin> mapperPlugins) {
         Map<String, RuntimeField.Parser> runtimeParsers = new LinkedHashMap<>();
         runtimeParsers.put(BooleanFieldMapper.CONTENT_TYPE, BooleanScriptFieldType.PARSER);
diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java
index 548ee6f4da22e..7a598475fc456 100644
--- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java
+++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java
@@ -185,6 +185,7 @@
 import org.elasticsearch.plugins.TelemetryPlugin;
 import org.elasticsearch.plugins.internal.DocumentParsingProvider;
 import org.elasticsearch.plugins.internal.DocumentParsingProviderPlugin;
+import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin;
 import org.elasticsearch.plugins.internal.LoggingDataProvider;
 import org.elasticsearch.plugins.internal.ReloadAwarePlugin;
 import org.elasticsearch.plugins.internal.RestExtension;
@@ -822,7 +823,11 @@ private void construct(
             )::onNewInfo
         );
 
-        IndicesModule indicesModule = new IndicesModule(pluginsService.filterPlugins(MapperPlugin.class).toList(), namespaceValidator);
+        IndicesModule indicesModule = new IndicesModule(
+            pluginsService.filterPlugins(MapperPlugin.class).toList(),
+            pluginsService.filterPlugins(InternalVectorFormatProviderPlugin.class).toList(),
+            namespaceValidator
+        );
         modules.add(indicesModule);
 
         modules.add(new GatewayModule());
diff --git a/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java
new file mode 100644
index 0000000000000..84b3d964fd2ba
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/plugins/internal/InternalVectorFormatProviderPlugin.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.plugins.internal;
+
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
+
+public interface InternalVectorFormatProviderPlugin {
+
+    /**
+     * Returns the {@link VectorsFormatProvider} implementation added by this plugin, or {@code null} if it adds none.
+     */
+    default VectorsFormatProvider getVectorsFormatProvider() {
+        return null;
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java
index 074c495e53db2..42513896f84fd 100644
--- a/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java
+++ b/server/src/test/java/org/elasticsearch/cluster/metadata/IndexMetadataVerifierTests.java
@@ -299,7 +299,13 @@ private IndexMetadataVerifier getIndexMetadataVerifier() {
             Settings.EMPTY,
             null,
             xContentRegistry(),
-            new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER),
+            new MapperRegistry(
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                MapperPlugin.NOOP_FIELD_FILTER,
+                null
+            ),
             IndexScopedSettings.DEFAULT_SCOPED_SETTINGS,
             null,
             MapperMetrics.NOOP
diff --git a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java
index 691ca7682f30c..ea18734190d7f 100644
--- a/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java
+++ b/server/src/test/java/org/elasticsearch/index/IndexSettingsTests.java
@@ -857,7 +857,13 @@ public void testIndexMapperDynamic() {
             Settings.EMPTY,
             null,
             xContentRegistry(),
-            new MapperRegistry(Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), MapperPlugin.NOOP_FIELD_FILTER),
+            new MapperRegistry(
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                Collections.emptyMap(),
+                MapperPlugin.NOOP_FIELD_FILTER,
+                null
+            ),
             IndexScopedSettings.DEFAULT_SCOPED_SETTINGS,
             null,
             MapperMetrics.NOOP
@@ -915,4 +921,5 @@ public void testSame() {
         }
         assertTrue(IndexSettings.same(settings, differentOtherSettingBuilder.build()));
     }
+
 }
diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java
index a2ff440facaf0..331d84d9e4f61 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java
@@ -128,7 +128,8 @@ private CodecService createCodecService() throws IOException {
             Collections.emptyMap(),
             Collections.emptyMap(),
             Collections.emptyMap(),
-            MapperPlugin.NOOP_FIELD_FILTER
+            MapperPlugin.NOOP_FIELD_FILTER,
+            null
         );
         BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(settings, BitsetFilterCache.Listener.NOOP);
         MapperService service = new MapperService(
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java
index 6ab4432c1efd3..ef3607476fb47 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingParserTests.java
@@ -65,7 +65,8 @@ private static MappingParser createMappingParser(Settings settings, IndexVersion
             indexAnalyzers,
             indexSettings,
             indexSettings.getMode().idFieldMapperWithoutFieldData(),
-            bitsetFilterCache::getBitSetProducer
+            bitsetFilterCache::getBitSetProducer,
+            null
         );
 
         Map<String, MetadataFieldMapper.TypeParser> metadataMapperParsers = mapperRegistry.getMetadataMapperParsers(
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java
index a161bcbc5d6d2..154f7d774bc9a 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/ParametrizedMapperTests.java
@@ -272,7 +272,8 @@ private static TestMapper fromMapping(
             mapperService.getIndexSettings().getMode().idFieldMapperWithoutFieldData(),
             query -> {
                 throw new UnsupportedOperationException();
-            }
+            },
+            null
         );
         if (fromDynamicTemplate) {
             pc = pc.createDynamicTemplateContext(null);
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java
index 9a30e7d696b68..5b0f823ac1e17 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/TypeParsersTests.java
@@ -106,7 +106,8 @@ public void testMultiFieldWithinMultiField() throws IOException {
             ProvidedIdFieldMapper.NO_FIELD_DATA,
             query -> {
                 throw new UnsupportedOperationException();
-            }
+            },
+            null
         );
 
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> {
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
index e3a3a43dc591e..eed68d4c3ac0c 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java
@@ -2176,7 +2176,7 @@ public void testValidateOnBuild() {
 
         int dimensions = randomIntBetween(64, 1024);
         // Build a dense vector field mapper with float element type, which will trigger int8 HNSW index options
-        DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false).elementType(
+        DenseVectorFieldMapper mapper = new DenseVectorFieldMapper.Builder("test", IndexVersion.current(), false, List.of()).elementType(
             ElementType.FLOAT
         ).dimensions(dimensions).build(context);
 
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java
index b56b66767c7d7..75d7e8609bf56 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java
@@ -63,13 +63,39 @@ private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() {
         return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false));
     }
 
-    private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() {
+    private static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() {
         return randomFrom(
             new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000)),
             new DenseVectorFieldMapper.FlatIndexOptions()
         );
     }
 
+    public static DenseVectorFieldMapper.DenseVectorIndexOptions randomFlatIndexOptions() {
+        return randomFrom(
+            new DenseVectorFieldMapper.FlatIndexOptions(),
+            new DenseVectorFieldMapper.Int8FlatIndexOptions(
+                randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)),
+                randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector())
+            ),
+            new DenseVectorFieldMapper.Int4FlatIndexOptions(
+                randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)),
+                randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector())
+            )
+        );
+    }
+
+    public static DenseVectorFieldMapper.DenseVectorIndexOptions randomGpuSupportedIndexOptions() {
+        return randomFrom(
+            new DenseVectorFieldMapper.HnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 3199)),
+            new DenseVectorFieldMapper.Int8HnswIndexOptions(
+                randomIntBetween(1, 100),
+                randomIntBetween(1, 3199),
+                randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)),
+                randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector())
+            )
+        );
+    }
+
     public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsAll() {
         List<DenseVectorFieldMapper.DenseVectorIndexOptions> options = new ArrayList<>(
             Arrays.asList(
diff --git a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java
index 9bb56c71f4324..9c7f9a6d1e35e 100644
--- a/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java
+++ b/server/src/test/java/org/elasticsearch/index/query/SearchExecutionContextTests.java
@@ -630,7 +630,7 @@ private static MapperService createMapperServiceWithNamespaceValidator(
         RootObjectMapperNamespaceValidator namespaceValidator
     ) {
         IndexAnalyzers indexAnalyzers = IndexAnalyzers.of(singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, null)));
-        IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), namespaceValidator);
+        IndicesModule indicesModule = new IndicesModule(Collections.emptyList(), Collections.emptyList(), namespaceValidator);
         MapperRegistry mapperRegistry = indicesModule.getMapperRegistry();
         Supplier<SearchExecutionContext> searchExecutionContextSupplier = () -> { throw new UnsupportedOperationException(); };
         MapperService mapperService = mock(MapperService.class);
@@ -650,6 +650,7 @@ private static MapperService createMapperServiceWithNamespaceValidator(
                 query -> {
                     throw new UnsupportedOperationException();
                 },
+                null,
                 namespaceValidator
             )
         );
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java
index 4261e5845f3a8..8ccab79c1c6ae 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java
@@ -63,6 +63,7 @@
 import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.plugins.TelemetryPlugin;
+import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptCompiler;
 import org.elasticsearch.script.ScriptContext;
@@ -312,6 +313,10 @@ public MapperService build() {
             SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
             MapperRegistry mapperRegistry = new IndicesModule(
                 plugins.stream().filter(p -> p instanceof MapperPlugin).map(p -> (MapperPlugin) p).collect(toList()),
+                plugins.stream()
+                    .filter(p -> p instanceof InternalVectorFormatProviderPlugin)
+                    .map(p -> (InternalVectorFormatProviderPlugin) p)
+                    .collect(toList()),
                 namespaceValidator
             ).getMapperRegistry();
 
diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java
index 799f0da58f827..f11e428f1274c 100644
--- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java
+++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestDocumentParserContext.java
@@ -68,7 +68,8 @@ private TestDocumentParserContext(MappingLookup mappingLookup, SourceToParse sou
                 null,
                 query -> {
                     throw new UnsupportedOperationException();
-                }
+                },
+                null
             ),
             source,
             mappingLookup.getMapping().getRoot(),
diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
index 0d89d40d982c5..f199fcaabd29b 100644
--- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java
@@ -1410,7 +1410,8 @@ private static class MockParserContext extends MappingParserContext {
                 null,
                 query -> {
                     throw new UnsupportedOperationException();
-                }
+                },
+                null
             );
         }
 
diff --git a/x-pack/plugin/gpu/README b/x-pack/plugin/gpu/README
new file mode 100644
index 0000000000000..a56d18aef7ec5
--- /dev/null
+++ b/x-pack/plugin/gpu/README
@@ -0,0 +1,51 @@
+
+
+For local development on macOS, where cuVS is not easily built, one can
+at least get an IDEA environment with compile-time support by building
+a minimal cuvs-java "api" jar. Tests cannot be run in this setup.
+
+1. Clone cuvs
+   git clone https://github.com/rapidsai/cuvs/; cd cuvs
+   git checkout branch-25.08
+   cd java/cuvs-java
+
+2. Remove the implementation compile target from the pom.xml, e.g.
+
+$ git diff
+diff --git a/java/cuvs-java/pom.xml b/java/cuvs-java/pom.xml
+index 15e0193a..446f2b61 100644
+--- a/java/cuvs-java/pom.xml
++++ b/java/cuvs-java/pom.xml
+@@ -123,20 +123,6 @@
+                             </compileSourceRoots>
+                         </configuration>
+                     </execution>
+-                    <execution>
+-                        <id>compile-java-22</id>
+-                        <phase>compile</phase>
+-                        <goals>
+-                            <goal>compile</goal>
+-                        </goals>
+-                        <configuration>
+-                            <release>22</release>
+-                            <compileSourceRoots>
+-                                <compileSourceRoot>${project.basedir}/src/main/java22</compileSourceRoot>
+-                            </compileSourceRoots>
+-                            <multiReleaseOutput>true</multiReleaseOutput>
+-                        </configuration>
+-                    </execution>
+                 </executions>
+             </plugin>
+             <plugin>
+
+3. Build and install into local maven repository
+
+  export JAVA_HOME=/Users/chegar/binaries/jdk-22.0.2.jdk/Contents/Home/
+  cd java/cuvs-java
+  mvn install
+  This installs the jar into the local Maven repository, e.g.:
+    /Users/chegar/.m2/repository/com/nvidia/cuvs/cuvs-java/25.08.0/
+
+  You might need to update the Gradle metadata sha in gradle/verification-metadata.xml
+
+
diff --git a/x-pack/plugin/gpu/build.gradle b/x-pack/plugin/gpu/build.gradle
new file mode 100644
index 0000000000000..3b9330371fc47
--- /dev/null
+++ b/x-pack/plugin/gpu/build.gradle
@@ -0,0 +1,36 @@
+apply plugin: 'elasticsearch.internal-es-plugin'
+apply plugin: 'elasticsearch.internal-cluster-test'
+apply plugin: 'elasticsearch.internal-yaml-rest-test'
+apply plugin: 'elasticsearch.mrjar'
+
+esplugin {
+  name = 'gpu'
+  description = 'A plugin for doing vector search in GPU'
+  classname = 'org.elasticsearch.xpack.gpu.GPUPlugin'
+  extendedPlugins = ['x-pack-core']
+}
+base {
+  archivesName = 'x-pack-gpu'
+}
+
+repositories {
+  maven {
+    url = uri("https://storage.googleapis.com/elasticsearch-cuvs-snapshots")
+  }
+}
+
+dependencies {
+  compileOnly project(path: xpackModule('core'))
+  compileOnly project(':server')
+  implementation('com.nvidia.cuvs:cuvs-java:25.10.0') {
+    changing = true // Ensure that we get updates even when the version number doesn't change. We can remove this once things stabilize
+  }
+  testImplementation(testArtifact(project(xpackModule('core'))))
+  testImplementation(testArtifact(project(':server')))
+  yamlRestTestImplementation(project(xpackModule('gpu')))
+  clusterModules project(xpackModule('gpu'))
+}
+
+artifacts {
+  restXpackTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test"))
+}
diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt
new file mode 100644
index 0000000000000..1a89b9054d669
--- /dev/null
+++ b/x-pack/plugin/gpu/licenses/cuvs-java-LICENSE.txt
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020 NVIDIA Corporation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt b/x-pack/plugin/gpu/licenses/cuvs-java-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java
new file mode 100644
index 0000000000000..d0f8e85ef6070
--- /dev/null
+++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/CuVSProviderDelegate.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.plugin.gpu;
+
+import com.nvidia.cuvs.BruteForceIndex;
+import com.nvidia.cuvs.CagraIndex;
+import com.nvidia.cuvs.CuVSDeviceMatrix;
+import com.nvidia.cuvs.CuVSHostMatrix;
+import com.nvidia.cuvs.CuVSMatrix;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.GPUInfoProvider;
+import com.nvidia.cuvs.HnswIndex;
+import com.nvidia.cuvs.TieredIndex;
+import com.nvidia.cuvs.spi.CuVSProvider;
+
+import java.lang.invoke.MethodHandle;
+import java.nio.file.Path;
+
+/**
+ * A {@link CuVSProvider} that forwards every call, unchanged, to another provider.
+ * <p>
+ * It carries no behaviour of its own; subclasses override only the methods they want to replace and inherit plain
+ * forwarding for everything else.
+ * <p>
+ * NOTE(review): the numeric parameter names ({@code size}, {@code columns}, {@code rowStride}, {@code columnStride})
+ * are assumed to mirror the cuvs-java interface; confirm against the library. They are passed through positionally.
+ */
+class CuVSProviderDelegate implements CuVSProvider {
+    /** The provider that actually serves every request. */
+    private final CuVSProvider delegate;
+
+    CuVSProviderDelegate(CuVSProvider delegate) {
+        this.delegate = delegate;
+    }
+
+    @Override
+    public CuVSResources newCuVSResources(Path tempDirectory) throws Throwable {
+        return this.delegate.newCuVSResources(tempDirectory);
+    }
+
+    @Override
+    public CuVSMatrix.Builder<CuVSHostMatrix> newHostMatrixBuilder(long size, long columns, CuVSMatrix.DataType type) {
+        return this.delegate.newHostMatrixBuilder(size, columns, type);
+    }
+
+    @Override
+    public CuVSMatrix.Builder<CuVSDeviceMatrix> newDeviceMatrixBuilder(
+        CuVSResources resources,
+        long size,
+        long columns,
+        CuVSMatrix.DataType type
+    ) {
+        return this.delegate.newDeviceMatrixBuilder(resources, size, columns, type);
+    }
+
+    @Override
+    public CuVSMatrix.Builder<CuVSDeviceMatrix> newDeviceMatrixBuilder(
+        CuVSResources resources,
+        long size,
+        long columns,
+        int rowStride,
+        int columnStride,
+        CuVSMatrix.DataType type
+    ) {
+        return this.delegate.newDeviceMatrixBuilder(resources, size, columns, rowStride, columnStride, type);
+    }
+
+    @Override
+    public MethodHandle newNativeMatrixBuilder() {
+        return this.delegate.newNativeMatrixBuilder();
+    }
+
+    @Override
+    public CuVSMatrix newMatrixFromArray(float[][] values) {
+        return this.delegate.newMatrixFromArray(values);
+    }
+
+    @Override
+    public CuVSMatrix newMatrixFromArray(int[][] values) {
+        return this.delegate.newMatrixFromArray(values);
+    }
+
+    @Override
+    public CuVSMatrix newMatrixFromArray(byte[][] values) {
+        return this.delegate.newMatrixFromArray(values);
+    }
+
+    @Override
+    public BruteForceIndex.Builder newBruteForceIndexBuilder(CuVSResources resources) throws UnsupportedOperationException {
+        return this.delegate.newBruteForceIndexBuilder(resources);
+    }
+
+    @Override
+    public CagraIndex.Builder newCagraIndexBuilder(CuVSResources resources) throws UnsupportedOperationException {
+        return this.delegate.newCagraIndexBuilder(resources);
+    }
+
+    @Override
+    public HnswIndex.Builder newHnswIndexBuilder(CuVSResources resources) throws UnsupportedOperationException {
+        return this.delegate.newHnswIndexBuilder(resources);
+    }
+
+    @Override
+    public TieredIndex.Builder newTieredIndexBuilder(CuVSResources resources) throws UnsupportedOperationException {
+        return this.delegate.newTieredIndexBuilder(resources);
+    }
+
+    @Override
+    public CagraIndex mergeCagraIndexes(CagraIndex[] indexes) throws Throwable {
+        return this.delegate.mergeCagraIndexes(indexes);
+    }
+
+    @Override
+    public GPUInfoProvider gpuInfoProvider() {
+        return this.delegate.gpuInfoProvider();
+    }
+}
diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java
new file mode 100644
index 0000000000000..b00d8d83143a9
--- /dev/null
+++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUIndexIT.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.plugin.gpu;
+
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.bulk.BulkResponse;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.vectors.KnnSearchBuilder;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.xpack.gpu.GPUPlugin;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Locale;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse;
+
+@LuceneTestCase.SuppressCodecs("*") // use our custom codec
+public class GPUIndexIT extends ESIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return List.of(GPUPlugin.class);
+    }
+
+    /** Skips the whole class unless {@code GPUSupport.isSupported(false)} reports that cuVS can be used. */
+    @BeforeClass
+    public static void checkGPUSupport() {
+        final boolean cuvsSupported = GPUSupport.isSupported(false);
+        assumeTrue("cuvs not supported", cuvsSupported);
+    }
+
+    /**
+     * Indexes four bulk batches (two of random size, one with a single document and one with two documents) into a
+     * GPU-enabled index and checks that a kNN search returns the expected number of hits.
+     */
+    public void testBasic() {
+        final String indexName = "index1";
+        final int dims = randomIntBetween(4, 128);
+        final int[] batchSizes = { randomIntBetween(1, 100), 1, 2, randomIntBetween(1, 100) };
+        createIndex(indexName, dims, false);
+
+        int totalDocs = 0;
+        int batch = 0;
+        for (int batchSize : batchSizes) {
+            // each batch gets its own block of 100 ids, so ids never collide between batches
+            indexDocs(indexName, batchSize, dims, batch * 100);
+            totalDocs += batchSize;
+            batch++;
+        }
+        refresh();
+
+        assertSearch(indexName, randomFloatVector(dims), totalDocs);
+    }
+
+    @AwaitsFix(bugUrl = "Fix sorted index")
+    public void testSortedIndexReturnsSameResultsAsUnsorted() {
+        String indexName1 = "index_unsorted";
+        String indexName2 = "index_sorted";
+        final int dims = randomIntBetween(4, 128);
+        createIndex(indexName1, dims, false);
+        createIndex(indexName2, dims, true);
+
+        final int[] numDocs = new int[] { randomIntBetween(50, 100), randomIntBetween(50, 100) };
+        for (int i = 0; i < numDocs.length; i++) {
+            BulkRequestBuilder bulkRequest1 = client().prepareBulk();
+            BulkRequestBuilder bulkRequest2 = client().prepareBulk();
+            for (int j = 0; j < numDocs[i]; j++) {
+                String id = String.valueOf(i * 100 + j);
+                String keywordValue = String.valueOf(numDocs[i] - j);
+                float[] vector = randomFloatVector(dims);
+                bulkRequest1.add(prepareIndex(indexName1).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue));
+                bulkRequest2.add(prepareIndex(indexName2).setId(id).setSource("my_vector", vector, "my_keyword", keywordValue));
+            }
+            BulkResponse bulkResponse1 = bulkRequest1.get();
+            assertFalse("Bulk request failed: " + bulkResponse1.buildFailureMessage(), bulkResponse1.hasFailures());
+            BulkResponse bulkResponse2 = bulkRequest2.get();
+            assertFalse("Bulk request failed: " + bulkResponse2.buildFailureMessage(), bulkResponse2.hasFailures());
+        }
+        refresh();
+
+        float[] queryVector = randomFloatVector(dims);
+        int k = 10;
+        int numCandidates = k * 10;
+
+        var searchResponse1 = prepareSearch(indexName1).setSize(k)
+            .setFetchSource(false)
+            .addFetchField("my_keyword")
+            .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null)))
+            .get();
+
+        var searchResponse2 = prepareSearch(indexName2).setSize(k)
+            .setFetchSource(false)
+            .addFetchField("my_keyword")
+            .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null)))
+            .get();
+
+        try {
+            SearchHit[] hits1 = searchResponse1.getHits().getHits();
+            SearchHit[] hits2 = searchResponse2.getHits().getHits();
+            Assert.assertEquals(hits1.length, hits2.length);
+            for (int i = 0; i < hits1.length; i++) {
+                Assert.assertEquals(hits1[i].getId(), hits2[i].getId());
+                Assert.assertEquals(hits1[i].field("my_keyword").getValue(), (String) hits2[i].field("my_keyword").getValue());
+                Assert.assertEquals(hits1[i].getScore(), hits2[i].getScore(), 0.001f);
+            }
+        } finally {
+            searchResponse1.decRef();
+            searchResponse2.decRef();
+        }
+
+        // Force merge and search again
+        assertNoFailures(indicesAdmin().prepareForceMerge(indexName1).get());
+        assertNoFailures(indicesAdmin().prepareForceMerge(indexName2).get());
+        ensureGreen();
+
+        var searchResponse3 = prepareSearch(indexName1).setSize(k)
+            .setFetchSource(false)
+            .addFetchField("my_keyword")
+            .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null)))
+            .get();
+
+        var searchResponse4 = prepareSearch(indexName2).setSize(k)
+            .setFetchSource(false)
+            .addFetchField("my_keyword")
+            .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null)))
+            .get();
+
+        try {
+            SearchHit[] hits3 = searchResponse3.getHits().getHits();
+            SearchHit[] hits4 = searchResponse4.getHits().getHits();
+            Assert.assertEquals(hits3.length, hits4.length);
+            for (int i = 0; i < hits3.length; i++) {
+                Assert.assertEquals(hits3[i].getId(), hits4[i].getId());
+                Assert.assertEquals(hits3[i].field("my_keyword").getValue(), (String) hits4[i].field("my_keyword").getValue());
+                Assert.assertEquals(hits3[i].getScore(), hits4[i].getScore(), 0.01f);
+            }
+        } finally {
+            searchResponse3.decRef();
+            searchResponse4.decRef();
+        }
+    }
+
+    /**
+     * Indexes documents with {@code index.vectors.indexing.use_gpu} enabled, then sets it to {@code false} on the
+     * index and checks that a kNN search still returns the expected number of hits.
+     */
+    public void testSearchWithoutGPU() {
+        final String indexName = "index1";
+        final int dims = randomIntBetween(4, 128);
+        final int numDocs = randomIntBetween(1, 500);
+        createIndex(indexName, dims, false);
+        ensureGreen();
+
+        indexDocs(indexName, numDocs, dims, 0);
+        refresh();
+
+        // update settings to disable GPU usage
+        final Settings gpuDisabled = Settings.builder().put("index.vectors.indexing.use_gpu", false).build();
+        assertAcked(indicesAdmin().prepareUpdateSettings(indexName).setSettings(gpuDisabled));
+        ensureGreen();
+
+        assertSearch(indexName, randomFloatVector(dims), numDocs);
+    }
+
+    private void createIndex(String indexName, int dims, boolean sorted) {
+        var settings = Settings.builder().put(indexSettings());
+        settings.put("index.number_of_shards", 1);
+        settings.put("index.vectors.indexing.use_gpu", true);
+        if (sorted) {
+            settings.put("index.sort.field", "my_keyword");
+        }
+
+        String type = randomFrom("hnsw", "int8_hnsw");
+        String mapping = String.format(Locale.ROOT, """
+            {
+              "properties": {
+                "my_vector": {
+                  "type": "dense_vector",
+                  "dims": %d,
+                  "similarity": "l2_norm",
+                  "index_options": {
+                    "type": "%s"
+                  }
+                },
+                "my_keyword": {
+                  "type": "keyword"
+                }
+              }
+            }
+            """, dims, type);
+        assertAcked(prepareCreate(indexName).setSettings(settings.build()).setMapping(mapping));
+        ensureGreen();
+    }
+
+    private void indexDocs(String indexName, int numDocs, int dims, int startDoc) {
+        BulkRequestBuilder bulkRequest = client().prepareBulk();
+        for (int i = 0; i < numDocs; i++) {
+            String id = String.valueOf(startDoc + i);
+            String keywordValue = String.valueOf(numDocs - i);
+            var indexRequest = prepareIndex(indexName).setId(id)
+                .setSource("my_vector", randomFloatVector(dims), "my_keyword", keywordValue);
+            bulkRequest.add(indexRequest);
+        }
+        BulkResponse bulkResponse = bulkRequest.get();
+        assertFalse("Bulk request failed: " + bulkResponse.buildFailureMessage(), bulkResponse.hasFailures());
+    }
+
+    private void assertSearch(String indexName, float[] queryVector, int totalDocs) {
+        int k = Math.min(randomIntBetween(1, 20), totalDocs);
+        int numCandidates = k * 10;
+        assertNoFailuresAndResponse(
+            prepareSearch(indexName).setSize(k)
+                .setFetchSource(false)
+                .addFetchField("my_keyword")
+                .setKnnSearch(List.of(new KnnSearchBuilder("my_vector", queryVector, k, numCandidates, null, null, null))),
+            response -> assertEquals("Expected k hits to be returned", k, response.getHits().getHits().length)
+        );
+    }
+
+    private static float[] randomFloatVector(int dims) {
+        float[] vector = new float[dims];
+        for (int i = 0; i < dims; i++) {
+            vector[i] = randomFloat();
+        }
+        return vector;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java
new file mode 100644
index 0000000000000..65d8daf14d31e
--- /dev/null
+++ b/x-pack/plugin/gpu/src/internalClusterTest/java/org/elasticsearch/plugin/gpu/GPUPluginInitializationIT.java
@@ -0,0 +1,283 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.plugin.gpu;
+
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.CuVSResourcesInfo;
+import com.nvidia.cuvs.GPUInfo;
+import com.nvidia.cuvs.GPUInfoProvider;
+import com.nvidia.cuvs.spi.CuVSProvider;
+import com.nvidia.cuvs.spi.CuVSServiceProvider;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.index.IndexService;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldTypeTests;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
+import org.elasticsearch.indices.IndicesService;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.ESIntegTestCase;
+import org.elasticsearch.xpack.gpu.GPUPlugin;
+import org.junit.After;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.function.Function;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.startsWith;
+
+public class GPUPluginInitializationIT extends ESIntegTestCase {
+
+    private static final Function<CuVSProvider, GPUInfoProvider> SUPPORTED_GPU_PROVIDER =
+        p -> new TestCuVSServiceProvider.TestGPUInfoProvider(
+            List.of(
+                new GPUInfo(
+                    0,
+                    "TestGPU",
+                    8 * 1024 * 1024 * 1024L,
+                    GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR,
+                    GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR,
+                    true,
+                    true
+                )
+            )
+        );
+
+    private static final Function<CuVSProvider, GPUInfoProvider> NO_GPU_PROVIDER = p -> new TestCuVSServiceProvider.TestGPUInfoProvider(
+        List.of()
+    );
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return List.of(GPUPlugin.class);
+    }
+
+    /**
+     * {@link CuVSServiceProvider} used by these tests. It wraps the builtin provider in a {@link CuVSProviderDelegate}
+     * whose {@code gpuInfoProvider()} is answered by whichever function is currently stored in
+     * {@link #mockedGPUInfoProvider}.
+     */
+    public static class TestCuVSServiceProvider extends CuVSServiceProvider {
+
+        // Default: ask the builtin provider itself.
+        static final Function<CuVSProvider, GPUInfoProvider> BUILTIN_GPU_INFO_PROVIDER = provider -> provider.gpuInfoProvider();
+        // Mutable on purpose: tests assign a different function here.
+        static Function<CuVSProvider, GPUInfoProvider> mockedGPUInfoProvider = BUILTIN_GPU_INFO_PROVIDER;
+
+        @Override
+        public CuVSProvider get(CuVSProvider builtin) {
+            final CuVSProvider wrapped = new CuVSProviderDelegate(builtin) {
+                @Override
+                public GPUInfoProvider gpuInfoProvider() {
+                    // The static field is read on every call, so a mock assigned later is still picked up.
+                    return mockedGPUInfoProvider.apply(builtin);
+                }
+            };
+            return wrapped;
+        }
+
+        /** {@link GPUInfoProvider} stub that reports a fixed list as both the available and the compatible GPUs. */
+        private static class TestGPUInfoProvider implements GPUInfoProvider {
+            private final List<GPUInfo> gpus;
+
+            private TestGPUInfoProvider(List<GPUInfo> gpus) {
+                this.gpus = gpus;
+            }
+
+            @Override
+            public List<GPUInfo> availableGPUs() {
+                return this.gpus;
+            }
+
+            @Override
+            public List<GPUInfo> compatibleGPUs() {
+                return this.gpus;
+            }
+
+            @Override
+            public CuVSResourcesInfo getCurrentInfo(CuVSResources resources) {
+                // This stub has no resource information to report.
+                return null;
+            }
+        }
+    }
+
+    /** Resets the mocked GPU info provider to the builtin one after each test. */
+    @After
+    public void disableMock() {
+        final Function<CuVSProvider, GPUInfoProvider> builtin = TestCuVSServiceProvider.BUILTIN_GPU_INFO_PROVIDER;
+        TestCuVSServiceProvider.mockedGPUInfoProvider = builtin;
+    }
+
+    /** With the GPU_FORMAT feature flag off, the plugin's format provider returns no format for null arguments. */
+    public void testFFOff() {
+        assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled());
+
+        final GPUPlugin plugin = internalCluster().getInstance(GPUPlugin.class);
+        final VectorsFormatProvider formatProvider = plugin.getVectorsFormatProvider();
+
+        assertNull(formatProvider.getKnnVectorsFormat(null, null));
+    }
+
+    /** With the GPU_FORMAT feature flag off, creating an index with the use_gpu setting fails as an unknown setting. */
+    public void testFFOffIndexSettingNotSupported() {
+        assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled());
+
+        final var failure = expectThrows(IllegalArgumentException.class, () -> {
+            final Settings useGpu = Settings.builder()
+                .put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE)
+                .build();
+            createIndex("index1", useGpu);
+        });
+
+        assertThat(failure.getMessage(), containsString("unknown setting [index.vectors.indexing.use_gpu]"));
+    }
+
+    /**
+     * With the GPU_FORMAT feature flag off, no format is returned even when a supported GPU is mocked and the index
+     * options are of a GPU-supported type.
+     */
+    public void testFFOffGPUFormatNull() {
+        assumeFalse("GPU_FORMAT feature flag disabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        final VectorsFormatProvider formatProvider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        createIndex("index1", Settings.EMPTY);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        assertNull(formatProvider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions));
+    }
+
+    /**
+     * With the feature flag on, a supported GPU mocked, the use_gpu setting set to TRUE and GPU-supported index
+     * options, the format provider returns a format.
+     */
+    public void testIndexSettingOnIndexTypeSupportedGPUSupported() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        final VectorsFormatProvider formatProvider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        final Settings useGpu = Settings.builder()
+            .put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE)
+            .build();
+        createIndex("index1", useGpu);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        assertNotNull(formatProvider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions));
+    }
+
+    public void testIndexSettingOnIndexTypeNotSupportedThrows() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        GPUPlugin gpuPlugin = internalCluster().getInstance(GPUPlugin.class);
+        VectorsFormatProvider vectorsFormatProvider = gpuPlugin.getVectorsFormatProvider();
+
+        createIndex("index1", Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build());
+        IndexSettings settings = getIndexSettings();
+        final var indexOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions();
+
+        var ex = expectThrows(IllegalArgumentException.class, () -> vectorsFormatProvider.getKnnVectorsFormat(settings, indexOptions));
+        assertThat(ex.getMessage(), startsWith("[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of"));
+    }
+
+    /** use_gpu=TRUE with the {@code NO_GPU_PROVIDER} mock must be rejected because GPU resources are not accessible. */
+    public void testIndexSettingOnGPUNotSupportedThrows() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER;
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuTrue = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build();
+        createIndex("index1", useGpuTrue);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        var rejected = expectThrows(
+            IllegalArgumentException.class,
+            () -> provider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions)
+        );
+        String expectedMessage = "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.";
+        assertThat(rejected.getMessage(), equalTo(expectedMessage));
+    }
+
+    /** use_gpu=TRUE with a GPU info provider that throws UnsupportedOperationException surfaces as "not accessible". */
+    public void testIndexSettingOnGPUSupportThrowsRethrows() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        // Mocks a cuvs-java UnsupportedProvider
+        TestCuVSServiceProvider.mockedGPUInfoProvider = p -> { throw new UnsupportedOperationException("cuvs-java UnsupportedProvider"); };
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuTrue = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.TRUE).build();
+        createIndex("index1", useGpuTrue);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        var rejected = expectThrows(
+            IllegalArgumentException.class,
+            () -> provider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions)
+        );
+        String expectedMessage = "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node.";
+        assertThat(rejected.getMessage(), equalTo(expectedMessage));
+    }
+
+    /** use_gpu=AUTO, a GPU-supported index type and the {@code SUPPORTED_GPU_PROVIDER} mock: a format is returned. */
+    public void testIndexSettingAutoIndexTypeSupportedGPUSupported() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuAuto = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build();
+        createIndex("index1", useGpuAuto);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        assertNotNull(provider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions));
+    }
+
+    /** use_gpu=AUTO with the {@code NO_GPU_PROVIDER} mock returns no format instead of throwing. */
+    public void testIndexSettingAutoGPUNotSupported() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = NO_GPU_PROVIDER;
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuAuto = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build();
+        createIndex("index1", useGpuAuto);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        assertNull(provider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions));
+    }
+
+    /** use_gpu=AUTO combined with flat index options returns no format instead of throwing. */
+    public void testIndexSettingAutoIndexTypeNotSupported() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuAuto = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.AUTO).build();
+        createIndex("index1", useGpuAuto);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var flatOptions = DenseVectorFieldTypeTests.randomFlatIndexOptions();
+
+        assertNull(provider.getKnnVectorsFormat(indexSettings, flatOptions));
+    }
+
+    /** use_gpu=FALSE returns no format even with a GPU-supported index type and the {@code SUPPORTED_GPU_PROVIDER} mock. */
+    public void testIndexSettingOff() {
+        assumeTrue("GPU_FORMAT feature flag enabled", GPUPlugin.GPU_FORMAT.isEnabled());
+        TestCuVSServiceProvider.mockedGPUInfoProvider = SUPPORTED_GPU_PROVIDER;
+
+        VectorsFormatProvider provider = internalCluster().getInstance(GPUPlugin.class).getVectorsFormatProvider();
+
+        Settings useGpuFalse = Settings.builder().put(GPUPlugin.VECTORS_INDEXING_USE_GPU_SETTING.getKey(), GPUPlugin.GpuMode.FALSE).build();
+        createIndex("index1", useGpuFalse);
+        final IndexSettings indexSettings = getIndexSettings();
+        final var gpuSupportedOptions = DenseVectorFieldTypeTests.randomGpuSupportedIndexOptions();
+
+        assertNull(provider.getKnnVectorsFormat(indexSettings, gpuSupportedOptions));
+    }
+
+    /**
+     * Waits for "index1" to be green, then returns its {@link IndexSettings} from the first
+     * {@link IndicesService} instance in the cluster that holds the index. Fails if none does.
+     */
+    private IndexSettings getIndexSettings() {
+        ensureGreen("index1");
+        IndexSettings found = null;
+        for (IndicesService indicesService : internalCluster().getInstances(IndicesService.class)) {
+            IndexService candidate = indicesService.indexService(resolveIndex("index1"));
+            if (candidate == null) {
+                continue;
+            }
+            found = candidate.getIndexSettings();
+            break;
+        }
+        assertNotNull(found);
+        return found;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider
new file mode 100644
index 0000000000000..d5d524f49aa81
--- /dev/null
+++ b/x-pack/plugin/gpu/src/internalClusterTest/resources/META-INF/services/com.nvidia.cuvs.spi.CuVSServiceProvider
@@ -0,0 +1,8 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0; you may not use this file except in compliance with the Elastic License
+# 2.0.
+#
+
+org.elasticsearch.plugin.gpu.GPUPluginInitializationIT$TestCuVSServiceProvider
diff --git a/x-pack/plugin/gpu/src/main/java/module-info.java b/x-pack/plugin/gpu/src/main/java/module-info.java
new file mode 100644
index 0000000000000..dcada289c1376
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/module-info.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat;
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat;
+
+/** Provides GPU-accelerated support for vector indexing. */
+module org.elasticsearch.gpu {
+    requires org.elasticsearch.logging;
+    requires org.apache.lucene.core;
+    requires org.elasticsearch.xcontent;
+    requires org.elasticsearch.server;
+    requires org.elasticsearch.base;
+    requires com.nvidia.cuvs;
+
+    // The codec package is the only package made accessible to other modules.
+    exports org.elasticsearch.xpack.gpu.codec;
+
+    // Service implementations this module registers: the node feature specification and the two GPU vector formats.
+    provides org.elasticsearch.features.FeatureSpecification with org.elasticsearch.xpack.gpu.GPUFeatures;
+    provides org.apache.lucene.codecs.KnnVectorsFormat with ES92GpuHnswVectorsFormat, ES92GpuHnswSQVectorsFormat;
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java
new file mode 100644
index 0000000000000..cf9ed7b7e5a46
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUFeatures.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu;
+
+import org.elasticsearch.features.FeatureSpecification;
+import org.elasticsearch.features.NodeFeature;
+
+import java.util.Set;
+
+/**
+ * Node features declared by the GPU plugin.
+ *
+ * <p>{@link #VECTORS_INDEXING_USE_GPU} is currently exposed only through {@link #getTestFeatures()};
+ * {@link #getFeatures()} returns an empty set.
+ */
+public class GPUFeatures implements FeatureSpecification {
+
+    /** Feature identifier for the {@code vectors.indexing.use_gpu} capability. */
+    public static final NodeFeature VECTORS_INDEXING_USE_GPU = new NodeFeature("vectors.indexing.use_gpu");
+
+    /** Returns an empty set: this specification declares no non-test features. */
+    @Override
+    public Set<NodeFeature> getFeatures() {
+        return Set.of();
+    }
+
+    /** Returns the test-only feature set, containing {@link #VECTORS_INDEXING_USE_GPU}. */
+    @Override
+    public Set<NodeFeature> getTestFeatures() {
+        return Set.of(VECTORS_INDEXING_USE_GPU);
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java
new file mode 100644
index 0000000000000..62190bc0fb752
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUPlugin.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.gpu;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.util.hnsw.HnswGraphBuilder;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.util.FeatureFlag;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
+import org.elasticsearch.index.mapper.vectors.VectorsFormatProvider;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.internal.InternalVectorFormatProviderPlugin;
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat;
+import org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat;
+
+import java.util.List;
+
+public class GPUPlugin extends Plugin implements InternalVectorFormatProviderPlugin {
+
+    /** Feature flag gating both the registration of the index setting and the GPU vectors format provider. */
+    public static final FeatureFlag GPU_FORMAT = new FeatureFlag("gpu_vectors_indexing");
+
+    /**
+     * An enum for the tri-state value of the `index.vectors.indexing.use_gpu` setting.
+     */
+    public enum GpuMode {
+        TRUE,
+        FALSE,
+        AUTO
+    }
+
+    /**
+     * Setting to control whether to use GPU for vectors indexing.
+     * Currently only applicable to the HNSW and INT8_HNSW vector index types (see {@code vectorIndexTypeSupported}).
+     *
+     * If unset or "auto", an automatic decision is made based on the presence of GPU, necessary libraries, vectors' index type.
+     * If set to <code>true</code>, GPU must be used for vectors indexing, and if GPU or necessary libraries are not available,
+     * or the vectors' index type is not supported, an exception will be thrown.
+     * If set to <code>false</code>, GPU will not be used for vectors indexing.
+     */
+    public static final Setting<GpuMode> VECTORS_INDEXING_USE_GPU_SETTING = Setting.enumSetting(
+        GpuMode.class,
+        "index.vectors.indexing.use_gpu",
+        GpuMode.AUTO,
+        Setting.Property.IndexScope,
+        Setting.Property.Dynamic
+    );
+
+    /** Registers the use_gpu index setting only while the {@link #GPU_FORMAT} feature flag is enabled. */
+    @Override
+    public List<Setting<?>> getSettings() {
+        return GPU_FORMAT.isEnabled() ? List.of(VECTORS_INDEXING_USE_GPU_SETTING) : List.of();
+    }
+
+    @Override
+    public VectorsFormatProvider getVectorsFormatProvider() {
+        return (indexSettings, indexOptions) -> {
+            if (GPU_FORMAT.isEnabled()) {
+                GpuMode gpuMode = indexSettings.getValue(VECTORS_INDEXING_USE_GPU_SETTING);
+                if (gpuMode == GpuMode.TRUE) {
+                    if (vectorIndexTypeSupported(indexOptions.getType()) == false) {
+                        throw new IllegalArgumentException(
+                            "[index.vectors.indexing.use_gpu] doesn't support [index_options.type] of [" + indexOptions.getType() + "]."
+                        );
+                    }
+                    if (GPUSupport.isSupported(true) == false) {
+                        throw new IllegalArgumentException(
+                            "[index.vectors.indexing.use_gpu] was set to [true], but GPU resources are not accessible on the node."
+                        );
+                    }
+                    return getVectorsFormat(indexOptions);
+                }
+                if (gpuMode == GpuMode.AUTO && vectorIndexTypeSupported(indexOptions.getType()) && GPUSupport.isSupported(false)) {
+                    return getVectorsFormat(indexOptions);
+                }
+            }
+            return null;
+        };
+    }
+
+    /** Tells whether GPU indexing is available for the given index type: only HNSW and INT8_HNSW are. */
+    private boolean vectorIndexTypeSupported(DenseVectorFieldMapper.VectorIndexType type) {
+        if (type == DenseVectorFieldMapper.VectorIndexType.HNSW) {
+            return true;
+        }
+        return type == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW;
+    }
+
+    private static KnnVectorsFormat getVectorsFormat(DenseVectorFieldMapper.DenseVectorIndexOptions indexOptions) {
+        if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.HNSW) {
+            DenseVectorFieldMapper.HnswIndexOptions hnswIndexOptions = (DenseVectorFieldMapper.HnswIndexOptions) indexOptions;
+            int efConstruction = hnswIndexOptions.efConstruction();
+            if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) {
+                efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128
+            }
+            return new ES92GpuHnswVectorsFormat(hnswIndexOptions.m(), efConstruction);
+        } else if (indexOptions.getType() == DenseVectorFieldMapper.VectorIndexType.INT8_HNSW) {
+            DenseVectorFieldMapper.Int8HnswIndexOptions int8HnswIndexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) indexOptions;
+            int efConstruction = int8HnswIndexOptions.efConstruction();
+            if (efConstruction == HnswGraphBuilder.DEFAULT_BEAM_WIDTH) {
+                efConstruction = ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH; // default value for GPU graph construction is 128
+            }
+            return new ES92GpuHnswSQVectorsFormat(
+                int8HnswIndexOptions.m(),
+                efConstruction,
+                int8HnswIndexOptions.confidenceInterval(),
+                7,
+                false
+            );
+        } else {
+            throw new IllegalArgumentException(
+                "GPU vector indexing is not supported on this vector type: [" + indexOptions.getType() + "]"
+            );
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
new file mode 100644
index 0000000000000..c21bda894790a
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/GPUSupport.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu;
+
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.GPUInfoProvider;
+import com.nvidia.cuvs.spi.CuVSProvider;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+
+public class GPUSupport {
+
+    private static final Logger LOG = LogManager.getLogger(GPUSupport.class);
+
+    // Set the minimum at 7.5GB: 8GB GPUs (which are our targeted minimum) report less than that via the API
+    private static final long MIN_DEVICE_MEMORY_IN_BYTES = 8053063680L;
+
+    /**
+     * Tells whether the platform supports cuvs.
+     *
+     * <p>Returns {@code true} as soon as one available GPU meets both the minimum compute capability and the
+     * minimum device memory. Returns {@code false} when no GPU is found, none qualifies, or the lookup throws.
+     *
+     * @param logError whether to log the reason for the outcome (warnings on failure, info on success)
+     */
+    public static boolean isSupported(boolean logError) {
+        try {
+            var availableGPUs = CuVSProvider.provider().gpuInfoProvider().availableGPUs();
+            if (availableGPUs.isEmpty()) {
+                if (logError) {
+                    LOG.warn("No GPU found");
+                }
+                return false;
+            }
+
+            for (var candidate : availableGPUs) {
+                final boolean capabilityTooLow = candidate.computeCapabilityMajor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
+                    || (candidate.computeCapabilityMajor() == GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR
+                        && candidate.computeCapabilityMinor() < GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR);
+                if (capabilityTooLow) {
+                    if (logError) {
+                        LOG.warn(
+                            "GPU [{}] does not have the minimum compute capabilities (required: [{}.{}], found: [{}.{}])",
+                            candidate.name(),
+                            GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MAJOR,
+                            GPUInfoProvider.MIN_COMPUTE_CAPABILITY_MINOR,
+                            candidate.computeCapabilityMajor(),
+                            candidate.computeCapabilityMinor()
+                        );
+                    }
+                    continue;
+                }
+                if (candidate.totalDeviceMemoryInBytes() < MIN_DEVICE_MEMORY_IN_BYTES) {
+                    if (logError) {
+                        LOG.warn(
+                            "GPU [{}] does not have minimum memory required (required: [{}], found: [{}])",
+                            candidate.name(),
+                            MIN_DEVICE_MEMORY_IN_BYTES,
+                            candidate.totalDeviceMemoryInBytes()
+                        );
+                    }
+                    continue;
+                }
+                // First GPU passing both checks is enough.
+                if (logError) {
+                    LOG.info("Found compatible GPU [{}] (id: [{}])", candidate.name(), candidate.gpuId());
+                }
+                return true;
+            }
+
+        } catch (UnsupportedOperationException uoe) {
+            if (logError) {
+                // Without a message from the provider, describe the runtime platform instead.
+                final String reason = uoe.getMessage() == null
+                    ? Strings.format(
+                        "runtime Java version [%d], OS [%s], arch [%s]",
+                        Runtime.version().feature(),
+                        System.getProperty("os.name"),
+                        System.getProperty("os.arch")
+                    )
+                    : uoe.getMessage();
+                LOG.warn("GPU based vector indexing is not supported on this platform; " + reason);
+            }
+        } catch (Throwable t) {
+            if (logError) {
+                // Report the cause of a static-initializer failure rather than the wrapper.
+                final Throwable reported = t instanceof ExceptionInInitializerError ex ? ex.getCause() : t;
+                LOG.warn("Exception occurred during creation of cuvs resources", reported);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Creates a new {@link CuVSResources}, or returns {@code null} when creation fails for any reason.
+     *
+     * @param logError whether to log a warning describing why creation failed
+     * @return the newly created resources, or {@code null} if they could not be created
+     */
+    public static CuVSResources cuVSResourcesOrNull(boolean logError) {
+        try {
+            return CuVSResources.create();
+        } catch (UnsupportedOperationException uoe) {
+            if (logError) {
+                // The log prefix already ends with "; ", so the detail is appended without any extra separator.
+                final String msg = uoe.getMessage() == null ? "Runtime Java version: " + Runtime.version().feature() : uoe.getMessage();
+                LOG.warn("GPU based vector indexing is not supported on this platform or java version; " + msg);
+            }
+        } catch (Throwable t) {
+            if (logError) {
+                // Unwrap static-initializer failures to surface the root cause; keep the wrapper if it carries no cause,
+                // otherwise the stack trace would be lost.
+                if (t instanceof ExceptionInInitializerError ex && ex.getCause() != null) {
+                    t = ex.getCause();
+                }
+                LOG.warn("Exception occurred during creation of cuvs resources", t);
+            }
+        }
+        return null;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
new file mode 100644
index 0000000000000..44240a848268b
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManager.java
@@ -0,0 +1,270 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CuVSMatrix;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.GPUInfoProvider;
+import com.nvidia.cuvs.spi.CuVSProvider;
+
+import org.elasticsearch.core.Strings;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+
+import java.nio.file.Path;
+import java.util.Objects;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * A manager of {@link com.nvidia.cuvs.CuVSResources}. There is one manager per GPU.
+ *
+ * <p>All access to GPU resources is mediated through a manager. A manager helps coordinate usage threads to:
+ * <ul>
+ *     <li>ensure single-threaded access to any particular resource at a time</li>
+ *     <li>Control the total number of concurrent operations that may be performed on a GPU</li>
+ *     <li>Pool resources, to avoid frequent creation and destruction, which are expensive operations. </li>
+ * </ul>
+ *
+ * <p> Fundamentally, a resource is used in compute and memory bound operations. The former occurs prior to the latter, e.g.
+ * index build (compute), followed by a copy/process of the newly built index (memory). The manager allows the resource
+ * user to indicate that compute is complete before releasing the resources. This can help improve parallelism of compute
+ * on the GPU - allowing the next compute operation to proceed before releasing the resources.
+ *
+ */
+public interface CuVSResourceManager {
+
+    /**
+     * Acquires a resource from the manager.
+     *
+     * <p>A manager can use the given parameters, numVectors, dims and dataType, to estimate the potential
+     * effect on GPU memory and compute usage to determine whether to give out
+     * another resource or wait for a resource to be returned before giving out another.
+     *
+     * @throws InterruptedException if the calling thread is interrupted while waiting for a resource
+     */
+    // The pooling implementation uses numVectors, dims and dataType to estimate the GPU memory an operation needs.
+    // Further GPU metadata (generation, compute load, etc.) could also be taken into account when deciding,
+    // e.g. for 10M x 1536 dims versus 100,000 x 128 dims, whether to give out a resource or not.
+    ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException;
+
+    /**
+     * Marks the resources as finished with regard to compute. The resources are still held by the caller
+     * and must be handed back with {@link #release(ManagedCuVSResources)}.
+     */
+    void finishedComputation(ManagedCuVSResources resources);
+
+    /** Returns the given resource to the manager, making it available to other callers of {@code acquire}. */
+    void release(ManagedCuVSResources resources);
+
+    /** Shuts down the manager, releasing all open resources. */
+    void shutdown();
+
+    /**
+     * Returns the system-wide pooling manager. The instance lives in a static field of the nested
+     * {@code Holder} class, so every call returns the same object.
+     */
+    static CuVSResourceManager pooling() {
+        return PoolingCuVSResourceManager.Holder.INSTANCE;
+    }
+
+    /**
+     * A manager that maintains a pool of resources.
+     */
+    class PoolingCuVSResourceManager implements CuVSResourceManager {
+
+        static final Logger logger = LogManager.getLogger(CuVSResourceManager.class);
+
+        /** A multiplier on input data to account for intermediate and output data size required while processing it */
+        static final double GPU_COMPUTATION_MEMORY_FACTOR = 2.0;
+        /** Upper bound on the pool capacity; also the length of the backing {@code pool} array. */
+        static final int MAX_RESOURCES = 4;
+
+        /** Holds the system-wide instance returned by {@code CuVSResourceManager.pooling()}. */
+        static class Holder {
+            static final PoolingCuVSResourceManager INSTANCE = new PoolingCuVSResourceManager(
+                MAX_RESOURCES,
+                CuVSProvider.provider().gpuInfoProvider()
+            );
+        }
+
+        // Backing storage for pooled resources; only the first createdCount slots are populated.
+        private final ManagedCuVSResources[] pool;
+        // Maximum number of resources this manager will create.
+        private final int capacity;
+        // Queried in acquire() for the total and free device memory.
+        private final GPUInfoProvider gpuInfoProvider;
+        // Number of resources created so far; incremented in getResourceFromPool().
+        private int createdCount;
+
+        // Held by acquire() and release() while they inspect or change the pool state.
+        // NOTE(review): shutdown() reads pool and createdCount without taking this lock - confirm that is intended.
+        ReentrantLock lock = new ReentrantLock();
+        // Signalled by release(); acquire() waits on it when no resource can be handed out yet.
+        Condition enoughResourcesCondition = lock.newCondition();
+
+        public PoolingCuVSResourceManager(int capacity, GPUInfoProvider gpuInfoProvider) {
+            if (capacity < 1 || capacity > MAX_RESOURCES) {
+                throw new IllegalArgumentException("Resource count must be between 1 and " + MAX_RESOURCES);
+            }
+            this.capacity = capacity;
+            this.gpuInfoProvider = gpuInfoProvider;
+            this.pool = new ManagedCuVSResources[MAX_RESOURCES];
+        }
+
+        /**
+         * Picks a resource that is not locked, creating a new one if the pool is below capacity.
+         *
+         * @return an unlocked resource, or {@code null} if all resources are locked and the pool is at capacity
+         * @throws NullPointerException if a new resource is needed but {@link #createNew()} returns {@code null}
+         */
+        private ManagedCuVSResources getResourceFromPool() {
+            // Prefer reusing a resource that already exists.
+            for (int slot = 0; slot < createdCount; slot++) {
+                ManagedCuVSResources candidate = pool[slot];
+                if (candidate.locked == false) {
+                    return candidate;
+                }
+            }
+            if (createdCount >= capacity) {
+                return null;
+            }
+            ManagedCuVSResources created = new ManagedCuVSResources(Objects.requireNonNull(createNew()));
+            pool[createdCount] = created;
+            createdCount++;
+            return created;
+        }
+
+        /** Counts the created resources that are currently marked as locked. */
+        private int numLockedResources() {
+            int inUse = 0;
+            for (int slot = 0; slot < createdCount; slot++) {
+                inUse += pool[slot].locked ? 1 : 0;
+            }
+            return inUse;
+        }
+
+        /**
+         * Blocks until a pooled resource can be handed out for the requested workload, marks it locked and returns it.
+         *
+         * <p>A resource is handed out when one is available in the pool and either no other resource is currently
+         * locked, or the estimated memory requirement fits in the GPU's free device memory. Otherwise the caller
+         * waits until {@link #release(ManagedCuVSResources)} signals that a resource was returned, then re-checks.
+         *
+         * @throws IllegalArgumentException if the estimated memory requirement exceeds the GPU's total device memory
+         * @throws NullPointerException if a new resource has to be created but creation fails
+         * @throws InterruptedException if the thread is interrupted while waiting
+         */
+        @Override
+        public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException {
+            // Take the lock before entering the try block, so that the finally clause can never attempt
+            // to unlock a lock this thread failed to acquire.
+            lock.lock();
+            try {
+                boolean allConditionsMet = false;
+                ManagedCuVSResources res = null;
+                while (allConditionsMet == false) {
+                    res = getResourceFromPool();
+
+                    final boolean enoughMemory;
+                    if (res != null) {
+                        long requiredMemoryInBytes = estimateRequiredMemory(numVectors, dims, dataType);
+                        logger.debug(
+                            "Estimated memory for [{}] vectors, [{}] dims of type [{}] is [{} B]",
+                            numVectors,
+                            dims,
+                            dataType.name(),
+                            requiredMemoryInBytes
+                        );
+
+                        // Check immutable constraints
+                        long totalDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).totalDeviceMemoryInBytes();
+                        if (requiredMemoryInBytes > totalDeviceMemoryInBytes) {
+                            String message = Strings.format(
+                                "Requested GPU memory for [%d] vectors, [%d] dims is greater than the GPU total memory [%d B]",
+                                numVectors,
+                                dims,
+                                totalDeviceMemoryInBytes
+                            );
+                            logger.error(message);
+                            throw new IllegalArgumentException(message);
+                        }
+
+                        // If no resource in the pool is locked, short circuit to avoid livelock
+                        if (numLockedResources() == 0) {
+                            logger.debug("No resources currently locked, proceeding");
+                            break;
+                        }
+
+                        // Check resources availability
+                        long freeDeviceMemoryInBytes = gpuInfoProvider.getCurrentInfo(res).freeDeviceMemoryInBytes();
+                        enoughMemory = requiredMemoryInBytes <= freeDeviceMemoryInBytes;
+                        logger.debug("Free device memory [{} B], enoughMemory[{}]", freeDeviceMemoryInBytes, enoughMemory);
+                    } else {
+                        logger.debug("No resources available in pool");
+                        enoughMemory = false;
+                    }
+                    // TODO: add enoughComputation / enoughComputationCondition here
+                    allConditionsMet = enoughMemory; // && enoughComputation
+                    if (allConditionsMet == false) {
+                        // Releases the lock while waiting and re-acquires it before returning.
+                        enoughResourcesCondition.await();
+                    }
+                }
+                res.locked = true;
+                return res;
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        private long estimateRequiredMemory(int numVectors, int dims, CuVSMatrix.DataType dataType) {
+            int elementTypeBytes = switch (dataType) {
+                case FLOAT -> Float.BYTES;
+                case INT, UINT -> Integer.BYTES;
+                case BYTE -> Byte.BYTES;
+            };
+            return (long) (GPU_COMPUTATION_MEMORY_FACTOR * numVectors * dims * elementTypeBytes);
+        }
+
+        // visible for testing
+        /**
+         * Creates the underlying resources for a new pool entry by delegating to
+         * {@code GPUSupport.cuVSResourcesOrNull(true)}, which may return {@code null} when creation fails.
+         */
+        protected CuVSResources createNew() {
+            return GPUSupport.cuVSResourcesOrNull(true);
+        }
+
+        /** Currently only logs; the resource stays locked until {@link #release(ManagedCuVSResources)} is called. */
+        @Override
+        public void finishedComputation(ManagedCuVSResources resources) {
+            logger.debug("Computation finished");
+            // currently does nothing, but could allow acquire to return possibly blocked resources
+            // enoughResourcesCondition.signalAll()
+        }
+
+        /**
+         * Marks the given resource as unlocked and wakes up all threads waiting in {@code acquire}.
+         *
+         * @param resources a resource previously returned by {@code acquire}; asserted to be locked
+         */
+        @Override
+        public void release(ManagedCuVSResources resources) {
+            logger.debug("Releasing resources to pool");
+            // Take the lock before entering the try block, so that the finally clause can never attempt
+            // to unlock a lock this thread failed to acquire.
+            lock.lock();
+            try {
+                assert resources.locked;
+                resources.locked = false;
+                enoughResourcesCondition.signalAll();
+            } finally {
+                lock.unlock();
+            }
+        }
+
+        /**
+         * Closes the underlying resources of every pool entry created so far.
+         * NOTE(review): runs without taking {@code lock} and leaves the closed entries in the pool - confirm callers
+         * never acquire after shutdown.
+         */
+        @Override
+        public void shutdown() {
+            for (int slot = 0; slot < createdCount; slot++) {
+                ManagedCuVSResources managed = pool[slot];
+                assert managed != null;
+                managed.delegate.close();
+            }
+        }
+    }
+
+    /**
+     * A managed resource. Cannot be closed.
+     *
+     * <p>Wraps a real {@link CuVSResources} and forwards {@code access()} and {@code deviceId()} to it.
+     * Closing is reserved to the manager, which closes the delegate directly.
+     */
+    final class ManagedCuVSResources implements CuVSResources {
+
+        // The wrapped resources that do the actual work.
+        final CuVSResources delegate;
+        // Whether the resource is currently handed out; read and written by PoolingCuVSResourceManager.
+        boolean locked = false;
+
+        ManagedCuVSResources(CuVSResources resources) {
+            this.delegate = resources;
+        }
+
+        @Override
+        public ScopedAccess access() {
+            return delegate.access();
+        }
+
+        @Override
+        public int deviceId() {
+            return delegate.deviceId();
+        }
+
+        /** Always throws: clients must hand the resource back to the manager instead of closing it. */
+        @Override
+        public void close() {
+            throw new UnsupportedOperationException("this resource is managed, cannot be closed by clients");
+        }
+
+        // NOTE(review): returns null rather than delegating - confirm no caller needs the delegate's temp directory.
+        @Override
+        public Path tempDirectory() {
+            return null;
+        }
+
+        @Override
+        public String toString() {
+            return "ManagedCuVSResources[delegate=" + delegate + "]";
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java
new file mode 100644
index 0000000000000..3a9fcb2c68cd8
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtils.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CuVSMatrix;
+
+import org.apache.lucene.store.MemorySegmentAccessInput;
+
+import java.io.IOException;
+
+public interface DatasetUtils {
+
+    static DatasetUtils getInstance() {
+        return DatasetUtilsImpl.getInstance(); // forwards to the implementation's shared instance
+    }
+
+    /** Returns a Dataset over the {@code numVectors} vectors of {@code dims} dimensions and type {@code dataType} in the input. */
+    CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException;
+
+    /** Returns a Dataset over the slice of the input that starts at {@code pos} and spans {@code len} bytes. */
+    CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType)
+        throws IOException;
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
new file mode 100644
index 0000000000000..0dfb0960cebbe
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsImpl.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CuVSMatrix;
+import com.nvidia.cuvs.spi.CuVSProvider;
+
+import org.apache.lucene.store.MemorySegmentAccessInput;
+
+import java.io.IOException;
+import java.lang.foreign.MemorySegment;
+import java.lang.invoke.MethodHandle;
+
+/** {@link DatasetUtils} implementation that builds matrices over the memory segments exposed by a Lucene input. */
+public class DatasetUtilsImpl implements DatasetUtils {
+
+    private static final DatasetUtils INSTANCE = new DatasetUtilsImpl();
+
+    private static final MethodHandle createDataset$mh = CuVSProvider.provider().newNativeMatrixBuilder();
+
+    static DatasetUtils getInstance() {
+        return INSTANCE;
+    }
+
+    /** Invokes the native matrix builder handle; a checked failure is rethrown wrapped in a {@link RuntimeException}. */
+    static CuVSMatrix fromMemorySegment(MemorySegment memorySegment, int size, int dimensions, CuVSMatrix.DataType dataType) {
+        try {
+            return (CuVSMatrix) createDataset$mh.invokeExact(memorySegment, size, dimensions, dataType);
+        } catch (RuntimeException | Error e) {
+            throw e;
+        } catch (Throwable e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private DatasetUtilsImpl() {}
+
+    @Override
+    public CuVSMatrix fromInput(MemorySegmentAccessInput input, int numVectors, int dims, CuVSMatrix.DataType dataType) throws IOException {
+        if (numVectors < 0 || dims < 0) {
+            throwIllegalArgumentException(numVectors, dims);
+        }
+        return createCuVSMatrix(input, 0L, input.length(), numVectors, dims, dataType);
+    }
+
+    @Override
+    public CuVSMatrix fromSlice(MemorySegmentAccessInput input, long pos, long len, int numVectors, int dims, CuVSMatrix.DataType dataType)
+        throws IOException {
+        if (pos < 0 || len < 0) {
+            throw new IllegalArgumentException("pos and len must be positive");
+        }
+        // Same guard as fromInput: two negative values would multiply to a positive size and slip past the length check.
+        if (numVectors < 0 || dims < 0) {
+            throwIllegalArgumentException(numVectors, dims);
+        }
+        return createCuVSMatrix(input, pos, len, numVectors, dims, dataType);
+    }
+
+    /** Slices the input and checks that the slice can hold {@code numVectors * dims} elements of the given data type. */
+    private static CuVSMatrix createCuVSMatrix(
+        MemorySegmentAccessInput input,
+        long pos,
+        long len,
+        int numVectors,
+        int dims,
+        CuVSMatrix.DataType dataType
+    ) throws IOException {
+        MemorySegment ms = input.segmentSliceOrNull(pos, len);
+        assert ms != null; // TODO: this can be null if larger than 16GB or ...
+        final int byteSize = dataType == CuVSMatrix.DataType.FLOAT ? Float.BYTES : Byte.BYTES;
+        if (((long) numVectors * dims * byteSize) > ms.byteSize()) {
+            throwIllegalArgumentException(ms, numVectors, dims);
+        }
+        return fromMemorySegment(ms, numVectors, dims, dataType);
+    }
+
+    /** Always throws; reports a segment that is too small for the requested vectors. */
+    static void throwIllegalArgumentException(MemorySegment ms, int numVectors, int dims) {
+        var s = "segment of size [" + ms.byteSize() + "] too small for expected " + numVectors + " float vectors of " + dims + " dims";
+        throw new IllegalArgumentException(s);
+    }
+
+    /** Always throws; reports the negative argument, with the vector count taking precedence over the dims. */
+    static void throwIllegalArgumentException(int numVectors, int dims) {
+        String s = numVectors < 0 ? "negative number of vectors: " + numVectors : "negative vector dims: " + dims;
+        throw new IllegalArgumentException(s);
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java
new file mode 100644
index 0000000000000..b62766fb39c3a
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormat.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.DEFAULT_MAX_CONN;
+
+/**
+ * {@link KnnVectorsFormat} whose HNSW graph is built on the GPU on top of scalar-quantized flat vectors.
+ * Scalar quantization and search are performed on the CPU.
+ */
+public class ES92GpuHnswSQVectorsFormat extends KnnVectorsFormat {
+    public static final String NAME = "Lucene99HnswVectorsFormat";
+    static final int MAXIMUM_MAX_CONN = 512;
+    static final int MAXIMUM_BEAM_WIDTH = 3200;
+    private final int maxConn;
+    private final int beamWidth;
+
+    /** The format for storing, reading, merging vectors on disk */
+    private final FlatVectorsFormat flatVectorsFormat;
+    private final Supplier<CuVSResourceManager> cuVSResourceManagerSupplier;
+
+    /** Creates the format with the default graph parameters, no confidence interval, 7 bits and no compression. */
+    public ES92GpuHnswSQVectorsFormat() {
+        this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, null, 7, false);
+    }
+
+    /**
+     * Creates the format, rejecting a {@code maxConn} outside {@code (0, MAXIMUM_MAX_CONN]} or a {@code beamWidth}
+     * outside {@code (0, MAXIMUM_BEAM_WIDTH]}; the remaining arguments configure the scalar-quantized flat format.
+     */
+    public ES92GpuHnswSQVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) {
+        super(NAME);
+        if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
+            throw new IllegalArgumentException(
+                "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn
+            );
+        }
+        if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) {
+            throw new IllegalArgumentException(
+                "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth
+            );
+        }
+        this.maxConn = maxConn;
+        this.beamWidth = beamWidth;
+        this.flatVectorsFormat = new ES814ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress);
+        this.cuVSResourceManagerSupplier = CuVSResourceManager::pooling;
+    }
+
+    /** Creates a GPU graph writer that hands vector storage to the scalar-quantized flat writer. */
+    @Override
+    public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+        var resourceManager = cuVSResourceManagerSupplier.get();
+        var quantizedFlatWriter = flatVectorsFormat.fieldsWriter(state);
+        return new ES92GpuHnswVectorsWriter(resourceManager, state, maxConn, beamWidth, quantizedFlatWriter);
+    }
+
+    /** Segments are read back with the Lucene99 HNSW reader on top of the flat vectors reader. */
+    @Override
+    public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+        return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
+    }
+
+    /** Returns the same limit, {@code MAX_DIMS_COUNT}, for every field. */
+    @Override
+    public int getMaxDimensions(String fieldName) {
+        return MAX_DIMS_COUNT;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder description = new StringBuilder(NAME);
+        description.append("(name=").append(NAME);
+        description.append(", maxConn=").append(maxConn);
+        description.append(", beamWidth=").append(beamWidth);
+        description.append(", flatVectorFormat=").append(flatVectorsFormat);
+        return description.append(")").toString();
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java
new file mode 100644
index 0000000000000..8761b9e12f22a
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormat.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
+import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+
+import java.io.IOException;
+import java.util.function.Supplier;
+
+import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
+
+/**
+ * Codec format for GPU-accelerated HNSW vector indexes stored with {@link Lucene99FlatVectorsFormat}. The graph is
+ * built on the GPU at write time; segments are read back and searched on the CPU with {@link Lucene99HnswVectorsReader}.
+ */
+public class ES92GpuHnswVectorsFormat extends KnnVectorsFormat {
+    public static final String NAME = "Lucene99HnswVectorsFormat";
+    public static final int VERSION_GROUPVARINT = 1;
+
+    static final String LUCENE99_HNSW_META_CODEC_NAME = "Lucene99HnswVectorsFormatMeta";
+    static final String LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME = "Lucene99HnswVectorsFormatIndex";
+    static final String LUCENE99_HNSW_META_EXTENSION = "vem";
+    static final String LUCENE99_HNSW_VECTOR_INDEX_EXTENSION = "vex";
+    static final int LUCENE99_VERSION_CURRENT = VERSION_GROUPVARINT;
+
+    static final int DEFAULT_MAX_CONN = 16; // graph degree
+    public static final int DEFAULT_BEAM_WIDTH = 128; // intermediate graph degree
+    static final int MIN_NUM_VECTORS_FOR_GPU_BUILD = 2; // fields with fewer vectors get a mock graph instead
+
+    private static final FlatVectorsFormat flatVectorsFormat = new Lucene99FlatVectorsFormat(
+        FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
+    );
+
+    // How many nodes each node in the graph is connected to in the final graph
+    private final int maxConn;
+    // Intermediate graph degree, the number of connections for each node before pruning
+    private final int beamWidth;
+    private final Supplier<CuVSResourceManager> cuVSResourceManagerSupplier;
+
+    public ES92GpuHnswVectorsFormat() {
+        this(CuVSResourceManager::pooling, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
+    }
+
+    public ES92GpuHnswVectorsFormat(int maxConn, int beamWidth) {
+        this(CuVSResourceManager::pooling, maxConn, beamWidth);
+    }
+
+    public ES92GpuHnswVectorsFormat(Supplier<CuVSResourceManager> cuVSResourceManagerSupplier, int maxConn, int beamWidth) {
+        super(NAME);
+        this.cuVSResourceManagerSupplier = cuVSResourceManagerSupplier;
+        this.maxConn = maxConn;
+        this.beamWidth = beamWidth;
+    }
+
+    @Override
+    public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+        return new ES92GpuHnswVectorsWriter(
+            cuVSResourceManagerSupplier.get(),
+            state,
+            maxConn,
+            beamWidth,
+            flatVectorsFormat.fieldsWriter(state)
+        );
+    }
+
+    @Override
+    public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
+        return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state));
+    }
+
+    @Override
+    public int getMaxDimensions(String fieldName) {
+        return MAX_DIMS_COUNT;
+    }
+
+    @Override
+    public String toString() {
+        return NAME
+            + "(name="
+            + NAME
+            + ", maxConn="
+            + maxConn
+            + ", beamWidth="
+            + beamWidth
+            + ", flatVectorFormat="
+            + flatVectorsFormat.getName()
+            + ")";
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java
new file mode 100644
index 0000000000000..f848f715f913b
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsWriter.java
@@ -0,0 +1,683 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CagraIndex;
+import com.nvidia.cuvs.CagraIndexParams;
+import com.nvidia.cuvs.CuVSMatrix;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.KnnFieldVectorsWriter;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter;
+import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
+import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.DocsWithFieldSet;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnVectorValues;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Sorter;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FilterIndexInput;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.MemorySegmentAccessInput;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.hnsw.HnswGraph;
+import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
+import org.apache.lucene.util.packed.DirectMonotonicWriter;
+import org.apache.lucene.util.quantization.ScalarQuantizer;
+import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.core.SuppressForbidden;
+import org.elasticsearch.index.codec.vectors.ES814ScalarQuantizedVectorsFormat;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
+import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter.mergeAndRecalculateQuantiles;
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_CODEC_NAME;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_META_EXTENSION;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_HNSW_VECTOR_INDEX_EXTENSION;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.LUCENE99_VERSION_CURRENT;
+import static org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat.MIN_NUM_VECTORS_FOR_GPU_BUILD;
+
+/**
+ * Writer that builds an NVIDIA CAGRA graph on GPU and then writes it into the Lucene99 HNSW format,
+ * so that it can be searched on CPU with Lucene99HnswVectorsReader.
+ */
+final class ES92GpuHnswVectorsWriter extends KnnVectorsWriter {
+    private static final Logger logger = LogManager.getLogger(ES92GpuHnswVectorsWriter.class);
+    private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(ES92GpuHnswVectorsWriter.class);
+    private static final int LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
+
+    private final CuVSResourceManager cuVSResourceManager;
+    private final SegmentWriteState segmentWriteState;
+    private final IndexOutput meta, vectorIndex; // segment outputs created with the "vem" and "vex" extensions
+    private final int M; // graph degree passed to the CAGRA index params
+    private final int beamWidth; // intermediate graph degree passed to the CAGRA index params
+    private final FlatVectorsWriter flatVectorWriter; // stores the vectors themselves; flush/finish/merge are forwarded to it
+
+    private final List<FieldWriter> fields = new ArrayList<>(); // one entry per addField() call
+    private boolean finished; // set by finish(), which rejects a second call
+    private final CuVSMatrix.DataType dataType; // BYTE for the scalar-quantized flat writer, FLOAT otherwise
+
+    ES92GpuHnswVectorsWriter(
+        CuVSResourceManager cuVSResourceManager,
+        SegmentWriteState state,
+        int M,
+        int beamWidth,
+        FlatVectorsWriter flatVectorWriter
+    ) throws IOException {
+        assert cuVSResourceManager != null : "CuVSResources must not be null";
+        this.cuVSResourceManager = cuVSResourceManager;
+        this.M = M;
+        this.beamWidth = beamWidth;
+        this.flatVectorWriter = flatVectorWriter;
+        if (flatVectorWriter instanceof ES814ScalarQuantizedVectorsFormat.ES814ScalarQuantizedVectorsWriter) {
+            dataType = CuVSMatrix.DataType.BYTE; // scalar-quantized flat writer
+        } else {
+            assert flatVectorWriter instanceof Lucene99FlatVectorsWriter;
+            dataType = CuVSMatrix.DataType.FLOAT; // the only other writer expected here
+        }
+        this.segmentWriteState = state;
+        String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, LUCENE99_HNSW_META_EXTENSION);
+        String indexDataFileName = IndexFileNames.segmentFileName(
+            state.segmentInfo.name,
+            state.segmentSuffix,
+            LUCENE99_HNSW_VECTOR_INDEX_EXTENSION
+        );
+        boolean success = false; // stays false if creating an output or writing a header throws
+        try {
+            meta = state.directory.createOutput(metaFileName, state.context);
+            vectorIndex = state.directory.createOutput(indexDataFileName, state.context);
+            CodecUtil.writeIndexHeader(
+                meta,
+                LUCENE99_HNSW_META_CODEC_NAME,
+                LUCENE99_VERSION_CURRENT,
+                state.segmentInfo.getId(),
+                state.segmentSuffix
+            );
+            CodecUtil.writeIndexHeader(
+                vectorIndex,
+                LUCENE99_HNSW_VECTOR_INDEX_CODEC_NAME,
+                LUCENE99_VERSION_CURRENT,
+                state.segmentInfo.getId(),
+                state.segmentSuffix
+            );
+            success = true;
+        } finally {
+            if (success == false) {
+                org.elasticsearch.core.IOUtils.closeWhileHandlingException(this); // clean up on failure
+            }
+        }
+    }
+
+    @Override
+    public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
+        final VectorEncoding encoding = fieldInfo.getVectorEncoding();
+        if (encoding.equals(VectorEncoding.FLOAT32) == false) { // only float vectors are accepted here
+            throw new IllegalArgumentException("Field [" + fieldInfo.name + "] must have FLOAT32 encoding, got: " + encoding);
+        }
+        // The flat writer creates the per-field delegate; the wrapper built around it is tracked in fields for flush().
+        @SuppressWarnings("unchecked")
+        FlatFieldVectorsWriter<float[]> flatDelegate = (FlatFieldVectorsWriter<float[]>) flatVectorWriter.addField(fieldInfo);
+        FieldWriter fieldWriter = new FieldWriter(flatDelegate, fieldInfo);
+        fields.add(fieldWriter);
+        return fieldWriter;
+    }
+
+    /**
+     * Flushes vector data and associated data to disk. Only FLOAT32 has to be supported here and in the private helpers
+     * it calls: during indexing users provide float[] vectors, which are passed to FlatFieldVectorWriter even when we have
+     * a BYTE dataType (i.e. an "int8_hnsw" type). Quantized byte[] data is only needed during merging (see
+     * {@link ES92GpuHnswVectorsWriter#mergeOneField}), like our other current formats. IOExceptions propagate unchanged.
+     */
+    @Override
+    // TODO: fix sorted index case
+    public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
+        flatVectorWriter.flush(maxDoc, sortMap);
+        try {
+            flushFieldsWithoutMemoryMappedFile(sortMap);
+        } catch (IOException e) {
+            throw e;
+        } catch (Throwable t) {
+            if (t instanceof InterruptedException) {
+                Thread.currentThread().interrupt(); // keep the interruption visible to the caller
+            }
+            throw new IOException("Failed to flush GPU index: ", t);
+        }
+    }
+
+    private void flushFieldsWithoutMemoryMappedFile(Sorter.DocMap sortMap) throws IOException, InterruptedException {
+        // No tmp file written, or the file cannot be mmapped
+        for (FieldWriter field : fields) {
+            var fieldInfo = field.fieldInfo;
+
+            var numVectors = field.flatFieldVectorsWriter.getVectors().size(); // vectors buffered for this field
+            if (numVectors < MIN_NUM_VECTORS_FOR_GPU_BUILD) {
+                if (logger.isDebugEnabled()) {
+                    logger.debug(
+                        "Skip building carga index; vectors length {} < {} (min for GPU)",
+                        numVectors,
+                        MIN_NUM_VECTORS_FOR_GPU_BUILD
+                    );
+                }
+                // Will not be indexed on the GPU
+                flushFieldWithMockGraph(fieldInfo, numVectors, sortMap);
+            } else {
+                var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), CuVSMatrix.DataType.FLOAT);
+                try {
+                    var builder = CuVSMatrix.deviceBuilder(
+                        cuVSResources,
+                        numVectors,
+                        fieldInfo.getVectorDimension(),
+                        CuVSMatrix.DataType.FLOAT
+                    );
+                    for (var vector : field.flatFieldVectorsWriter.getVectors()) {
+                        builder.addVector(vector); // feed every buffered vector to the matrix builder
+                    }
+                    try (var dataset = builder.build()) { // the dataset is closed once the graph has been written
+                        flushFieldWithGpuGraph(cuVSResources, fieldInfo, dataset, sortMap);
+                    }
+                } finally {
+                    cuVSResourceManager.release(cuVSResources); // always hand the resources back, even on failure
+                }
+            }
+        }
+    }
+
+    /**
+     * Writes the placeholder graph for a field that has too few vectors for a GPU build.
+     */
+    private void flushFieldWithMockGraph(FieldInfo fieldInfo, int numVectors, Sorter.DocMap sortMap) throws IOException {
+        // TODO: use sortMap
+        // Sorted indices are not handled yet: the same graph is written whether or not a sortMap is supplied.
+        generateMockGraphAndWriteMeta(fieldInfo, numVectors);
+    }
+
+    /** Builds the graph for one field on the GPU and writes it together with its metadata. */
+    private void flushFieldWithGpuGraph(
+        CuVSResourceManager.ManagedCuVSResources resources,
+        FieldInfo fieldInfo,
+        CuVSMatrix dataset,
+        Sorter.DocMap sortMap
+    ) throws IOException {
+        // TODO: use sortMap
+        // Sorted indices are not handled yet, so the graph is generated the same way
+        // whether or not a sortMap is supplied.
+        // The sortMap parameter is therefore currently unused.
+        generateGpuGraphAndWriteMeta(resources, fieldInfo, dataset);
+    }
+
+    @Override
+    public void finish() throws IOException {
+        if (finished) {
+            throw new IllegalStateException("already finished");
+        }
+        finished = true; // set before any I/O, so a second call throws even if the steps below fail
+        flatVectorWriter.finish();
+
+        if (meta != null) {
+            // write end of fields marker
+            meta.writeInt(-1);
+            CodecUtil.writeFooter(meta);
+        }
+        if (vectorIndex != null) {
+            CodecUtil.writeFooter(vectorIndex);
+        }
+    }
+
+    @Override
+    public long ramBytesUsed() {
+        long perFieldBytes = 0;
+        for (FieldWriter fieldWriter : fields) {
+            // the field tracks the delegate field usage
+            perFieldBytes += fieldWriter.ramBytesUsed();
+        }
+        return SHALLOW_RAM_BYTES_USED + perFieldBytes;
+    }
+
+    private void generateGpuGraphAndWriteMeta(
+        CuVSResourceManager.ManagedCuVSResources cuVSResources,
+        FieldInfo fieldInfo,
+        CuVSMatrix dataset
+    ) throws IOException {
+        try {
+            assert dataset.size() >= MIN_NUM_VECTORS_FOR_GPU_BUILD;
+
+            long vectorIndexOffset = vectorIndex.getFilePointer(); // where this field's graph starts in vectorIndex
+            int[][] graphLevelNodeOffsets = new int[1][]; // one level only; filled in by writeGraph
+            final HnswGraph graph;
+            try (var index = buildGPUIndex(cuVSResources, fieldInfo.getVectorSimilarityFunction(), dataset)) {
+                assert index != null : "GPU index should be built for field: " + fieldInfo.name;
+                graph = writeGraph(index.getGraph(), graphLevelNodeOffsets); // the index is closed right after serialization
+            }
+            long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; // bytes written for this field
+            writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, (int) dataset.size(), graph, graphLevelNodeOffsets);
+        } catch (IOException e) {
+            throw e; // I/O failures propagate unchanged
+        } catch (Throwable t) {
+            throw new IOException("Failed to write GPU index: ", t); // everything else is wrapped
+        }
+    }
+
+    private void generateMockGraphAndWriteMeta(FieldInfo fieldInfo, int datasetSize) throws IOException {
+        try {
+            long vectorIndexOffset = vectorIndex.getFilePointer(); // where this field's graph starts in vectorIndex
+            int[][] graphLevelNodeOffsets = new int[1][]; // one level only; filled in by writeMockGraph
+            final HnswGraph graph = writeMockGraph(datasetSize, graphLevelNodeOffsets); // null when datasetSize is 0
+            long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset; // bytes written for this field
+            writeMeta(fieldInfo, vectorIndexOffset, vectorIndexLength, datasetSize, graph, graphLevelNodeOffsets);
+        } catch (IOException e) {
+            throw e; // I/O failures propagate unchanged
+        } catch (Throwable t) {
+            throw new IOException("Failed to write GPU index: ", t); // everything else is wrapped
+        }
+    }
+
+    private CagraIndex buildGPUIndex(
+        CuVSResourceManager.ManagedCuVSResources cuVSResources,
+        VectorSimilarityFunction similarityFunction,
+        CuVSMatrix dataset
+    ) throws Throwable {
+        CagraIndexParams.CuvsDistanceType distanceType = switch (similarityFunction) { // Lucene similarity -> cuVS distance
+            case EUCLIDEAN -> CagraIndexParams.CuvsDistanceType.L2Expanded;
+            case DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> CagraIndexParams.CuvsDistanceType.InnerProduct;
+            case COSINE -> CagraIndexParams.CuvsDistanceType.CosineExpanded;
+        };
+
+        // TODO: expose cagra index params for algorithm, NNDescentNumIterations
+        CagraIndexParams params = new CagraIndexParams.Builder().withNumWriterThreads(1) // TODO: how many CPU threads we can use?
+            .withCagraGraphBuildAlgo(CagraIndexParams.CagraGraphBuildAlgo.NN_DESCENT)
+            .withGraphDegree(M)
+            .withIntermediateGraphDegree(beamWidth)
+            .withMetric(distanceType)
+            .build();
+
+        long startTime = System.nanoTime();
+        var indexBuilder = CagraIndex.newBuilder(cuVSResources).withDataset(dataset).withIndexParams(params);
+        var index = indexBuilder.build();
+        cuVSResourceManager.finishedComputation(cuVSResources); // NOTE(review): skipped if build() throws — confirm that is fine
+        if (logger.isDebugEnabled()) {
+            logger.debug("Carga index created in: {} ms; #num vectors: {}", (System.nanoTime() - startTime) / 1_000_000.0, dataset.size());
+        }
+        return index; // the caller closes the returned index
+    }
+
+    private HnswGraph writeGraph(CuVSMatrix cagraGraph, int[][] levelNodeOffsets) throws IOException {
+        long startTime = System.nanoTime();
+
+        int maxElementCount = (int) cagraGraph.size(); // number of rows, used as the node count
+        int maxGraphDegree = (int) cagraGraph.columns(); // number of columns, used as neighbours per node
+        int[] neighbors = new int[maxGraphDegree];
+
+        levelNodeOffsets[0] = new int[maxElementCount];
+        // write the cagra graph to the Lucene vectorIndex file
+        int[] scratch = new int[maxGraphDegree]; // reused per node for the delta-encoded neighbours
+        for (int node = 0; node < maxElementCount; node++) {
+            cagraGraph.getRow(node).toArray(neighbors);
+
+            // write to the Lucene vectorIndex file
+            long offsetStart = vectorIndex.getFilePointer();
+            Arrays.sort(neighbors); // ascending order, so each neighbour can be stored as a delta to the previous one
+            int actualSize = 0;
+            if (maxGraphDegree > 0) {
+                scratch[0] = neighbors[0]; // the first neighbour is stored as-is
+                actualSize = 1;
+            }
+            for (int i = 1; i < maxGraphDegree; i++) {
+                assert neighbors[i] < maxElementCount : "node too large: " + neighbors[i] + ">=" + maxElementCount;
+                if (neighbors[i - 1] == neighbors[i]) {
+                    continue; // skip a duplicate neighbour
+                }
+                scratch[actualSize++] = neighbors[i] - neighbors[i - 1];
+            }
+            // Write the size after duplicates are removed
+            vectorIndex.writeVInt(actualSize);
+            vectorIndex.writeGroupVInts(scratch, actualSize);
+            levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); // bytes written for this node
+        }
+        if (logger.isDebugEnabled()) {
+            logger.debug("cagra_hnws index serialized to Lucene HNSW in: {} ms", (System.nanoTime() - startTime) / 1_000_000.0);
+        }
+        return createMockGraph(maxElementCount, maxGraphDegree); // metadata-only view of what was just written
+    }
+
+    // create a mock graph where every node is connected to every other node
+    private HnswGraph writeMockGraph(int elementCount, int[][] levelNodeOffsets) throws IOException {
+        if (elementCount == 0) {
+            return null; // nothing is written for an empty field
+        }
+        int nodeDegree = elementCount - 1; // every node except the node itself
+        levelNodeOffsets[0] = new int[elementCount];
+
+        int[] neighbors = new int[nodeDegree];
+        int[] scratch = new int[nodeDegree]; // reused per node for the delta-encoded neighbours
+        for (int node = 0; node < elementCount; node++) {
+            if (nodeDegree > 0) {
+                for (int j = 0; j < nodeDegree; j++) {
+                    neighbors[j] = j < node ? j : j + 1; // skip self
+                }
+                scratch[0] = neighbors[0]; // the first neighbour is stored as-is
+                for (int i = 1; i < nodeDegree; i++) {
+                    scratch[i] = neighbors[i] - neighbors[i - 1]; // the rest as deltas to the previous neighbour
+                }
+            }
+
+            long offsetStart = vectorIndex.getFilePointer();
+            vectorIndex.writeVInt(nodeDegree);
+            vectorIndex.writeGroupVInts(scratch, nodeDegree);
+            levelNodeOffsets[0][node] = Math.toIntExact(vectorIndex.getFilePointer() - offsetStart); // bytes written for this node
+        }
+        return createMockGraph(elementCount, nodeDegree);
+    }
+
+    private static HnswGraph createMockGraph(int elementCount, int graphDegree) {
+        return new HnswGraph() { // reports size, levels, maxConn and node ids only; traversal methods throw
+            @Override
+            public int size() {
+                return elementCount;
+            }
+
+            @Override
+            public int numLevels() {
+                return 1;
+            }
+
+            @Override
+            public int maxConn() {
+                return graphDegree;
+            }
+
+            @Override
+            public NodesIterator getNodesOnLevel(int level) {
+                return new ArrayNodesIterator(elementCount);
+            }
+
+            @Override
+            public void seek(int level, int target) {
+                throw new UnsupportedOperationException("Not supported on a mock graph");
+            }
+
+            @Override
+            public int nextNeighbor() {
+                throw new UnsupportedOperationException("Not supported on a mock graph");
+            }
+
+            @Override
+            public int neighborCount() {
+                throw new UnsupportedOperationException("Not supported on a mock graph");
+            }
+
+            @Override
+            public int entryNode() {
+                throw new UnsupportedOperationException("Not supported on a mock graph");
+            }
+        };
+    }
+
+    /**
+     * Deletes {@code fileName} from {@code dir} by delegating to Lucene's
+     * {@code IOUtils#deleteFilesIgnoringExceptions}. Isolated in its own method so the
+     * {@code @SuppressForbidden} annotation covers only this call.
+     */
+    @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)")
+    private static void deleteFilesIgnoringExceptions(Directory dir, String fileName) {
+        org.apache.lucene.util.IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
+    }
+
+    // TODO check with deleted documents
+    // TODO fix sorted index case
+    /**
+     * Merges one vector field.
+     * <p>
+     * The flat vectors are merged by the delegate flat writer first. The merged vectors (quantized bytes when
+     * {@code dataType} is BYTE, floats otherwise) are then spilled to a temporary file, which is read back to
+     * build the graph:
+     * <ul>
+     *   <li>with at least {@code MIN_NUM_VECTORS_FOR_GPU_BUILD} vectors, the data is handed to
+     *       {@code generateGpuGraphAndWriteMeta}, either directly from the memory-mapped file or, if the
+     *       input is not a {@code MemorySegmentAccessInput}, after copying it vector-by-vector into a
+     *       device matrix;</li>
+     *   <li>otherwise a mock graph is written.</li>
+     * </ul>
+     * The temporary file is deleted before this method returns, on success and on failure.
+     *
+     * @throws IOException if spilling the vectors fails; any failure while reading the temporary file or
+     *                     building the graph is wrapped in an {@code IOException}
+     */
+    @Override
+    public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
+        flatVectorWriter.mergeOneField(fieldInfo, mergeState);
+        final int numVectors;
+        String tempRawVectorsFileName = null;
+        boolean success = false;
+        // save merged vector values to a temp file
+        try (IndexOutput out = mergeState.segmentInfo.dir.createTempOutput(mergeState.segmentInfo.name, "vec_", IOContext.DEFAULT)) {
+            tempRawVectorsFileName = out.getName();
+            if (dataType == CuVSMatrix.DataType.BYTE) {
+                numVectors = writeByteVectorValues(out, getMergedByteVectorValues(fieldInfo, mergeState));
+            } else {
+                numVectors = writeFloatVectorValues(fieldInfo, out, MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
+            }
+            CodecUtil.writeFooter(out);
+            success = true;
+        } finally {
+            if (success == false && tempRawVectorsFileName != null) {
+                deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName);
+            }
+        }
+        try (IndexInput in = mergeState.segmentInfo.dir.openInput(tempRawVectorsFileName, IOContext.DEFAULT)) {
+            var input = FilterIndexInput.unwrapOnlyTest(in);
+
+            if (numVectors >= MIN_NUM_VECTORS_FOR_GPU_BUILD) {
+                if (input instanceof MemorySegmentAccessInput memorySegmentAccessInput) {
+                    // Acquire the GPU resources before creating the dataset: if acquire() fails there is
+                    // nothing to clean up, and the nested try/finally guarantees the resources are released
+                    // even when creating or closing the dataset throws.
+                    var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType);
+                    try {
+                        // Direct access to mmapped file
+                        final var dataset = DatasetUtils.getInstance()
+                            .fromInput(memorySegmentAccessInput, numVectors, fieldInfo.getVectorDimension(), dataType);
+                        try {
+                            generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset);
+                        } finally {
+                            dataset.close();
+                        }
+                    } finally {
+                        cuVSResourceManager.release(cuVSResources);
+                    }
+                } else {
+                    logger.debug(
+                        () -> "Cannot mmap merged raw vectors temporary file. IndexInput type [" + input.getClass().getSimpleName() + "]"
+                    );
+
+                    var cuVSResources = cuVSResourceManager.acquire(numVectors, fieldInfo.getVectorDimension(), dataType);
+                    try {
+                        // Read vector-by-vector
+                        var builder = CuVSMatrix.deviceBuilder(cuVSResources, numVectors, fieldInfo.getVectorDimension(), dataType);
+
+                        // During merging, we use quantized data, so we need to support byte[] too.
+                        // That's how our current formats work: use floats during indexing, and quantized data to build a graph
+                        // during merging.
+                        if (dataType == CuVSMatrix.DataType.FLOAT) {
+                            float[] vector = new float[fieldInfo.getVectorDimension()];
+                            for (int i = 0; i < numVectors; ++i) {
+                                input.readFloats(vector, 0, fieldInfo.getVectorDimension());
+                                builder.addVector(vector);
+                            }
+                        } else {
+                            assert dataType == CuVSMatrix.DataType.BYTE;
+                            byte[] vector = new byte[fieldInfo.getVectorDimension()];
+                            for (int i = 0; i < numVectors; ++i) {
+                                input.readBytes(vector, 0, fieldInfo.getVectorDimension());
+                                builder.addVector(vector);
+                            }
+                        }
+                        try (var dataset = builder.build()) {
+                            generateGpuGraphAndWriteMeta(cuVSResources, fieldInfo, dataset);
+                        }
+                    } finally {
+                        cuVSResourceManager.release(cuVSResources);
+                    }
+                }
+            } else {
+                // we don't really need real value for vectors here,
+                // we just build a mock graph where every node is connected to every other node
+                generateMockGraphAndWriteMeta(fieldInfo, numVectors);
+            }
+        } catch (Throwable t) {
+            throw new IOException("Failed to merge GPU index: ", t);
+        } finally {
+            deleteFilesIgnoringExceptions(mergeState.segmentInfo.dir, tempRawVectorsFileName);
+        }
+    }
+
+    private ByteVectorValues getMergedByteVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
+        // TODO: expose confidence interval from the format
+        final byte bits = 7;
+        final Float confidenceInterval = null;
+        ScalarQuantizer quantizer = mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval, bits);
+        return MergedQuantizedVectorValues.mergeQuantizedByteVectorValues(fieldInfo, mergeState, quantizer);
+    }
+
+    /**
+     * Writes every vector of {@code vectorValues}, in iteration order, to {@code out} as raw bytes.
+     *
+     * @return the number of vectors written
+     */
+    private static int writeByteVectorValues(IndexOutput out, ByteVectorValues vectorValues) throws IOException {
+        final KnnVectorValues.DocIndexIterator docs = vectorValues.iterator();
+        int written = 0;
+        while (docs.nextDoc() != NO_MORE_DOCS) {
+            final byte[] bytes = vectorValues.vectorValue(docs.index());
+            out.writeBytes(bytes, bytes.length);
+            written++;
+        }
+        return written;
+    }
+
+    /**
+     * Writes every vector of {@code floatVectorValues}, in iteration order, to {@code out} as little-endian
+     * floats. Each vector occupies {@code dimension * Float.BYTES} bytes.
+     *
+     * @return the number of vectors written
+     */
+    private static int writeFloatVectorValues(FieldInfo fieldInfo, IndexOutput out, FloatVectorValues floatVectorValues)
+        throws IOException {
+        final int bytesPerVector = fieldInfo.getVectorDimension() * Float.BYTES;
+        final ByteBuffer scratch = ByteBuffer.allocate(bytesPerVector).order(ByteOrder.LITTLE_ENDIAN);
+        final byte[] backing = scratch.array();
+        final KnnVectorValues.DocIndexIterator docs = floatVectorValues.iterator();
+        int written = 0;
+        while (docs.nextDoc() != NO_MORE_DOCS) {
+            // A fresh float view always starts at position 0, so each vector overwrites the previous one.
+            scratch.asFloatBuffer().put(floatVectorValues.vectorValue(docs.index()));
+            out.writeBytes(backing, backing.length);
+            written++;
+        }
+        return written;
+    }
+
+    /**
+     * Writes the metadata entry of one field to {@code meta}.
+     * <p>
+     * The entry holds the field number, vector encoding ordinal, similarity function ordinal, offset and
+     * length of the field's data in the vector index, the vector dimension and the vector count. It is
+     * followed by the graph description: max connections and number of levels, the delta-encoded sorted
+     * node ids of every level above 0, and finally the per-node offsets, written through a
+     * {@link DirectMonotonicWriter} whose data goes to {@code vectorIndex}.
+     *
+     * @param field                 the field being written
+     * @param vectorIndexOffset     start of the field's graph data in the vector index
+     * @param vectorIndexLength     length of the field's graph data in the vector index
+     * @param count                 number of vectors
+     * @param graph                 the graph to describe, or {@code null} to write {@code M} and zero levels
+     * @param graphLevelNodeOffsets per level, the serialized size of each node's neighbor list
+     */
+    private void writeMeta(
+        FieldInfo field,
+        long vectorIndexOffset,
+        long vectorIndexLength,
+        int count,
+        HnswGraph graph,
+        int[][] graphLevelNodeOffsets
+    ) throws IOException {
+        meta.writeInt(field.number);
+        meta.writeInt(field.getVectorEncoding().ordinal());
+        meta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
+        meta.writeVLong(vectorIndexOffset);
+        meta.writeVLong(vectorIndexLength);
+        meta.writeVInt(field.getVectorDimension());
+        meta.writeInt(count);
+        // write graph nodes on each level
+        if (graph == null) {
+            // no graph: record the default max connections and zero levels
+            meta.writeVInt(M);
+            meta.writeVInt(0);
+        } else {
+            meta.writeVInt(graph.maxConn());
+            meta.writeVInt(graph.numLevels());
+            // total number of nodes over all levels, i.e. the number of offsets written below
+            long valueCount = 0;
+
+            for (int level = 0; level < graph.numLevels(); level++) {
+                NodesIterator nodesOnLevel = graph.getNodesOnLevel(level);
+                valueCount += nodesOnLevel.size();
+                if (level > 0) {
+                    int[] nol = new int[nodesOnLevel.size()];
+                    int numberConsumed = nodesOnLevel.consume(nol);
+                    Arrays.sort(nol);
+                    assert numberConsumed == nodesOnLevel.size();
+                    meta.writeVInt(nol.length); // number of nodes on a level
+                    // delta-encode in place, from the end so each step still sees the original predecessor
+                    for (int i = nodesOnLevel.size() - 1; i > 0; --i) {
+                        nol[i] -= nol[i - 1];
+                    }
+                    for (int n : nol) {
+                        assert n >= 0 : "delta encoding for nodes failed; expected nodes to be sorted";
+                        meta.writeVInt(n);
+                    }
+                } else {
+                    // level 0 node ids are not written; only its size is checked
+                    assert nodesOnLevel.size() == count : "Level 0 expects to have all nodes";
+                }
+            }
+            long start = vectorIndex.getFilePointer();
+            meta.writeLong(start);
+            meta.writeVInt(LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT);
+            final DirectMonotonicWriter memoryOffsetsWriter = DirectMonotonicWriter.getInstance(
+                meta,
+                vectorIndex,
+                valueCount,
+                LUCENE99_HNSW_DIRECT_MONOTONIC_BLOCK_SHIFT
+            );
+            // turn the per-node sizes into running start offsets (the first node starts at 0)
+            long cumulativeOffsetSum = 0;
+            for (int[] levelOffsets : graphLevelNodeOffsets) {
+                for (int v : levelOffsets) {
+                    memoryOffsetsWriter.add(cumulativeOffsetSum);
+                    cumulativeOffsetSum += v;
+                }
+            }
+            memoryOffsetsWriter.finish();
+            // number of bytes the offsets occupy in the vector index
+            meta.writeLong(vectorIndex.getFilePointer() - start);
+        }
+    }
+
+    /** Closes the metadata output, the vector index output and the delegate flat vector writer. */
+    @Override
+    public void close() throws IOException {
+        IOUtils.close(meta, vectorIndex, flatVectorWriter);
+    }
+
+    /**
+     * Returns the position of {@code func} within {@code SIMILARITY_FUNCTIONS}.
+     *
+     * @throws IllegalArgumentException if {@code func} is not in the list
+     */
+    static int distFuncToOrd(VectorSimilarityFunction func) {
+        int position = 0;
+        for (var candidate : SIMILARITY_FUNCTIONS) {
+            if (candidate.equals(func)) {
+                return (byte) position;
+            }
+            position++;
+        }
+        throw new IllegalArgumentException("invalid distance function: " + func);
+    }
+
+    /**
+     * Per-field writer that forwards every value to a {@link FlatFieldVectorsWriter} and rejects a second
+     * value for the document that was added last.
+     */
+    private static class FieldWriter extends KnnFieldVectorsWriter<float[]> {
+        private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class);
+
+        private final FlatFieldVectorsWriter<float[]> flatFieldVectorsWriter;
+        private final FieldInfo fieldInfo;
+        // doc id of the most recently added value; -1 until the first value is added
+        private int lastDocID = -1;
+
+        FieldWriter(FlatFieldVectorsWriter<float[]> flatFieldVectorsWriter, FieldInfo fieldInfo) {
+            this.flatFieldVectorsWriter = Objects.requireNonNull(flatFieldVectorsWriter);
+            this.fieldInfo = fieldInfo;
+        }
+
+        /**
+         * Adds the vector of {@code docID}.
+         *
+         * @throws IllegalArgumentException if {@code docID} equals the previously added document
+         */
+        @Override
+        public void addValue(int docID, float[] vectorValue) throws IOException {
+            if (docID != lastDocID) {
+                flatFieldVectorsWriter.addValue(docID, vectorValue);
+                lastDocID = docID;
+                return;
+            }
+            throw new IllegalArgumentException(
+                "VectorValuesField \""
+                    + fieldInfo.name
+                    + "\" appears more than once in this document (only one value is allowed per field)"
+            );
+        }
+
+        /** Returns the documents-with-field set tracked by the delegate flat writer. */
+        public DocsWithFieldSet getDocsWithFieldSet() {
+            return flatFieldVectorsWriter.getDocsWithFieldSet();
+        }
+
+        /** Not supported by this writer. */
+        @Override
+        public float[] copyValue(float[] vectorValue) {
+            throw new UnsupportedOperationException();
+        }
+
+        /** Shallow size of this instance plus the RAM reported by the delegate flat writer. */
+        @Override
+        public long ramBytesUsed() {
+            final long delegateBytes = flatFieldVectorsWriter.ramBytesUsed();
+            return SHALLOW_SIZE + delegateBytes;
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java
new file mode 100644
index 0000000000000..4d3d5013dd381
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/java/org/elasticsearch/xpack/gpu/codec/MergedQuantizedVectorValues.java
@@ -0,0 +1,372 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2025 Elasticsearch B.V.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.index.DocIDMerger;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.util.VectorUtil;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedVectorsReader;
+import org.apache.lucene.util.quantization.ScalarQuantizer;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues;
+
+/**
+ * Copied from Lucene's Lucene99ScalarQuantizedVectorsWriter so that
+ * mergeQuantizedByteVectorValues can be used during segment merges.
+ */
+class MergedQuantizedVectorValues extends QuantizedByteVectorValues {
+    private static final float REQUANTIZATION_LIMIT = 0.2f;
+
+    private final List<QuantizedByteVectorValueSub> subs;
+    private final DocIDMerger<QuantizedByteVectorValueSub> docIdMerger;
+    private final int size;
+    private QuantizedByteVectorValueSub current;
+
+    /**
+     * Creates the merged view over {@code subs}. The reported size is the sum of the sizes of all subs.
+     */
+    private MergedQuantizedVectorValues(List<QuantizedByteVectorValueSub> subs, MergeState mergeState) throws IOException {
+        this.subs = subs;
+        this.docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
+        this.size = subs.stream().mapToInt(sub -> sub.values.size()).sum();
+    }
+
+    /**
+     * Returns the vector of the sub that the merged iterator is currently positioned on.
+     * Note that {@code ord} is not used: the lookup goes through {@code current}, which is set by
+     * {@code CompositeIterator#nextDoc()}.
+     */
+    @Override
+    public byte[] vectorValue(int ord) throws IOException {
+        return current.values.vectorValue(current.index());
+    }
+
+    /** Returns a new iterator that draws documents from the shared doc id merger. */
+    @Override
+    public DocIndexIterator iterator() {
+        return new MergedQuantizedVectorValues.CompositeIterator();
+    }
+
+    /** Returns the sum of the sizes of all subs, computed in the constructor. */
+    @Override
+    public int size() {
+        return size;
+    }
+
+    /** Returns the dimension of the first sub; this requires at least one sub. */
+    @Override
+    public int dimension() {
+        return subs.get(0).values.dimension();
+    }
+
+    /**
+     * Returns the score correction constant of the sub that the merged iterator is currently positioned on.
+     * As in {@link #vectorValue(int)}, {@code ord} is not used.
+     */
+    @Override
+    public float getScoreCorrectionConstant(int ord) throws IOException {
+        return current.values.getScoreCorrectionConstant(current.index());
+    }
+
+    /**
+     * Forward-only iterator over the merged documents. Every call to {@link #nextDoc()} advances the
+     * enclosing instance's doc id merger and updates its {@code current} sub.
+     */
+    private class CompositeIterator extends DocIndexIterator {
+        // both start at -1, i.e. before the first document
+        private int docId = -1;
+        private int ord = -1;
+
+        @Override
+        public int docID() {
+            return docId;
+        }
+
+        @Override
+        public int index() {
+            return ord;
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+            final QuantizedByteVectorValueSub next = docIdMerger.next();
+            current = next;
+            if (next != null) {
+                ord++;
+                docId = next.mappedDocID;
+                return docId;
+            }
+            // exhausted: both the doc id and the ordinal report NO_MORE_DOCS
+            ord = NO_MORE_DOCS;
+            docId = NO_MORE_DOCS;
+            return docId;
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public long cost() {
+            return size;
+        }
+    }
+
+    private static QuantizedVectorsReader getQuantizedKnnVectorsReader(KnnVectorsReader vectorsReader, String fieldName) {
+        if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader candidateReader) {
+            vectorsReader = candidateReader.getFieldReader(fieldName);
+        }
+        if (vectorsReader instanceof QuantizedVectorsReader reader) {
+            return reader;
+        }
+        return null;
+    }
+
+    static MergedQuantizedVectorValues mergeQuantizedByteVectorValues(
+        FieldInfo fieldInfo,
+        MergeState mergeState,
+        ScalarQuantizer scalarQuantizer
+    ) throws IOException {
+        assert fieldInfo != null && fieldInfo.hasVectorValues();
+
+        List<QuantizedByteVectorValueSub> subs = new ArrayList<>();
+        for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) {
+            if (hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) {
+                QuantizedVectorsReader reader = getQuantizedKnnVectorsReader(mergeState.knnVectorsReaders[i], fieldInfo.name);
+                assert scalarQuantizer != null;
+                final QuantizedByteVectorValueSub sub;
+                // Either our quantization parameters are way different than the merged ones
+                // Or we have never been quantized.
+                if (reader == null || reader.getQuantizationState(fieldInfo.name) == null
+                // For smaller `bits` values, we should always recalculate the quantiles
+                // TODO: this is very conservative, could we reuse information for even int4
+                // quantization?
+                    || scalarQuantizer.getBits() <= 4
+                    || shouldRequantize(reader.getQuantizationState(fieldInfo.name), scalarQuantizer)) {
+                    FloatVectorValues toQuantize = mergeState.knnVectorsReaders[i].getFloatVectorValues(fieldInfo.name);
+                    if (fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE) {
+                        toQuantize = new NormalizedFloatVectorValues(toQuantize);
+                    }
+                    sub = new QuantizedByteVectorValueSub(
+                        mergeState.docMaps[i],
+                        new QuantizedFloatVectorValues(toQuantize, fieldInfo.getVectorSimilarityFunction(), scalarQuantizer)
+                    );
+                } else {
+                    sub = new QuantizedByteVectorValueSub(
+                        mergeState.docMaps[i],
+                        new OffsetCorrectedQuantizedByteVectorValues(
+                            reader.getQuantizedVectorValues(fieldInfo.name),
+                            fieldInfo.getVectorSimilarityFunction(),
+                            scalarQuantizer,
+                            reader.getQuantizationState(fieldInfo.name)
+                        )
+                    );
+                }
+                subs.add(sub);
+            }
+        }
+        return new MergedQuantizedVectorValues(subs, mergeState);
+    }
+
+    private static boolean shouldRequantize(ScalarQuantizer existingQuantiles, ScalarQuantizer newQuantiles) {
+        float tol = REQUANTIZATION_LIMIT * (newQuantiles.getUpperQuantile() - newQuantiles.getLowerQuantile()) / 128f;
+        if (Math.abs(existingQuantiles.getUpperQuantile() - newQuantiles.getUpperQuantile()) > tol) {
+            return true;
+        }
+        return Math.abs(existingQuantiles.getLowerQuantile() - newQuantiles.getLowerQuantile()) > tol;
+    }
+
+    /**
+     * A {@link DocIDMerger.Sub} for one segment: pairs the segment's quantized vector values with an
+     * iterator over them, so the merged view can read the vector at the iterator's current index.
+     */
+    private static class QuantizedByteVectorValueSub extends DocIDMerger.Sub {
+        private final QuantizedByteVectorValues values;
+        private final KnnVectorValues.DocIndexIterator iterator;
+
+        QuantizedByteVectorValueSub(MergeState.DocMap docMap, QuantizedByteVectorValues values) {
+            super(docMap);
+            this.values = values;
+            iterator = values.iterator();
+            // the iterator must be fresh, i.e. not yet positioned on a document
+            assert iterator.docID() == -1;
+        }
+
+        /** Advances the segment iterator and returns its next doc id. */
+        @Override
+        public int nextDoc() throws IOException {
+            return iterator.nextDoc();
+        }
+
+        /** Returns the index the segment iterator is currently positioned on. */
+        public int index() {
+            return iterator.index();
+        }
+    }
+
+    /**
+     * Quantizes float vectors on the fly. Only the most recently requested ordinal is kept: its quantized
+     * bytes live in a shared buffer and its corrective offset in {@code offsetValue}.
+     */
+    private static class QuantizedFloatVectorValues extends QuantizedByteVectorValues {
+        private final FloatVectorValues values;
+        private final VectorSimilarityFunction vectorSimilarityFunction;
+        private final ScalarQuantizer quantizer;
+        // reused for every vector; valid only for lastOrd
+        private final byte[] quantizedVector;
+        private float offsetValue = 0f;
+        private int lastOrd = -1;
+
+        QuantizedFloatVectorValues(FloatVectorValues values, VectorSimilarityFunction vectorSimilarityFunction, ScalarQuantizer quantizer) {
+            this.values = values;
+            this.vectorSimilarityFunction = vectorSimilarityFunction;
+            this.quantizer = quantizer;
+            this.quantizedVector = new byte[values.dimension()];
+        }
+
+        /** Quantizes the vector at {@code ord} unless it is the one already held, then returns the shared buffer. */
+        @Override
+        public byte[] vectorValue(int ord) throws IOException {
+            if (ord == lastOrd) {
+                return quantizedVector;
+            }
+            offsetValue = quantizer.quantize(values.vectorValue(ord), quantizedVector, vectorSimilarityFunction);
+            lastOrd = ord;
+            return quantizedVector;
+        }
+
+        /**
+         * Returns the corrective offset computed by the last quantization.
+         *
+         * @throws IllegalStateException if {@code ord} is not the ordinal that was quantized last
+         */
+        @Override
+        public float getScoreCorrectionConstant(int ord) {
+            if (ord == lastOrd) {
+                return offsetValue;
+            }
+            throw new IllegalStateException(
+                "attempt to retrieve score correction for different ord " + ord + " than the quantization was done for: " + lastOrd
+            );
+        }
+
+        @Override
+        public VectorScorer scorer(float[] target) throws IOException {
+            throw new UnsupportedOperationException();
+        }
+
+        // --- plain delegation to the wrapped float values ---
+
+        @Override
+        public int dimension() {
+            return values.dimension();
+        }
+
+        @Override
+        public int size() {
+            return values.size();
+        }
+
+        @Override
+        public int ordToDoc(int ord) {
+            return values.ordToDoc(ord);
+        }
+
+        @Override
+        public DocIndexIterator iterator() {
+            return values.iterator();
+        }
+    }
+
+    /**
+     * View over float vectors that returns an L2-normalized copy of each vector. The copy is held in a
+     * single buffer that is overwritten on every call to {@link #vectorValue(int)}.
+     */
+    private static final class NormalizedFloatVectorValues extends FloatVectorValues {
+        private final FloatVectorValues values;
+        private final float[] normalizedVector;
+
+        NormalizedFloatVectorValues(FloatVectorValues values) {
+            this.values = values;
+            this.normalizedVector = new float[values.dimension()];
+        }
+
+        @Override
+        public float[] vectorValue(int ord) throws IOException {
+            final float[] raw = values.vectorValue(ord);
+            // copy first so the wrapped values' own array is left untouched
+            System.arraycopy(raw, 0, normalizedVector, 0, normalizedVector.length);
+            VectorUtil.l2normalize(normalizedVector);
+            return normalizedVector;
+        }
+
+        @Override
+        public NormalizedFloatVectorValues copy() throws IOException {
+            final FloatVectorValues copied = values.copy();
+            return new NormalizedFloatVectorValues(copied);
+        }
+
+        // --- plain delegation to the wrapped float values ---
+
+        @Override
+        public int dimension() {
+            return values.dimension();
+        }
+
+        @Override
+        public int size() {
+            return values.size();
+        }
+
+        @Override
+        public int ordToDoc(int ord) {
+            return values.ordToDoc(ord);
+        }
+
+        @Override
+        public DocIndexIterator iterator() {
+            return values.iterator();
+        }
+    }
+
+    /**
+     * View over already quantized vectors that returns the stored bytes unchanged and recalculates the
+     * corrective offset of each vector from the old quantizer to the new one.
+     */
+    private static final class OffsetCorrectedQuantizedByteVectorValues extends QuantizedByteVectorValues {
+        private final QuantizedByteVectorValues in;
+        private final VectorSimilarityFunction vectorSimilarityFunction;
+        private final ScalarQuantizer scalarQuantizer;
+        private final ScalarQuantizer oldScalarQuantizer;
+
+        OffsetCorrectedQuantizedByteVectorValues(
+            QuantizedByteVectorValues in,
+            VectorSimilarityFunction vectorSimilarityFunction,
+            ScalarQuantizer scalarQuantizer,
+            ScalarQuantizer oldScalarQuantizer
+        ) {
+            this.in = in;
+            this.vectorSimilarityFunction = vectorSimilarityFunction;
+            this.scalarQuantizer = scalarQuantizer;
+            this.oldScalarQuantizer = oldScalarQuantizer;
+        }
+
+        /** Recalculates the corrective offset of the stored vector at {@code ord} for the new quantizer. */
+        @Override
+        public float getScoreCorrectionConstant(int ord) throws IOException {
+            final byte[] stored = in.vectorValue(ord);
+            return scalarQuantizer.recalculateCorrectiveOffset(stored, oldScalarQuantizer, vectorSimilarityFunction);
+        }
+
+        // --- plain delegation to the wrapped quantized values ---
+
+        @Override
+        public byte[] vectorValue(int ord) throws IOException {
+            return in.vectorValue(ord);
+        }
+
+        @Override
+        public int dimension() {
+            return in.dimension();
+        }
+
+        @Override
+        public int size() {
+            return in.size();
+        }
+
+        @Override
+        public int ordToDoc(int ord) {
+            return in.ordToDoc(ord);
+        }
+
+        @Override
+        public DocIndexIterator iterator() {
+            return in.iterator();
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml
new file mode 100644
index 0000000000000..d0c571b8538b2
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/plugin-metadata/entitlement-policy.yaml
@@ -0,0 +1,2 @@
+# Entitlements for com.nvidia.cuvs (presumably the cuVS Java module name; confirm):
+# the only entitlement listed is load_native_libraries.
+com.nvidia.cuvs:
+  - load_native_libraries
diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
new file mode 100644
index 0000000000000..7aa308150b6de
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
@@ -0,0 +1,3 @@
+
+org.elasticsearch.xpack.gpu.codec.ES92GpuHnswVectorsFormat
+org.elasticsearch.xpack.gpu.codec.ES92GpuHnswSQVectorsFormat
diff --git a/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification
new file mode 100644
index 0000000000000..63e111db1dd79
--- /dev/null
+++ b/x-pack/plugin/gpu/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification
@@ -0,0 +1,8 @@
+#
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0; you may not use this file except in compliance with the Elastic License
+# 2.0.
+#
+
+org.elasticsearch.xpack.gpu.GPUFeatures
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java
new file mode 100644
index 0000000000000..b466f37cbe9c9
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/CuVSResourceManagerTests.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CuVSMatrix;
+import com.nvidia.cuvs.CuVSResources;
+import com.nvidia.cuvs.CuVSResourcesInfo;
+import com.nvidia.cuvs.GPUInfo;
+import com.nvidia.cuvs.GPUInfoProvider;
+
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+import org.elasticsearch.test.ESTestCase;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.LongSupplier;
+
+import static org.hamcrest.Matchers.anyOf;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.not;
+
+public class CuVSResourceManagerTests extends ESTestCase {
+
+    private static final Logger log = LogManager.getLogger(CuVSResourceManagerTests.class);
+
+    public static final long TOTAL_DEVICE_MEMORY_IN_BYTES = 256L * 1024 * 1024;
+
+    /**
+     * Acquires and releases both pooled resources twice; every round is expected to hand out id=0 and then id=1.
+     */
+    public void testBasic() throws InterruptedException {
+        var manager = new MockPoolingCuVSResourceManager(2);
+        for (int round = 0; round < 2; round++) {
+            // numVectors = 0 and dims = 0: a request that carries no vector data
+            var first = manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+            var second = manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+            assertThat(first.toString(), containsString("id=0"));
+            assertThat(second.toString(), containsString("id=1"));
+            manager.release(first);
+            manager.release(second);
+        }
+        manager.shutdown();
+    }
+
+    /** A third acquire on a pool of two is expected to wait until one of the held resources is released. */
+    public void testBlocking() throws Exception {
+        var manager = new MockPoolingCuVSResourceManager(2);
+        var first = manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+        var second = manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+
+        AtomicReference<CuVSResources> acquired = new AtomicReference<>();
+        Thread waiter = new Thread(() -> {
+            try {
+                acquired.set(manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT));
+            } catch (InterruptedException e) {
+                throw new AssertionError(e);
+            }
+        });
+        waiter.start();
+        Thread.sleep(1_000); // give the waiter time to reach acquire(); it should still be blocked afterwards
+        assertNull(acquired.get());
+        manager.release(randomFrom(first, second));
+        waiter.join();
+        assertThat(acquired.get().toString(), anyOf(containsString("id=0"), containsString("id=1")));
+        manager.shutdown();
+    }
+
+    /** A second, slightly larger request is expected to block until the first allocation has been released. */
+    public void testBlockingOnInsufficientMemory() throws Exception {
+        var manager = new MockPoolingCuVSResourceManager(2);
+        var first = manager.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT);
+
+        AtomicReference<CuVSResources> acquired = new AtomicReference<>();
+        Thread waiter = new Thread(() -> {
+            try {
+                acquired.set(manager.acquire((16 * 1024) + 1, 1024, CuVSMatrix.DataType.FLOAT));
+            } catch (InterruptedException e) {
+                throw new AssertionError(e);
+            }
+        });
+        waiter.start();
+        Thread.sleep(1_000);
+        assertNull(acquired.get());
+        manager.release(first);
+        waiter.join();
+        assertThat(acquired.get().toString(), anyOf(containsString("id=0"), containsString("id=1")));
+        manager.shutdown();
+    }
+
+    /** A second, slightly smaller request is expected to succeed while the first allocation is still held. */
+    public void testNotBlockingOnSufficientMemory() throws Exception {
+        var manager = new MockPoolingCuVSResourceManager(2);
+        var first = manager.acquire(16 * 1024, 1024, CuVSMatrix.DataType.FLOAT);
+
+        AtomicReference<CuVSResources> acquired = new AtomicReference<>();
+        Thread worker = new Thread(() -> {
+            try {
+                acquired.set(manager.acquire((16 * 1024) - 1, 1024, CuVSMatrix.DataType.FLOAT));
+            } catch (InterruptedException e) {
+                throw new AssertionError(e);
+            }
+        });
+        worker.start();
+        worker.join(5_000);
+        assertNotNull(acquired.get());
+        assertThat(acquired.get().toString(), not(equalTo(first.toString())));
+        manager.shutdown();
+    }
+
+    public void testManagedResIsNotClosable() throws Exception {
+        var manager = new MockPoolingCuVSResourceManager(1);
+        var managed = manager.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+        assertThrows(UnsupportedOperationException.class, () -> managed.close()); // close() on a pooled resource must be rejected
+        manager.release(managed);
+        manager.shutdown();
+    }
+
+    public void testDoubleRelease() throws InterruptedException {
+        var mgr = new MockPoolingCuVSResourceManager(2);
+        var res1 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+        var res2 = mgr.acquire(0, 0, CuVSMatrix.DataType.FLOAT);
+        mgr.release(res1);
+        mgr.release(res2); // both resources are released now; the next release is the duplicate one
+        assertThrows(AssertionError.class, () -> mgr.release(randomFrom(res1, res2))); // assumes -ea — confirm
+        mgr.shutdown();
+    }
+
+    /** Pooling manager backed by mock resources; reported free memory is the device total minus the recorded allocations. */
+    static class MockPoolingCuVSResourceManager extends CuVSResourceManager.PoolingCuVSResourceManager {
+
+        private final AtomicInteger idGenerator = new AtomicInteger(); // ids for created MockCuVSResources, starting at 0
+        private final List<Long> allocations; // bytes per acquire: one entry added on acquire, the last one removed on release
+
+        MockPoolingCuVSResourceManager(int capacity) {
+            this(capacity, new ArrayList<>());
+        }
+
+        private MockPoolingCuVSResourceManager(int capacity, List<Long> allocationList) {
+            super(capacity, new MockGPUInfoProvider(() -> freeMemoryFunction(allocationList)));
+            this.allocations = allocationList;
+        }
+
+        private static long freeMemoryFunction(List<Long> allocations) {
+            return TOTAL_DEVICE_MEMORY_IN_BYTES - allocations.stream().mapToLong(x -> x).sum();
+        }
+
+        @Override
+        protected CuVSResources createNew() {
+            return new MockCuVSResources(idGenerator.getAndIncrement());
+        }
+
+        @Override
+        public ManagedCuVSResources acquire(int numVectors, int dims, CuVSMatrix.DataType dataType) throws InterruptedException {
+            var res = super.acquire(numVectors, dims, dataType);
+            long memory = (long) ((long) numVectors * dims * Float.BYTES
+                * CuVSResourceManager.PoolingCuVSResourceManager.GPU_COMPUTATION_MEMORY_FACTOR); // long math: no int overflow
+            allocations.add(memory);
+            log.info("Added [{}]", memory);
+            return res;
+        }
+
+        @Override
+        public void release(ManagedCuVSResources resources) {
+            if (allocations.isEmpty() == false) {
+                log.info("Removed [{}]", allocations.removeLast());
+            }
+            super.release(resources);
+        }
+    }
+
+    static class MockCuVSResources implements CuVSResources { // stub resource; only its id and toString() carry information
+
+        final int id;
+
+        MockCuVSResources(int id) {
+            this.id = id;
+        }
+
+        @Override
+        public ScopedAccess access() {
+            throw new UnsupportedOperationException(); // not implemented by this mock
+        }
+
+        @Override
+        public int deviceId() {
+            return 0; // always reports device 0
+        }
+
+        @Override
+        public void close() {} // no-op
+
+        @Override
+        public Path tempDirectory() {
+            throw new UnsupportedOperationException(); // not implemented by this mock
+        }
+
+        @Override
+        public String toString() {
+            return "MockCuVSResources[id=" + id + "]"; // the tests in this class match on the "id=N" part
+        }
+    }
+
+    private static class MockGPUInfoProvider implements GPUInfoProvider { // reports free memory from a caller-supplied function
+        private final LongSupplier freeMemorySupplier;
+
+        MockGPUInfoProvider(LongSupplier freeMemorySupplier) {
+            this.freeMemorySupplier = freeMemorySupplier;
+        }
+
+        @Override
+        public List<GPUInfo> availableGPUs() {
+            throw new UnsupportedOperationException(); // not implemented by this mock
+        }
+
+        @Override
+        public List<GPUInfo> compatibleGPUs() {
+            throw new UnsupportedOperationException(); // not implemented by this mock
+        }
+
+        @Override
+        public CuVSResourcesInfo getCurrentInfo(CuVSResources cuVSResources) {
+            return new CuVSResourcesInfo(freeMemorySupplier.getAsLong(), TOTAL_DEVICE_MEMORY_IN_BYTES); // argument is ignored
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java
new file mode 100644
index 0000000000000..6c43843dbd830
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/DatasetUtilsTests.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import com.nvidia.cuvs.CuVSMatrix;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.store.MemorySegmentAccessInput;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+import org.junit.Before;
+
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.ByteOrder;
+
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+
+public class DatasetUtilsTests extends ESTestCase {
+
+    DatasetUtils datasetUtils;
+
+    @Before
+    public void setup() {  // TODO: abstract out setup into a common GPUTestCase
+        assumeTrue("cuvs runtime only supported on 22 or greater, your JDK is " + Runtime.version(), Runtime.version().feature() >= 22);
+        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
+        datasetUtils = DatasetUtils.getInstance();
+    }
+
+    static final ValueLayout.OfFloat JAVA_FLOAT_LE = ValueLayout.JAVA_FLOAT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
+
+    public void testBasic() throws Exception {
+        try (Directory dir = new MMapDirectory(createTempDir("testBasic"))) {
+            int numVecs = randomIntBetween(1, 100);
+            int dims = randomIntBetween(128, 2049);
+            // write numVecs vectors of dims little-endian floats each, then wrap the file as a dataset and check its shape
+            try (var out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
+                var ba = new byte[dims * Float.BYTES]; // reusable buffer sized for exactly one vector
+                var seg = MemorySegment.ofArray(ba);
+                for (int v = 0; v < numVecs; v++) {
+                    var src = MemorySegment.ofArray(randomVector(dims));
+                    MemorySegment.copy(src, JAVA_FLOAT_UNALIGNED, 0L, seg, JAVA_FLOAT_LE, 0L, dims); // all dims floats
+                    out.writeBytes(ba, 0, ba.length);
+                }
+            }
+            try (
+                var in = dir.openInput("vector.data", IOContext.DEFAULT);
+                var dataset = datasetUtils.fromInput((MemorySegmentAccessInput) in, numVecs, dims, CuVSMatrix.DataType.FLOAT)
+            ) {
+                assertEquals(numVecs, dataset.size());
+                assertEquals(dims, dataset.columns());
+            }
+        }
+    }
+
+    static final Class<IllegalArgumentException> IAE = IllegalArgumentException.class;
+
+    public void testIllegal() {
+        MemorySegmentAccessInput in = null; // TODO: make this non-null; for now the size checks are expected to fail first
+        expectThrows(IAE, () -> datasetUtils.fromInput(in, -1, 1, CuVSMatrix.DataType.FLOAT)); // negative vector count
+        expectThrows(IAE, () -> datasetUtils.fromInput(in, 1, -1, CuVSMatrix.DataType.FLOAT)); // negative dimension count
+    }
+
+    float[] randomVector(int dims) { // returns dims floats drawn from the test's random() source
+        float[] fa = new float[dims];
+        for (int i = 0; i < dims; ++i) {
+            fa[i] = random().nextFloat();
+        }
+        return fa;
+    }
+}
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java
new file mode 100644
index 0000000000000..f1c13b15795c5
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswSQVectorsFormatTests.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+import org.junit.BeforeClass;
+
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310") // CuVS prints tons of logs to stdout
+public class ES92GpuHnswSQVectorsFormatTests extends BaseKnnVectorsFormatTestCase {
+
+    static {
+        LogConfigurator.loadLog4jPlugins();
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    static Codec codec; // assigned in beforeClass; stays null when the GPU assumption fails
+
+    @BeforeClass
+    public static void beforeClass() {
+        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
+        codec = TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswSQVectorsFormat());
+    }
+
+    @Override
+    protected Codec getCodec() {
+        return codec;
+    }
+
+    @Override
+    protected VectorSimilarityFunction randomSimilarity() {
+        return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)]; // any similarity
+    }
+
+    @Override
+    protected VectorEncoding randomVectorEncoding() {
+        return VectorEncoding.FLOAT32; // float vectors only; the inherited byte-vector tests are overridden as no-ops below
+    }
+
+    @Override
+    public void testRandomBytes() {
+        // No bytes support
+    }
+
+    @Override
+    public void testSortedIndexBytes() {
+        // No bytes support
+    }
+
+    @Override
+    public void testByteVectorScorerIteration() {
+        // No bytes support
+    }
+
+    @Override
+    public void testEmptyByteVectorData() {
+        // No bytes support
+    }
+
+    @Override
+    public void testMergingWithDifferentByteKnnFields() {
+        // No bytes support
+    }
+
+    @Override
+    public void testMismatchedFields() {
+        // No bytes support
+    }
+}
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java
new file mode 100644
index 0000000000000..e7ce310d15d9b
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/ES92GpuHnswVectorsFormatTests.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.elasticsearch.common.logging.LogConfigurator;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+import org.junit.BeforeClass;
+
+// CuVS prints tons of logs to stdout, so the sysout check is suppressed for this class
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "https://github.com/rapidsai/cuvs/issues/1310")
+public class ES92GpuHnswVectorsFormatTests extends BaseKnnVectorsFormatTestCase {
+
+    static {
+        LogConfigurator.loadLog4jPlugins();
+        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
+    }
+
+    static Codec codec; // assigned in beforeClass; stays null when the GPU assumption fails
+
+    @BeforeClass
+    public static void beforeClass() {
+        assumeTrue("cuvs not supported", GPUSupport.isSupported(false));
+        codec = TestUtil.alwaysKnnVectorsFormat(new ES92GpuHnswVectorsFormat());
+    }
+
+    @Override
+    protected Codec getCodec() {
+        return codec;
+    }
+
+    @Override
+    protected VectorSimilarityFunction randomSimilarity() {
+        return VectorSimilarityFunction.values()[random().nextInt(VectorSimilarityFunction.values().length)]; // any similarity
+    }
+
+    @Override
+    protected VectorEncoding randomVectorEncoding() {
+        return VectorEncoding.FLOAT32; // float vectors only; the inherited byte-vector tests are overridden as no-ops below
+    }
+
+    @Override
+    public void testRandomBytes() throws Exception {
+        // No bytes support
+    }
+
+    @Override
+    public void testSortedIndexBytes() throws Exception {
+        // No bytes support
+    }
+
+    @Override
+    public void testByteVectorScorerIteration() throws Exception {
+        // No bytes support
+    }
+
+    @Override
+    public void testEmptyByteVectorData() throws Exception {
+        // No bytes support
+    }
+
+    @Override
+    public void testMergingWithDifferentByteKnnFields() throws Exception {
+        // No bytes support
+    }
+
+    @Override
+    public void testMismatchedFields() throws Exception {
+        // No bytes support
+    }
+
+}
diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java
new file mode 100644
index 0000000000000..2648691d03eec
--- /dev/null
+++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.gpu.codec;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.index.codec.CodecService;
+import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec;
+import org.elasticsearch.index.codec.PerFieldMapperCodec;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTests;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.gpu.GPUPlugin;
+import org.elasticsearch.xpack.gpu.GPUSupport;
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+
+import static org.hamcrest.Matchers.instanceOf;
+
+public class GPUDenseVectorFieldMapperTests extends DenseVectorFieldMapperTests {
+
+    @BeforeClass
+    public static void setup() {
+        assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); // skip the whole class when this check fails
+    }
+
+    @Override
+    protected Collection<Plugin> getPlugins() {
+        var plugin = new GPUPlugin();
+        return Collections.singletonList(plugin); // the GPU plugin is the only plugin returned
+    }
+
+    @Override
+    public void testKnnVectorsFormat() throws IOException {
+        // TODO improve test with custom parameters
+        KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("hnsw"); // mapping does not set index.vectors.indexing.use_gpu
+        String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, "
+            + "maxConn=16, beamWidth=128, flatVectorFormat=Lucene99FlatVectorsFormat)";
+        assertEquals(expectedStr, knnVectorsFormat.toString());
+    }
+
+    @Override
+    public void testKnnQuantizedHNSWVectorsFormat() throws IOException {
+        // TODO improve the test with custom parameters
+        KnnVectorsFormat knnVectorsFormat = getKnnVectorsFormat("int8_hnsw");
+        String expectedStr = "Lucene99HnswVectorsFormat(name=Lucene99HnswVectorsFormat, "
+            + "maxConn=16, beamWidth=128, flatVectorFormat=ES814ScalarQuantizedVectorsFormat";
+        assertTrue(knnVectorsFormat.toString().startsWith(expectedStr)); // prefix match only: the expected string is left open
+    }
+
+    private KnnVectorsFormat getKnnVectorsFormat(String indexOptionsType) throws IOException { // format chosen for "field"
+        final int dims = randomIntBetween(128, 4096);
+        MapperService mapperService = createMapperService(fieldMapping(b -> {
+            b.field("type", "dense_vector");
+            b.field("dims", dims);
+            b.field("index", true);
+            b.field("similarity", "dot_product");
+            b.startObject("index_options");
+            b.field("type", indexOptionsType);
+            b.endObject();
+        }));
+        CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE);
+        Codec codec = codecService.codec("default");
+        if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) {
+            assertThat(codec, instanceOf(PerFieldMapperCodec.class));
+            return ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field");
+        } else {
+            if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) {
+                codec = deduplicateFieldInfosCodec.delegate(); // unwrap to reach the wrapped codec
+            }
+            assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class));
+            return ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field");
+        }
+    }
+}
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
new file mode 100644
index 0000000000000..c4e7e936b0111
--- /dev/null
+++ b/x-pack/plugin/gpu/src/yamlRestTest/java/org/elasticsearch/xpack/gpu/GPUClientYamlTestSuiteIT.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+package org.elasticsearch.xpack.gpu;
+
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+
+import org.elasticsearch.test.cluster.ElasticsearchCluster;
+import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
+import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+
+public class GPUClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
+
+    @BeforeClass
+    public static void setup() {
+        assumeTrue("cuvs not supported", GPUSupport.isSupported(false)); // skip the suite when this check fails
+    }
+
+    @ClassRule // NOTE(review): class rules wrap @BeforeClass, so the cluster presumably starts before the assumption — confirm
+    public static ElasticsearchCluster cluster = createCluster();
+
+    private static ElasticsearchCluster createCluster() { // single local node with the gpu module, trial license, no security
+        var builder = ElasticsearchCluster.local()
+            .nodes(1)
+            .module("gpu")
+            .setting("xpack.license.self_generated.type", "trial")
+            .setting("xpack.security.enabled", "false");
+
+        var libraryPath = System.getenv("LD_LIBRARY_PATH"); // forwarded to the node's environment when it is set
+        if (libraryPath != null) {
+            builder.environment("LD_LIBRARY_PATH", libraryPath);
+        }
+        return builder.build();
+    }
+
+    public GPUClientYamlTestSuiteIT(final ClientYamlTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws Exception {
+        return ESClientYamlSuiteTestCase.createParameters();
+    }
+
+    @Override
+    protected String getTestRestCluster() {
+        return cluster.getHttpAddresses();
+    }
+}
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml
new file mode 100644
index 0000000000000..28cce941f0916
--- /dev/null
+++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/10_hnsw.yml
@@ -0,0 +1,139 @@
+---
+"Test GPU vector operations":
+
+  - requires:
+      cluster_features: [ "vectors.indexing.use_gpu" ]
+      reason: "A cluster should have a GPU plugin to run these tests"
+
+  # creating an index is successful even if the GPU is not available
+  - do:
+      indices.create:
+        index: my_vectors
+        body:
+          mappings:
+            properties:
+              embedding:
+                type: dense_vector
+                dims: 24
+                similarity: l2_norm
+                index_options:
+                  type: hnsw
+          settings:
+            index.number_of_shards: 1
+            index.vectors.indexing.use_gpu: true
+  - match: { error: null }
+
+
+  - do:  # first batch: docs 1-3; every component of doc N's vector is N/10
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - index:
+              _id: "1"
+          - text: "First document"
+            embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+          - index:
+              _id: "2"
+          - text: "Second document"
+            embedding: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]
+          - index:
+              _id: "3"
+          - text: "Third document"
+            embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
+  - match: { errors: false }
+
+  - do:  # second batch, sent as a separate refreshed bulk request: docs 4-7, same N/10 pattern
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - index:
+              _id: "4"
+          - text: "Fourth document"
+            embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]
+          - index:
+              _id: "5"
+          - text: "Fifth document"
+            embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
+          - index:
+              _id: "6"
+          - text: "Sixth document"
+            embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6]
+          - index:
+              _id: "7"
+          - text: "Seventh document"
+            embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7]
+  - match: { errors: false }
+
+  - do:  # the query equals doc 7's vector, so 7 is the exact match and 6 is the next closest by l2_norm
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7]
+            k: 2
+
+  - match: { hits.hits.0._id: "7" }
+  - match: { hits.hits.1._id: "6" }
+
+  - do:  # delete the two extremes (docs 1 and 7) so later searches must skip deleted vectors
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - delete:
+              _id: "1"
+          - delete:
+              _id: "7"
+  - match: { errors: false }
+
+  - do:  # same query as before; with doc 7 deleted the closest remaining docs are 6 and then 5
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7]
+            k: 2
+  - match: { hits.hits.0._id: "6" }
+  - match: { hits.hits.1._id: "5" }
+
+  - do:  # overwrite doc 6: its vector moves from all 0.6 to all 0.16
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - index:
+              _id: "6"
+          - text: "Sixth document"
+            embedding: [0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16, 0.16]
+  - match: { errors: false }
+
+  - do:  # query near 0.1: doc 1 is deleted, so the updated doc 6 (0.16) is closest, followed by doc 2 (0.2)
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+            k: 2
+  - match: { hits.hits.0._id: "6" }
+  - match: { hits.hits.1._id: "2" }
+
+  - do:  # merge everything down to a single segment before the final search
+      indices.forcemerge:
+        index: my_vectors
+        max_num_segments: 1
+
+  - do:  # after the merge, doc 7 is still gone and doc 6 is now 0.16, so the closest to 0.7 are 5 and then 4
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7]
+            k: 2
+  - match: { hits.hits.0._id: "5" }
+  - match: { hits.hits.1._id: "4" }
diff --git a/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml
new file mode 100644
index 0000000000000..e0a6f42409b66
--- /dev/null
+++ b/x-pack/plugin/gpu/src/yamlRestTest/resources/rest-api-spec/test/gpu/20_int8_hnsw.yml
@@ -0,0 +1,148 @@
+---
+"Test GPU vector operations":
+
+  - requires:  # gate this test on the cluster feature listed below
+      cluster_features: [ "vectors.indexing.use_gpu" ]
+      reason: "A cluster should have a GPU plugin to run these tests"
+
+  # creating an index is successful even if the GPU is not available
+  - do:
+      indices.create:
+        index: my_vectors
+        body:
+          mappings:
+            properties:
+              embedding:
+                type: dense_vector
+                dims: 24  # length of every vector indexed and queried in this test
+                similarity: l2_norm
+                index_options:
+                  type: int8_hnsw  # the index type this test file exercises
+          settings:
+            index.number_of_shards: 1
+            index.vectors.indexing.use_gpu: true
+            index.refresh_interval: -1 # disable automatic refresh to ensure documents are indexed together
+  - match: { error: null }  # the create-index response must carry no error
+
+  - do:  # index docs "1".."10"; doc N's embedding has every component equal to N/10
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - index:
+              _id: "1"
+          - text: "First document"
+            embedding: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+          - index:
+              _id: "2"
+          - text: "Second document"
+            embedding: [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]
+          - index:
+              _id: "3"
+          - text: "Third document"
+            embedding: [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
+          - index:
+              _id: "4"
+          - text: "Fourth document"
+            embedding: [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4]
+          - index:
+              _id: "5"
+          - text: "Fifth document"
+            embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
+          - index:
+              _id: "6"
+          - text: "Sixth document"
+            embedding: [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6]
+          - index:
+              _id: "7"
+          - text: "Seventh document"
+            embedding: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7]
+          - index:
+              _id: "8"
+          - text: "Eighth document"
+            embedding: [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8]
+          - index:
+              _id: "9"
+          - text: "Ninth document"
+            embedding: [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]
+          - index:
+              _id: "10"
+          - text: "Tenth document"
+            embedding: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+  - match: { errors: false }  # the bulk response must report no item failures
+
+  - do:  # baseline query: the vector equals doc "10"'s embedding
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            k: 2
+  - match: { hits.hits.0._id: "10" }
+  - match: { hits.hits.1._id: "9" }  # doc "9" (0.9) is the next-closest embedding
+
+  - do:  # delete docs "1" and "10", the two ends of the indexed range
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - delete:
+              _id: "1"
+          - delete:
+              _id: "10"
+  - match: { errors: false }  # the bulk response must report no item failures
+
+  - do:  # force-merge after the deletes, before searching again
+      indices.forcemerge:
+        index: my_vectors
+        max_num_segments: 1  # merge down to a single segment
+
+  - do:  # repeat the 1.0 query now that doc "10" is deleted and the index is merged
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            k: 2
+  - match: { hits.hits.0._id: "9" }  # "9" and "8" are the closest remaining embeddings
+  - match: { hits.hits.1._id: "8" }
+
+  - do:  # re-index doc "2" with every component set to 1.0
+      bulk:
+        index: my_vectors
+        refresh: true
+        body:
+          - index:
+              _id: "2"
+          - text: "Second document"
+            embedding: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+  - match: { errors: false }  # fail here, not in a later search, if the re-index was rejected
+
+  - do:  # force-merge again so the next searches run against the merged index
+      indices.forcemerge:
+        index: my_vectors
+        max_num_segments: 1  # merge down to a single segment
+
+  - do:  # 1.0 query after doc "2" was re-indexed with an all-1.0 embedding
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            k: 2
+  - match: { hits.hits.0._id: "2" }  # doc "2" now equals the query vector
+  - match: { hits.hits.1._id: "9" }
+
+  - do:  # 0.1 query: doc "1" is deleted and doc "2" now sits at 1.0
+      search:
+        index: my_vectors
+        body:
+          knn:
+            field: embedding
+            query_vector: [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
+            k: 2
+  - match: { hits.hits.0._id: "3" }  # "3" (0.3) and "4" (0.4) are the closest remaining embeddings
+  - match: { hits.hits.1._id: "4" }
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
index b7c01ce817b32..525d343826075 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -1265,7 +1265,8 @@ private static Mapper.Builder createEmbeddingsField(
                 DenseVectorFieldMapper.Builder denseVectorMapperBuilder = new DenseVectorFieldMapper.Builder(
                     CHUNKED_EMBEDDINGS_FIELD,
                     indexVersionCreated,
-                    false
+                    false,
+                    List.of()
                 );
 
                 configureDenseVectorMapperBuilder(indexVersionCreated, denseVectorMapperBuilder, modelSettings, indexOptions);