From 52630aae83da9d613c97fdcff76b5e1849a35c3c Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 25 Sep 2025 16:09:46 -0400 Subject: [PATCH 1/3] Add notation for chunk rescorer --- output/schema/schema.json | 196 ++++++++++++++++-- output/typescript/types.ts | 15 ++ specification/_types/Retriever.ts | 17 +- .../_types/mapping/ChunkingSettings.ts | 14 ++ 4 files changed, 225 insertions(+), 17 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index 6803d265e4..ed56732d19 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -50602,6 +50602,40 @@ } } }, + { + "kind": "interface", + "name": { + "name": "ChunkRescorer", + "namespace": "_types" + }, + "properties": [ + { + "description": "The number of chunks per document to evaluate for reranking.", + "name": "size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Chunking settings to apply", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ChunkRescorerChunkingSettings", + "namespace": "_types.mapping" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L176-L181" + }, { "kind": "type_alias", "name": { @@ -53682,7 +53716,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L85-L89" + "specLocation": "_types/Retriever.ts#L86-L90" }, { "kind": "type_alias", @@ -53938,7 +53972,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L115-L133" + "specLocation": "_types/Retriever.ts#L116-L134" }, { "kind": "interface", @@ -54241,7 +54275,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L68-L75" + "specLocation": "_types/Retriever.ts#L69-L76" }, { "kind": "type_alias", @@ -55283,7 +55317,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L77-L83" + "specLocation": "_types/Retriever.ts#L78-L84" }, { "kind": "type_alias", @@ -55668,7 +55702,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L135-L144" + "specLocation": "_types/Retriever.ts#L136-L145" }, { "kind": "interface", @@ -56076,7 +56110,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L62-L66" + "specLocation": "_types/Retriever.ts#L63-L67" }, { "kind": "enum", @@ -56197,7 +56231,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L53-L60" + "specLocation": "_types/Retriever.ts#L54-L61" }, { "kind": "interface", @@ -56303,7 +56337,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L28-L51", + "specLocation": "_types/Retriever.ts#L29-L52", "variants": { "kind": "container" } @@ -56436,7 +56470,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L159-L168" + "specLocation": "_types/Retriever.ts#L165-L174" }, { "kind": "type_alias", @@ -56511,7 +56545,7 @@ "name": "ScoreNormalizer", "namespace": "_types" }, - "specLocation": "_types/Retriever.ts#L91-L95" + "specLocation": "_types/Retriever.ts#L92-L96" }, { "kind": "interface", @@ -58055,7 +58089,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L97-L100" + "specLocation": "_types/Retriever.ts#L98-L101" }, { "kind": "interface", @@ -58131,7 +58165,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L102-L113" + "specLocation": "_types/Retriever.ts#L103-L114" }, { "kind": "interface", @@ -58475,7 +58509,7 @@ } }, { - "description": "The text snippet used as the basis for similarity comparison", + "description": "The text snippet used as the basis for similarity comparison.", "name": "inference_text", "required": true, "type": { @@ -58487,7 +58521,7 @@ } }, { - "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text.", "name": "field", "required": true, "type": { @@ -58497,9 +58531,30 @@ "namespace": "_builtins" } } + }, + { + "availability": { + "serverless": { + "stability": "experimental" + }, + "stack": { + "since": "9.2.0", + "stability": "experimental" + } + }, + "description": "Whether to rescore on only the best matching chunks.", + "name": "chunk_rescorer", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ChunkRescorer", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L146-L157" + "specLocation": "_types/Retriever.ts#L147-L163" }, { "kind": "enum", @@ -82451,6 +82506,117 @@ ], "specLocation": "_types/mapping/core.ts#L188-L191" }, + { + "kind": "interface", + "attachedBehaviors": [ + "OverloadOf" + ], + "behaviors": [ + { + "generics": [ + { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + ], + "type": { + "name": "OverloadOf", + "namespace": "_spec_utils" + } + } + ], + "name": { + "name": "ChunkRescorerChunkingSettings", + "namespace": "_types.mapping" + }, + "properties": [ + { + "description": "The chunking strategy: `sentence`, `word`, `none` or `recursive`.\n\n * If `strategy` is set to `recursive`, you must also specify:\n\n- `max_chunk_size`\n- either `separators` or`separator_group`\n\nLearn more about different chunking strategies in the linked documentation.", + "extDocId": "chunking-strategies", + "extDocUrl": "https://www.elastic.co/docs/explore-analyze/elastic-inference/inference-api#chunking-strategies", + "name": "strategy", + "required": false, + "serverDefault": "sentence", + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Only applicable to the `recursive` strategy and required when using it.\n\nSets a predefined list of separators in the saved chunking settings based on the selected text type.\nValues can be `markdown` or `plaintext`.\n\nUsing this parameter is an alternative to manually specifying a custom `separators` list.", + "name": "separator_group", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "Only applicable to the `recursive` strategy and required when using it.\n\nA list of strings used as possible split points when chunking text.\n\nEach string can be a plain string or a regular expression (regex) pattern.\nThe system tries each separator in order to split the text, starting from the first item in the list.\n\nAfter splitting, it attempts to recombine smaller pieces into larger chunks that stay within\nthe `max_chunk_size` limit, to reduce the total number of chunks generated.", + "name": "separators", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The maximum size of a chunk in words.\nThis value cannot be lower than `20` (for `sentence` strategy) or `10` (for `word` strategy).\nThis value should not exceed the window size for the associated model.", + "name": "max_chunk_size", + "required": true, + "serverDefault": 250, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping words for chunks.\nIt is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.", + "name": "overlap", + "required": false, + "serverDefault": 100, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "The number of overlapping sentences for chunks.\nIt is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.", + "name": "sentence_overlap", + "required": false, + "serverDefault": 1, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "_types/mapping/ChunkingSettings.ts#L38-L50" + }, { "kind": "interface", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index de561b77c2..9f2455a4bd 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -2222,6 +2222,11 @@ export type Bytes = 'b' | 'kb' | 'mb' | 'gb' | 'tb' | 'pb' export type CategoryId = string +export interface ChunkRescorer { + size?: integer + chunking_settings?: MappingChunkRescorerChunkingSettings +} + export type ClusterAlias = string export interface ClusterDetails { @@ -3020,6 +3025,7 @@ export interface TextSimilarityReranker extends RetrieverBase { inference_id?: string inference_text: string field: string + chunk_rescorer?: ChunkRescorer } export type ThreadType = 'cpu' | 'wait' | 'block' | 'gpu' | 'mem' @@ -5588,6 +5594,15 @@ export interface MappingByteNumberProperty extends MappingNumberPropertyBase { null_value?: byte } +export interface MappingChunkRescorerChunkingSettings { + strategy?: string + separator_group?: string + separators?: string[] + max_chunk_size: integer + overlap?: integer + sentence_overlap?: integer +} + export interface MappingChunkingSettings { strategy: string separator_group?: string diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index 0e4f4f4294..a7f729cca1 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -24,6 +24,7 @@ import { Rescore } from '@global/search/_types/rescoring' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { Id, IndexName } from './common' import { QueryContainer } from './query_dsl/abstractions' +import { ChunkRescorerChunkingSettings } from './mapping/ChunkingSettings' /** * @variants container @@ -150,10 +151,15 @@ export class TextSimilarityReranker extends RetrieverBase { rank_window_size?: integer /** Unique identifier of the inference endpoint created using the inference API. */ inference_id?: string - /** The text snippet used as the basis for similarity comparison */ + /** The text snippet used as the basis for similarity comparison. */ inference_text: string - /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text */ + /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text. */ field: string + /** Whether to rescore on only the best matching chunks. + * @availability stack since=9.2.0 stability=experimental + * @availability serverless stability=experimental + */ + chunk_rescorer?: ChunkRescorer } export class RuleRetriever extends RetrieverBase { @@ -166,3 +172,10 @@ export class RuleRetriever extends RetrieverBase { /** This value determines the size of the individual result set. */ rank_window_size?: integer } + +export class ChunkRescorer { + /** The number of chunks per document to evaluate for reranking. */ + size?: integer, + /** Chunking settings to apply */ + chunking_settings?: ChunkRescorerChunkingSettings +} diff --git a/specification/_types/mapping/ChunkingSettings.ts b/specification/_types/mapping/ChunkingSettings.ts index 75454d0956..e3d7e418c5 100644 --- a/specification/_types/mapping/ChunkingSettings.ts +++ b/specification/_types/mapping/ChunkingSettings.ts @@ -34,3 +34,17 @@ export class ChunkingSettings implements OverloadOf { sentence_overlap?: integer } + +export class ChunkRescorerChunkingSettings implements OverloadOf { + strategy?: string + + separator_group?: string + + separators?: string[] + + max_chunk_size: integer + + overlap?: integer + + sentence_overlap?: integer +} From ec18411069ab0c89e5ef8313b7e72edf307a3e8d Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 25 Sep 2025 16:31:32 -0400 Subject: [PATCH 2/3] Mark as beta rather than experimental --- output/schema/schema.json | 4 ++-- specification/_types/Retriever.ts | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/output/schema/schema.json b/output/schema/schema.json index ed56732d19..2013f537bb 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -58535,11 +58535,11 @@ { "availability": { "serverless": { - "stability": "experimental" + "stability": "beta" }, "stack": { "since": "9.2.0", - "stability": "experimental" + "stability": "beta" } }, "description": "Whether to rescore on only the best matching chunks.", diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index a7f729cca1..8cc41cc4b4 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -156,8 +156,8 @@ export class TextSimilarityReranker extends RetrieverBase { /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text. */ field: string /** Whether to rescore on only the best matching chunks. - * @availability stack since=9.2.0 stability=experimental - * @availability serverless stability=experimental + * @availability stack since=9.2.0 stability=beta + * @availability serverless stability=beta */ chunk_rescorer?: ChunkRescorer } @@ -175,7 +175,7 @@ export class RuleRetriever extends RetrieverBase { export class ChunkRescorer { /** The number of chunks per document to evaluate for reranking. */ - size?: integer, + size?: integer /** Chunking settings to apply */ chunking_settings?: ChunkRescorerChunkingSettings } From 75c7e7314000160e8a81d5bfd383cddab1a766b8 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 25 Sep 2025 16:35:39 -0400 Subject: [PATCH 3/3] Linting --- specification/_types/Retriever.ts | 6 +++--- specification/_types/mapping/ChunkingSettings.ts | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index 8cc41cc4b4..7ab388bc35 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -23,8 +23,8 @@ import { FieldCollapse } from '@global/search/_types/FieldCollapse' import { Rescore } from '@global/search/_types/rescoring' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' import { Id, IndexName } from './common' -import { QueryContainer } from './query_dsl/abstractions' import { ChunkRescorerChunkingSettings } from './mapping/ChunkingSettings' +import { QueryContainer } from './query_dsl/abstractions' /** * @variants container @@ -155,10 +155,10 @@ export class TextSimilarityReranker extends RetrieverBase { inference_text: string /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text. */ field: string - /** Whether to rescore on only the best matching chunks. + /** Whether to rescore on only the best matching chunks. * @availability stack since=9.2.0 stability=beta * @availability serverless stability=beta - */ + */ chunk_rescorer?: ChunkRescorer } diff --git a/specification/_types/mapping/ChunkingSettings.ts b/specification/_types/mapping/ChunkingSettings.ts index e3d7e418c5..6f6efdcec1 100644 --- a/specification/_types/mapping/ChunkingSettings.ts +++ b/specification/_types/mapping/ChunkingSettings.ts @@ -35,7 +35,9 @@ export class ChunkingSettings implements OverloadOf { sentence_overlap?: integer } -export class ChunkRescorerChunkingSettings implements OverloadOf { +export class ChunkRescorerChunkingSettings + implements OverloadOf +{ strategy?: string separator_group?: string