From 8f6236eaf57fc50f87b466c63d8afca93e082f63 Mon Sep 17 00:00:00 2001
From: Jeff Vestal <53237856+jeffvestal@users.noreply.github.com>
Date: Thu, 15 Aug 2024 08:41:59 -0500
Subject: [PATCH] blog noteboook
---
..._The_RAG_Really_Tied_the_App_Togetheripynb | 5624 +++++++++++++++++
1 file changed, 5624 insertions(+)
create mode 100644 supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb
diff --git a/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb b/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb
new file mode 100644
index 00000000..69eebff3
--- /dev/null
+++ b/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb
@@ -0,0 +1,5624 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# ChatGPT and Elasticsearch: The RAG Really Tied the App Together\n",
+ "\n",
+ "\n",
+ "## This notebook will show you how to:\n",
+ "- Create an Elastic Serverless Project\n",
+ "- Set up an Inference API\n",
+ " - This will download and deploy ELSER for embedding inference\n",
+ "- Create an index template\n",
+ " - This will use `semantic_text` which will auto-chunk and embed the body of text\n",
+ "- Use the Elastic Open Crawler to crawl the Elastic Search/Observability/Security Labs\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "## The [accompanying blog](https://www.elastic.co/search-labs/blog/app/search-labs/blog/rag-ties-the-room-together/) takes it further by showing you how to:\n",
+ "- Use Playground to test chat prompts and configurations\n",
+ " - Then generate queries for our RAG app\n",
+ "- Use the queries from Playground to finish out a RAG Chatbot app\n",
+ " - Python FastAPI backend with React frontend"
+ ],
+ "metadata": {
+ "id": "_ebYbHHh_0hI"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%pip install -q elasticsearch"
+ ],
+ "metadata": {
+ "id": "_DmXlQWsGNeM"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import requests\n",
+ "import getpass\n",
+ "from pprint import pprint\n",
+ "from elasticsearch import Elasticsearch\n",
+ "from elasticsearch.exceptions import ConnectionTimeout\n",
+ "from time import sleep\n",
+ "from IPython.display import clear_output\n"
+ ],
+ "metadata": {
+ "id": "cuomUVE-zYjB"
+ },
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Project Setup"
+ ],
+ "metadata": {
+ "id": "HOOv0igTKjMS"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Enter your Cloud API Key\n",
+ "\n",
+ "Generate your secret API key at https://cloud.elastic.co/account/keys"
+ ],
+ "metadata": {
+ "id": "yWSg_D91x9mF"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Prompt for the Elastic Cloud API key without echoing it into the notebook output\n",
+ "api_key = getpass.getpass(\"Enter your API key: \").strip()  # drop whitespace picked up when pasting\n",
+ "assert api_key, \"No API key entered - re-run this cell and paste your key\"\n",
+ "print(\"API key successfully entered!\")\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "bidHlfsy2OPf",
+ "outputId": "ba8305a6-85d7-4173-fde4-166e77c4971e"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Enter your API key: ··········\n",
+ "API key successfully entered!\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Create Elasticsearch project\n",
+ "[Serverless API Docs](https://www.elastic.co/docs/api/doc/elastic-cloud-serverless/operation/operation-createelasticsearchproject#operation-createelasticsearchproject-body-application-json-optimized_for)"
+ ],
+ "metadata": {
+ "id": "mt4_kL0b0E75"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Create the Serverless project, then poll its status endpoint until it reports 'initialized'.\n",
+ "url = \"https://api.elastic-cloud.com/api/v1/serverless/projects/elasticsearch\"\n",
+ "\n",
+ "project_data = {\n",
+ "    \"name\": \"The RAG Really Tied the App Together\",\n",
+ "    \"region_id\": \"aws-us-east-1\",\n",
+ "    \"optimized_for\": \"vector\"\n",
+ "}\n",
+ "auth_header = f\"ApiKey {api_key}\"\n",
+ "headers = {\n",
+ "    \"Content-Type\": \"application/json\",\n",
+ "    \"Authorization\": auth_header\n",
+ "}\n",
+ "MAX_STATUS_CHECKS = 60  # 60 checks, 10 seconds apart: give up after roughly 10 minutes\n",
+ "\n",
+ "es_project = requests.post(url, json=project_data, headers=headers, timeout=60)\n",
+ "if not 200 <= es_project.status_code < 300:\n",
+ "    # Later cells depend on es_project_keys, so stop here and surface the API's error text.\n",
+ "    raise RuntimeError(f\"Project creation failed ({es_project.status_code}): {es_project.text}\")\n",
+ "\n",
+ "# Parse the response once; later cells read the endpoint and credentials from es_project_keys.\n",
+ "es_project_keys = es_project.json()\n",
+ "prg_name = es_project_keys['name']\n",
+ "project_id = es_project_keys['id']\n",
+ "print(f\"Project {prg_name} creation started\")\n",
+ "print('Checking if project is created and ready')\n",
+ "\n",
+ "for loop in range(1, MAX_STATUS_CHECKS + 1):\n",
+ "    es_project_check = requests.get(f\"{url}/{project_id}/status\", headers=headers, timeout=60)\n",
+ "    es_project_check.raise_for_status()  # surface HTTP errors before reading 'phase'\n",
+ "    phase = es_project_check.json()['phase']\n",
+ "    if phase == 'initialized':\n",
+ "        print('Project is ready')\n",
+ "        break\n",
+ "    clear_output(wait=True)\n",
+ "    print(f\"Waiting for project to be ready. Current status:{phase} - Loop {loop} Sleeping 10 seconds\")\n",
+ "    sleep(10)\n",
+ "else:\n",
+ "    raise TimeoutError(f\"Project {project_id} was not ready after {MAX_STATUS_CHECKS} status checks\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "lVkyA7KUyDEO",
+ "outputId": "8dd818ba-9c41-4e90-d718-319dbfcf6062"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Waiting for project to be ready. Current status:initializing - Loop 7 Sleeping 10 seconds\n",
+ "Project is ready\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Create elasticsearch client"
+ ],
+ "metadata": {
+ "id": "Uh0JpsnONMhv"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Connect using the endpoint and username/password from the project-creation response\n",
+ "project_credentials = es_project_keys['credentials']\n",
+ "es = Elasticsearch(\n",
+ "    es_project_keys['endpoints']['elasticsearch'],\n",
+ "    basic_auth=(project_credentials['username'], project_credentials['password']))"
+ ],
+ "metadata": {
+ "id": "KG01YrIwMdHz"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Project API Key\n",
+ "Create a [Project level API key](https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html)"
+ ],
+ "metadata": {
+ "id": "Xhu2U-YszbDe"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Create a project-level API key (14-day expiry); the crawler config cell embeds its encoded form\n",
+ "project_key_response = es.security.create_api_key(\n",
+ "    name=\"full_access_key\",\n",
+ "    metadata={\"description\": \"API key for full access\"},\n",
+ "    expiration=\"14d\",\n",
+ ")\n",
+ "project_api_key = project_key_response['encoded']\n",
+ "print(f\"{project_key_response['name']} has been created\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Puj1UWIKVtSv",
+ "outputId": "0f5d6937-1204-4f35-9f12-d9eb517cf675"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "full_access_key has been created\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Inference API and Index Setup"
+ ],
+ "metadata": {
+ "id": "C-J2hDsVWIut"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Inference API\n",
+ "This will:\n",
+ "- Create an inference API endpoint\n",
+ "- Download ELSER model (if not already downloaded)\n",
+ "- Deploy ELSER model with `service_settings` configs\n",
+ "\n",
+ "Note - This will wait for ELSER to be downloaded and deployed"
+ ],
+ "metadata": {
+ "id": "AjGmk-jwXi_4"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model_config = {\n",
+ "    \"service\": \"elser\",\n",
+ "    \"service_settings\": {\n",
+ "        \"num_allocations\": 8,\n",
+ "        \"num_threads\": 1\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "inference_id = \"my-elser-model\"\n",
+ "MAX_DEPLOY_CHECKS = 120  # 120 checks, 5 seconds apart: give up after roughly 10 minutes\n",
+ "\n",
+ "try:\n",
+ "    create_endpoint = es.inference.put_model(\n",
+ "        inference_id=inference_id,\n",
+ "        task_type=\"sparse_embedding\",\n",
+ "        body=model_config\n",
+ "    )\n",
+ "except ConnectionTimeout:\n",
+ "    # The request can outlast the client timeout while the model deploys; fall through and poll.\n",
+ "    print(\"Connection timed out. This can happen while waiting for the Inference model to fully deploy and start.\")\n",
+ "\n",
+ "# Poll after the try block (not in `finally`) so any other put_model error surfaces unmasked.\n",
+ "print(\"Waiting for inference model to be fully deployed\")\n",
+ "inf_info = es.inference.get_model(inference_id=inference_id)\n",
+ "model_id = inf_info.body['endpoints'][0]['service_settings']['model_id']\n",
+ "\n",
+ "for _ in range(MAX_DEPLOY_CHECKS):\n",
+ "    try:\n",
+ "        model_stats = es.ml.get_trained_models_stats(model_id=model_id)\n",
+ "        routing_state = model_stats.body['trained_model_stats'][0]['deployment_stats']['nodes'][0]['routing_state']['routing_state']\n",
+ "    except (IndexError, KeyError):  # Handle missing data in the response\n",
+ "        routing_state = None\n",
+ "    if routing_state == 'started':\n",
+ "        print(\"Inference API created and Inference model is fully deployed.\")\n",
+ "        break\n",
+ "    clear_output(wait=True)\n",
+ "    print(\"Waiting for inference model to be fully deployed\" if routing_state is not None else \"Still waiting for model deployment...\")\n",
+ "    sleep(5)\n",
+ "else:\n",
+ "    raise TimeoutError(f\"Model {model_id} was not deployed after {MAX_DEPLOY_CHECKS} checks\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "yblutX5J1LT1",
+ "outputId": "9810ce99-1d6f-413e-d4ba-89ed6b4391e4"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Waiting for inference model to be fully deployed\n",
+ "Inference API created and Inference model is fully deployed.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Create index template\n",
+ "The two key fields here are:\n",
+ "- body\n",
+ "  - Holds the body text; `copy_to` copies it into the semantic text field `semantic_body`\n",
+ "- semantic_body\n",
+ " - This field will automatically handle chunking and generating embeddings"
+ ],
+ "metadata": {
+ "id": "hixAZWcxkBkZ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "template_body = {\n",
+ "    \"index_patterns\": [\"elastic-labs*\"],  # matches the crawler's output index, elastic-labs\n",
+ "    \"template\": {\n",
+ "        \"mappings\": {\n",
+ "            \"properties\": {\n",
+ "                \"body\": {\n",
+ "                    \"type\": \"text\",\n",
+ "                    \"copy_to\": \"semantic_body\"  # feed the body text to the semantic field\n",
+ "                },\n",
+ "                \"semantic_body\": {\n",
+ "                    \"type\": \"semantic_text\",\n",
+ "                    \"inference_id\": inference_id  # reuse the id set in the Inference API cell\n",
+ "                },\n",
+ "                \"headings\": {\n",
+ "                    \"type\": \"text\"\n",
+ "                },\n",
+ "                \"id\": {\n",
+ "                    \"type\": \"keyword\"\n",
+ "                },\n",
+ "                \"meta_description\": {\n",
+ "                    \"type\": \"text\"\n",
+ "                },\n",
+ "                \"title\": {\n",
+ "                    \"type\": \"text\"\n",
+ "                }\n",
+ "            }\n",
+ "        }\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "template_resp = es.indices.put_index_template(\n",
+ "    name=\"labs_template\",\n",
+ "    body=template_body\n",
+ ")\n",
+ "\n",
+ "print(template_resp.body)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "EBEyEVg1kDh2",
+ "outputId": "3355ee8d-30c5-4675-f039-77675cae1ba3"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'acknowledged': True}\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Crawl the docs"
+ ],
+ "metadata": {
+ "id": "hm65dUPTBTpb"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Open Crawler\n",
+ "This must be run on a Linux, macOS, or Windows host or VM, not in Colab.\n",
+ "\n",
+ "The [blog details the steps](https://www.elastic.co/search-labs/blog/app/search-labs/blog/rag-ties-the-room-together#crawl-all-the-labs) below, as run on a MacBook.\n",
+ "\n",
+ "You can also review the [Open Crawler setup](https://github.com/elastic/crawler?tab=readme-ov-file#setup)."
+ ],
+ "metadata": {
+ "id": "EJ5D8bh3BWX5"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## High level steps to configure and run crawler\n",
+ "*This must be run on a Linux, macOS, or Windows host or VM, not in Colab.*\n",
+ "\n",
+ "- Clone the repo\n",
+ " - `git clone git@github.com:elastic/crawler.git`\n",
+ "- Build the Open Crawler Docker container\n",
+ " - `docker build -t crawler-image . && docker run -i -d --name crawler crawler-image`\n",
+ "- Create a new config file\n",
+ " - `vi config/elastic-labs.yml`\n",
+ " - run the _generate config_ cell below then paste the output in the config file and save.\n",
+ "- Copy the new local config into the container\n",
+ " - `docker cp config/elastic-labs.yml crawler:/app/config/elastic-labs.yml`\n",
+ "- Run the crawler\n",
+ " - `docker exec -it crawler bin/crawler crawl config/elastic-labs.yml`"
+ ],
+ "metadata": {
+ "id": "WMjjJTXR_hhD"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Generate Config\n",
+ "Run the below cell to generate the yml config file"
+ ],
+ "metadata": {
+ "id": "2ZB6L76Y8thR"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "config = f\"\"\"\n",
+ "domains:\n",
+ "  - url: https://www.elastic.co\n",
+ "    seed_urls:\n",
+ "      - https://www.elastic.co/search-labs\n",
+ "      - https://www.elastic.co/observability-labs\n",
+ "      - https://www.elastic.co/security-labs\n",
+ "    crawl_rules:\n",
+ "      - policy: deny  # listed ahead of the allow rules so author pages are not crawled (rules are matched in order)\n",
+ "        type: regex\n",
+ "        pattern: .*/author/.*\n",
+ "      - policy: allow\n",
+ "        type: begins\n",
+ "        pattern: /search-labs\n",
+ "      - policy: allow\n",
+ "        type: begins\n",
+ "        pattern: /observability-labs\n",
+ "      - policy: allow\n",
+ "        type: begins\n",
+ "        pattern: /security-labs\n",
+ "      - policy: deny\n",
+ "        type: regex\n",
+ "        pattern: .*\n",
+ "\n",
+ "output_sink: elasticsearch\n",
+ "output_index: elastic-labs\n",
+ "max_crawl_depth: 25\n",
+ "\n",
+ "elasticsearch:\n",
+ "  host: \"{es_project_keys['endpoints']['elasticsearch']}\"\n",
+ "  port: \"443\"\n",
+ "  api_key: \"{project_api_key}\"\n",
+ "  bulk_api.max_items: 10\n",
+ "\"\"\"\n",
+ "# Paste the printed YAML into config/elastic-labs.yml; it contains the project API key, so keep it private\n",
+ "print(config)"
+ ],
+ "metadata": {
+ "id": "2XDfDkdM85lN"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Confirm the docs have been crawled"
+ ],
+ "metadata": {
+ "id": "v7wxtXHABiC8"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "First look at the count of docs for each Labs' site"
+ ],
+ "metadata": {
+ "id": "a6mItejyBovz"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Count crawled documents per Labs site by bucketing on the url_path_dir1 field\n",
+ "query = {\n",
+ "    \"size\": 0,  # aggregation only, no hits returned\n",
+ "    \"aggs\": {\n",
+ "        \"url_path_dir1\": {\n",
+ "            \"terms\": {\"field\": \"url_path_dir1.keyword\"}\n",
+ "        }\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "# The per-site buckets appear under aggregations.url_path_dir1 in the printed response\n",
+ "response = es.search(index=\"elastic-labs\", body=query)\n",
+ "pprint(response.body)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_OZxWsx_BVy_",
+ "outputId": "e7333fa5-37b9-46c7-96f1-622d98e52521"
+ },
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},\n",
+ " 'aggregations': {'url_path_dir1': {'buckets': [{'doc_count': 216,\n",
+ " 'key': 'search-labs'},\n",
+ " {'doc_count': 214,\n",
+ " 'key': 'security-labs'},\n",
+ " {'doc_count': 158,\n",
+ " 'key': 'observability-labs'}],\n",
+ " 'doc_count_error_upper_bound': 0,\n",
+ " 'sum_other_doc_count': 0}},\n",
+ " 'hits': {'hits': [],\n",
+ " 'max_score': None,\n",
+ " 'total': {'relation': 'eq', 'value': 588}},\n",
+ " 'timed_out': False,\n",
+ " 'took': 6}\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Next review a sample doc"
+ ],
+ "metadata": {
+ "id": "UnTStG_TCpp1"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Fetch one document whose url_path_dir2 matches \"blog\" to inspect what the crawler indexed\n",
+ "query = {\n",
+ "    \"size\": 1,\n",
+ "    \"query\": {\n",
+ "        \"match\": {\"url_path_dir2\": \"blog\"}\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "# Expect very long output: the recorded run prints per-chunk text and embeddings\n",
+ "response = es.search(index=\"elastic-labs\", body=query)\n",
+ "pprint(response.body)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-1vWFv7cCuFu",
+ "outputId": "eaa4aa08-13a1-459a-dae0-9c8c1f0a69fc"
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'exposes '\n",
+ " 'a '\n",
+ " 'list '\n",
+ " 'of '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'values, '\n",
+ " 'one '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'as '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pools '\n",
+ " '(which '\n",
+ " 'handle '\n",
+ " 'indexing '\n",
+ " 'requests) '\n",
+ " 'are '\n",
+ " 'sized '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'CPU '\n",
+ " 'cores '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'node, '\n",
+ " 'this '\n",
+ " 'essentially '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'cores '\n",
+ " 'that '\n",
+ " 'is '\n",
+ " 'needed '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'on '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'consists '\n",
+ " 'of '\n",
+ " 'two '\n",
+ " 'components: '\n",
+ " 'Thread '\n",
+ " 'pool '\n",
+ " 'utilization: '\n",
+ " 'the '\n",
+ " 'average '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'threads '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'processing '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'during '\n",
+ " 'that '\n",
+ " 'sampling '\n",
+ " 'period. '\n",
+ " 'Queued '\n",
+ " 'ingestion '\n",
+ " 'load: '\n",
+ " 'the '\n",
+ " 'estimated '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'threads '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'queued '\n",
+ " 'write '\n",
+ " 'requests. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'is '\n",
+ " 'calculated '\n",
+ " 'as '\n",
+ " 'the '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'these '\n",
+ " 'two '\n",
+ " 'values '\n",
+ " 'for '\n",
+ " 'all '\n",
+ " 'the '\n",
+ " 'three '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pools '\n",
+ " '. '\n",
+ " 'The '\n",
+ " 'total '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'is '\n",
+ " 'the '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'individual '\n",
+ " 'nodes. '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 't '\n",
+ " 'h'},\n",
+ " {'embeddings': {'##est': 1.3433179,\n",
+ " '##estinal': 0.5916747,\n",
+ " '##ical': 0.21335103,\n",
+ " '##ing': 0.66160166,\n",
+ " '##ion': 1.223692,\n",
+ " '##l': 0.06755174,\n",
+ " '##ler': 0.34178317,\n",
+ " '##line': 0.6707441,\n",
+ " '##ling': 1.0343578,\n",
+ " '##load': 0.9880499,\n",
+ " '##mat': 0.01314945,\n",
+ " '##rch': 1.3459072,\n",
+ " '##s': 0.25005433,\n",
+ " '##sca': 1.6867673,\n",
+ " '##scu': 0.028700678,\n",
+ " '##sea': 1.6748068,\n",
+ " '_': 0.28835136,\n",
+ " 'access': 0.116686985,\n",
+ " 'accounting': 0.15865436,\n",
+ " 'algorithm': 1.0487378,\n",
+ " 'algorithms': 0.2763102,\n",
+ " 'allocation': 0.1481772,\n",
+ " 'amazon': 0.9099395,\n",
+ " 'among': 0.04313716,\n",
+ " 'anal': 0.025087006,\n",
+ " 'analysis': 0.64178395,\n",
+ " 'analyze': 0.18673302,\n",
+ " 'and': 0.19101046,\n",
+ " 'apache': 0.6617465,\n",
+ " 'api': 1.4468017,\n",
+ " 'approximate': 0.026616694,\n",
+ " 'are': 0.19081613,\n",
+ " 'arithmetic': 0.12217364,\n",
+ " 'ass': 0.12156314,\n",
+ " 'auto': 1.4633765,\n",
+ " 'automatic': 0.73048806,\n",
+ " 'availability': 0.20461462,\n",
+ " 'average': 0.58710635,\n",
+ " 'bot': 0.12357169,\n",
+ " 'buffer': 0.14556783,\n",
+ " 'calculate': 0.02387442,\n",
+ " 'calculated': 0.2452304,\n",
+ " 'calculation': 0.81089926,\n",
+ " 'called': 0.2972479,\n",
+ " 'capacity': 0.60224617,\n",
+ " 'catalog': 0.078262925,\n",
+ " 'category': 0.21683785,\n",
+ " 'checkpoint': 0.012995078,\n",
+ " 'chess': 0.41694775,\n",
+ " 'chip': 0.10178017,\n",
+ " 'class': 0.5914888,\n",
+ " 'classification': 0.17686933,\n",
+ " 'cluster': 1.4369037,\n",
+ " 'clusters': 0.21254443,\n",
+ " 'comply': 0.131236,\n",
+ " 'component': 0.37191656,\n",
+ " 'components': 0.87235415,\n",
+ " 'computation': 0.47024545,\n",
+ " 'compute': 0.14372817,\n",
+ " 'computer': 0.397558,\n",
+ " 'constant': 0.09540719,\n",
+ " 'consumption': 0.123454005,\n",
+ " 'cope': 0.7024604,\n",
+ " 'core': 0.62535626,\n",
+ " 'cores': 1.0230916,\n",
+ " 'cpu': 0.874175,\n",
+ " 'crawl': 0.23010625,\n",
+ " 'current': 0.5516459,\n",
+ " 'data': 0.25792596,\n",
+ " 'database': 0.4601695,\n",
+ " 'determine': 0.3844099,\n",
+ " 'determined': 0.41348428,\n",
+ " 'diagram': 0.025166756,\n",
+ " 'dimensions': 0.07042265,\n",
+ " 'disk': 0.07931721,\n",
+ " 'each': 0.22229394,\n",
+ " 'elastic': 1.8257822,\n",
+ " 'enter': 0.058845505,\n",
+ " 'equation': 0.43812877,\n",
+ " 'es': 0.8055687,\n",
+ " 'estimate': 0.03608101,\n",
+ " 'estimated': 0.46266982,\n",
+ " 'execution': 0.05638616,\n",
+ " 'factors': 0.12973839,\n",
+ " 'forest': 0.3904727,\n",
+ " 'formula': 0.016075172,\n",
+ " 'framework': 0.34186286,\n",
+ " 'g': 0.08017753,\n",
+ " 'gage': 0.30852094,\n",
+ " 'gene': 0.27250904,\n",
+ " 'handle': 0.9037246,\n",
+ " 'handling': 0.69093794,\n",
+ " 'implement': 0.053764082,\n",
+ " 'index': 1.3896008,\n",
+ " 'indexed': 0.25086805,\n",
+ " 'ing': 1.5002296,\n",
+ " 'integration': 0.20222682,\n",
+ " 'interface': 0.25386703,\n",
+ " 'inventory': 0.5645011,\n",
+ " 'is': 0.05772473,\n",
+ " 'java': 1.2391971,\n",
+ " 'l': 0.048691455,\n",
+ " 'lake': 0.24773102,\n",
+ " 'lane': 0.25919613,\n",
+ " 'lang': 0.039321195,\n",
+ " 'learning': 0.033810128,\n",
+ " 'library': 0.14143226,\n",
+ " 'list': 0.10985089,\n",
+ " 'lists': 0.12752165,\n",
+ " 'load': 1.7350225,\n",
+ " 'loaded': 0.057171866,\n",
+ " 'loading': 0.75305617,\n",
+ " 'loads': 0.12072936,\n",
+ " 'log': 0.06388949,\n",
+ " 'machine': 0.47294563,\n",
+ " 'mass': 0.092697844,\n",
+ " 'math': 0.7472431,\n",
+ " 'matrix': 0.045127213,\n",
+ " 'maximum': 0.094020285,\n",
+ " 'measure': 0.32414404,\n",
+ " 'memories': 0.03024405,\n",
+ " 'memory': 1.2586498,\n",
+ " 'method': 0.016832462,\n",
+ " 'metric': 1.1439759,\n",
+ " 'mining': 0.40203753,\n",
+ " 'mp': 0.09331862,\n",
+ " 'multi': 0.031247457,\n",
+ " 'multiple': 0.38688186,\n",
+ " 'n': 0.33228758,\n",
+ " 'need': 0.19645856,\n",
+ " 'network': 0.42359397,\n",
+ " 'new': 0.041632555,\n",
+ " 'node': 1.3807943,\n",
+ " 'nodes': 0.63807905,\n",
+ " 'number': 0.4450389,\n",
+ " 'o': 0.50335085,\n",
+ " 'operation': 0.008523868,\n",
+ " 'order': 0.08601924,\n",
+ " 'pattern': 0.11067777,\n",
+ " 'percent': 0.13746342,\n",
+ " 'performance': 0.41614294,\n",
+ " 'period': 0.49507552,\n",
+ " 'pool': 1.3188534,\n",
+ " 'poole': 0.3433027,\n",
+ " 'pools': 1.2800426,\n",
+ " 'predict': 0.23377013,\n",
+ " 'processing': 1.0733001,\n",
+ " 'processor': 0.10840816,\n",
+ " 'pure': 0.11351536,\n",
+ " 'quantity': 0.109573685,\n",
+ " 'queue': 1.1129105,\n",
+ " 'ram': 0.14691876,\n",
+ " 'rank': 0.36504152,\n",
+ " 'ratio': 0.011385939,\n",
+ " 'read': 0.13304754,\n",
+ " 'represent': 0.42444453,\n",
+ " 'representation': 0.058323957,\n",
+ " 'request': 0.755568,\n",
+ " 'requests': 0.7039498,\n",
+ " 'routing': 0.060857404,\n",
+ " 'sample': 0.62170815,\n",
+ " 'sampling': 0.8610632,\n",
+ " 'scala': 0.25192302,\n",
+ " 'scale': 0.5968038,\n",
+ " 'sea': 0.20613533,\n",
+ " 'search': 0.4318061,\n",
+ " 'semi': 0.33687106,\n",
+ " 'sequence': 0.23863083,\n",
+ " 'serial': 0.15801017,\n",
+ " 'server': 0.16233677,\n",
+ " 'si': 0.2002626,\n",
+ " 'sid': 0.44975162,\n",
+ " 'size': 0.8577202,\n",
+ " 'sized': 0.21010487,\n",
+ " 'sizes': 0.4059122,\n",
+ " 'small': 0.09116832,\n",
+ " 'software': 0.09232291,\n",
+ " 'sort': 0.35720947,\n",
+ " 'sorting': 0.06234357,\n",
+ " 'spectrum': 0.07792632,\n",
+ " 'sql': 0.116530605,\n",
+ " 'statistical': 0.0852167,\n",
+ " 'statistics': 0.22820702,\n",
+ " 'stomach': 0.018201118,\n",
+ " 'sum': 0.89766365,\n",
+ " 'swarm': 0.20437151,\n",
+ " 'table': 0.007837142,\n",
+ " 'task': 0.37974054,\n",
+ " 'taste': 0.053832427,\n",
+ " 'taylor': 0.10206632,\n",
+ " 'thread': 1.5052487,\n",
+ " 'threads': 1.2515007,\n",
+ " 'three': 0.27322263,\n",
+ " 'total': 0.64918166,\n",
+ " 'tree': 0.098200426,\n",
+ " 'unit': 0.15584692,\n",
+ " 'used': 0.56170344,\n",
+ " 'useful': 0.34977943,\n",
+ " 'utilization': 1.0091052,\n",
+ " 'value': 0.7453479,\n",
+ " 'values': 0.63835937,\n",
+ " 'vector': 0.3917736,\n",
+ " 'weaving': 0.11804886,\n",
+ " 'web': 0.46383187,\n",
+ " 'work': 0.29207155,\n",
+ " 'write': 1.1660185,\n",
+ " 'writing': 0.25973478,\n",
+ " 'z': 0.3776876},\n",
+ " 'text': 'that '\n",
+ " 'are '\n",
+ " 'used '\n",
+ " 'for '\n",
+ " 'ingest '\n",
+ " 'autoscaling '\n",
+ " 'in '\n",
+ " 'Elasticsearch '\n",
+ " 'are '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'memory. '\n",
+ " 'Ingestion '\n",
+ " 'load '\n",
+ " 'Ingestion '\n",
+ " 'load '\n",
+ " 'represents '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'threads '\n",
+ " 'that '\n",
+ " 'is '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'cope '\n",
+ " 'with '\n",
+ " 'the '\n",
+ " 'current '\n",
+ " 'indexing '\n",
+ " 'load. '\n",
+ " 'The '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'exposes '\n",
+ " 'a '\n",
+ " 'list '\n",
+ " 'of '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'values, '\n",
+ " 'one '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'as '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pools '\n",
+ " '(which '\n",
+ " 'handle '\n",
+ " 'indexing '\n",
+ " 'requests) '\n",
+ " 'are '\n",
+ " 'sized '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'CPU '\n",
+ " 'cores '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'node, '\n",
+ " 'this '\n",
+ " 'essentially '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'cores '\n",
+ " 'that '\n",
+ " 'is '\n",
+ " 'needed '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'on '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'consists '\n",
+ " 'of '\n",
+ " 'two '\n",
+ " 'components: '\n",
+ " 'Thread '\n",
+ " 'pool '\n",
+ " 'utilization: '\n",
+ " 'the '\n",
+ " 'average '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'threads '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'processing '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'during '\n",
+ " 'that '\n",
+ " 'sampling '\n",
+ " 'period. '\n",
+ " 'Queued '\n",
+ " 'ingestion '\n",
+ " 'load: '\n",
+ " 'the '\n",
+ " 'estimated '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'threads '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'queued '\n",
+ " 'write '\n",
+ " 'requests. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'is '\n",
+ " 'calculated '\n",
+ " 'as '\n",
+ " 'the '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'these '\n",
+ " 'two '\n",
+ " 'values '\n",
+ " 'for '\n",
+ " 'all '\n",
+ " 'the '\n",
+ " 'three '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pools '\n",
+ " '. '\n",
+ " 'The '\n",
+ " 'total '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'is '\n",
+ " 'the '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'individual '\n",
+ " 'nodes. '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 't '\n",
+ " 'h '\n",
+ " 'r '\n",
+ " 'e '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'p '\n",
+ " 'o '\n",
+ " 'o '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'l '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'a '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '+ '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 't '\n",
+ " 'o '\n",
+ " 't '\n",
+ " 'a '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " '\\\\small '\n",
+ " 'node\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\sum(thread\\\\_pool\\\\_utilization '\n",
+ " '+ '\n",
+ " 'queued\\\\_ingestion\\\\_load) '\n",
+ " '\\\\newline '\n",
+ " 'total\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\sum(node\\\\_ingestion\\\\_load) '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 't '\n",
+ " 'h '\n",
+ " 're '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'p '\n",
+ " 'oo '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'l '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'a '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '+ '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 't '\n",
+ " 'o '\n",
+ " 't '\n",
+ " 'a '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 'Figure '\n",
+ " '2 '\n",
+ " ': '\n",
+ " 'ingestion'},\n",
+ " {'embeddings': {'##able': 0.5624876,\n",
+ " '##ba': 0.10684605,\n",
+ " '##d': 0.12233314,\n",
+ " '##est': 0.84587747,\n",
+ " '##ima': 0.2508807,\n",
+ " '##ing': 0.57414246,\n",
+ " '##ion': 1.1121849,\n",
+ " '##line': 1.1430916,\n",
+ " '##ma': 1.1706055,\n",
+ " '##w': 1.3673741,\n",
+ " '##ws': 0.33763555,\n",
+ " '10': 0.51392806,\n",
+ " '200': 0.73087466,\n",
+ " '30': 0.45019,\n",
+ " '60': 1.3045075,\n",
+ " '[UNK]': 0.2956499,\n",
+ " '_': 0.33742356,\n",
+ " 'acceptable': 0.29635867,\n",
+ " 'access': 0.23300913,\n",
+ " 'accounting': 0.1906402,\n",
+ " 'achieve': 0.19722655,\n",
+ " 'algorithm': 1.1037958,\n",
+ " 'algorithms': 0.26360378,\n",
+ " 'allocation': 0.53156596,\n",
+ " 'analysis': 0.41347402,\n",
+ " 'apache': 0.54295164,\n",
+ " 'api': 0.21713388,\n",
+ " 'approximate': 0.51163644,\n",
+ " 'arithmetic': 0.005784557,\n",
+ " 'availability': 0.4917338,\n",
+ " 'average': 0.8478212,\n",
+ " 'batch': 0.08666975,\n",
+ " 'blocking': 0.02501016,\n",
+ " 'bot': 0.06050198,\n",
+ " 'buffer': 0.40386045,\n",
+ " 'bug': 0.055751722,\n",
+ " 'busy': 1.3026394,\n",
+ " 'calculate': 0.26999432,\n",
+ " 'calculation': 0.74316484,\n",
+ " 'capacity': 0.6725085,\n",
+ " 'chess': 0.25134456,\n",
+ " 'class': 0.328252,\n",
+ " 'client': 0.23896244,\n",
+ " 'clock': 1.125488,\n",
+ " 'cluster': 0.5103067,\n",
+ " 'component': 0.2536751,\n",
+ " 'components': 0.78435194,\n",
+ " 'computation': 0.62016183,\n",
+ " 'compute': 0.06482519,\n",
+ " 'computer': 0.32330835,\n",
+ " 'concurrency': 0.011380989,\n",
+ " 'configuration': 0.6887391,\n",
+ " 'configured': 0.26263618,\n",
+ " 'constant': 0.29082793,\n",
+ " 'consumption': 0.16989039,\n",
+ " 'cpu': 0.3717718,\n",
+ " 'database': 0.13461274,\n",
+ " 'e': 0.7789312,\n",
+ " 'effect': 0.09419204,\n",
+ " 'effort': 0.055172946,\n",
+ " 'employee': 0.3274528,\n",
+ " 'employees': 0.14320064,\n",
+ " 'ensemble': 0.19942468,\n",
+ " 'equation': 0.3787911,\n",
+ " 'equivalent': 0.050270963,\n",
+ " 'error': 0.12898737,\n",
+ " 'es': 0.043630168,\n",
+ " 'est': 0.20599021,\n",
+ " 'estimate': 1.0792123,\n",
+ " 'estimated': 0.39457676,\n",
+ " 'estimates': 0.465428,\n",
+ " 'estimation': 0.080784135,\n",
+ " 'every': 0.16873945,\n",
+ " 'excess': 1.0022457,\n",
+ " 'excessive': 0.451759,\n",
+ " 'execute': 0.59175754,\n",
+ " 'executing': 0.091966435,\n",
+ " 'execution': 1.3065349,\n",
+ " 'existing': 0.6437884,\n",
+ " 'exponential': 1.1467187,\n",
+ " 'extra': 0.26056916,\n",
+ " 'figure': 0.019528389,\n",
+ " 'finish': 0.012790194,\n",
+ " 'finished': 0.21236378,\n",
+ " 'flow': 0.10995065,\n",
+ " 'g': 0.43504617,\n",
+ " 'gage': 0.4229588,\n",
+ " 'group': 0.43960038,\n",
+ " 'guild': 0.014967873,\n",
+ " 'handle': 0.80899215,\n",
+ " 'handling': 0.7681083,\n",
+ " 'heap': 0.3867438,\n",
+ " 'hours': 0.7462872,\n",
+ " 'http': 0.20072725,\n",
+ " 'implement': 0.16245411,\n",
+ " 'implementation': 0.2408709,\n",
+ " 'improve': 0.10136651,\n",
+ " 'index': 1.2976965,\n",
+ " 'indexed': 0.10614389,\n",
+ " 'ing': 1.2063053,\n",
+ " 'inventory': 0.25356865,\n",
+ " 'java': 1.2153534,\n",
+ " 'l': 0.48968774,\n",
+ " 'lake': 0.27167574,\n",
+ " 'lane': 0.54473066,\n",
+ " 'length': 0.64622724,\n",
+ " 'library': 0.08392323,\n",
+ " 'line': 0.5581907,\n",
+ " 'load': 1.5088638,\n",
+ " 'loading': 0.5335804,\n",
+ " 'machine': 0.3173762,\n",
+ " 'manage': 0.5220977,\n",
+ " 'managed': 0.45824686,\n",
+ " 'management': 0.3230387,\n",
+ " 'mass': 0.15742503,\n",
+ " 'math': 0.81244004,\n",
+ " 'maximum': 0.34374076,\n",
+ " 'measure': 0.25600985,\n",
+ " 'memory': 0.5085309,\n",
+ " 'mining': 0.4451848,\n",
+ " 'minute': 0.39483455,\n",
+ " 'minutes': 0.22895378,\n",
+ " 'moving': 0.76410496,\n",
+ " 'mp': 0.046217,\n",
+ " 'multiple': 0.10666605,\n",
+ " 'n': 0.5416694,\n",
+ " 'network': 0.3097243,\n",
+ " 'new': 0.49582836,\n",
+ " 'node': 1.1907045,\n",
+ " 'number': 0.47905272,\n",
+ " 'o': 0.47123736,\n",
+ " 'operation': 0.19577809,\n",
+ " 'optimal': 0.1733028,\n",
+ " 'par': 0.09612937,\n",
+ " 'percent': 0.1152151,\n",
+ " 'performance': 0.74001515,\n",
+ " 'pool': 1.7006081,\n",
+ " 'poole': 0.36192703,\n",
+ " 'pools': 1.0764378,\n",
+ " 'predict': 0.38117534,\n",
+ " 'probe': 0.2430691,\n",
+ " 'process': 0.12230635,\n",
+ " 'processing': 0.47061718,\n",
+ " 'proportion': 0.2145018,\n",
+ " 'proportional': 1.1204233,\n",
+ " 'proposal': 0.1401456,\n",
+ " 'q': 0.3259466,\n",
+ " 'queue': 1.580318,\n",
+ " 'r': 0.14266703,\n",
+ " 'rank': 0.13613336,\n",
+ " 'rate': 0.39469108,\n",
+ " 'request': 1.1001134,\n",
+ " 'requests': 0.63539153,\n",
+ " 'resolution': 0.055606272,\n",
+ " 'resource': 0.21417612,\n",
+ " 'resources': 0.7937882,\n",
+ " 'routing': 0.14261606,\n",
+ " 'sample': 1.0720835,\n",
+ " 'sampled': 1.0306277,\n",
+ " 'samples': 1.2079935,\n",
+ " 'sampling': 0.6740413,\n",
+ " 'scala': 0.07395835,\n",
+ " 'script': 0.10171158,\n",
+ " 'second': 0.18827602,\n",
+ " 'seconds': 0.817573,\n",
+ " 'sequence': 0.49634397,\n",
+ " 'serial': 0.033651996,\n",
+ " 'server': 0.32002103,\n",
+ " 'share': 0.27626935,\n",
+ " 'sid': 0.27850676,\n",
+ " 'size': 0.11843514,\n",
+ " 'small': 0.75451213,\n",
+ " 'speed': 0.30091006,\n",
+ " 'sql': 0.31397846,\n",
+ " 'statistical': 0.0100006005,\n",
+ " 'strategy': 0.08963276,\n",
+ " 'stream': 0.028335843,\n",
+ " 'sum': 1.1407199,\n",
+ " 'surplus': 0.15598625,\n",
+ " 'swarm': 0.054142684,\n",
+ " 'task': 1.2177191,\n",
+ " 'tasks': 1.0780356,\n",
+ " 'taylor': 0.24217507,\n",
+ " 'technique': 0.0030198945,\n",
+ " 'thread': 1.7842301,\n",
+ " 'threads': 0.9916815,\n",
+ " 'time': 0.9839317,\n",
+ " 'timer': 0.19039534,\n",
+ " 'times': 0.5299459,\n",
+ " 'total': 0.40682667,\n",
+ " 'traffic': 0.28910428,\n",
+ " 'universe': 0.013594781,\n",
+ " 'usage': 0.5520448,\n",
+ " 'utilization': 1.6104044,\n",
+ " 'value': 0.6036144,\n",
+ " 'values': 0.33944046,\n",
+ " 'w': 0.4972394,\n",
+ " 'wait': 0.005872378,\n",
+ " 'wall': 1.1351137,\n",
+ " 'weaving': 0.13777943,\n",
+ " 'web': 0.2821159,\n",
+ " 'weighted': 1.1533256,\n",
+ " 'worker': 1.0417976,\n",
+ " 'workers': 1.2245823,\n",
+ " 'z': 0.29032487},\n",
+ " 'text': 'r '\n",
+ " 'e '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'p '\n",
+ " 'o '\n",
+ " 'o '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'l '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'a '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '+ '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 't '\n",
+ " 'o '\n",
+ " 't '\n",
+ " 'a '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " '\\\\small '\n",
+ " 'node\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\sum(thread\\\\_pool\\\\_utilization '\n",
+ " '+ '\n",
+ " 'queued\\\\_ingestion\\\\_load) '\n",
+ " '\\\\newline '\n",
+ " 'total\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\sum(node\\\\_ingestion\\\\_load) '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 't '\n",
+ " 'h '\n",
+ " 're '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'p '\n",
+ " 'oo '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'l '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'a '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '+ '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 't '\n",
+ " 'o '\n",
+ " 't '\n",
+ " 'a '\n",
+ " 'l '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '∑ '\n",
+ " '( '\n",
+ " 'n '\n",
+ " 'o '\n",
+ " 'd '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " ') '\n",
+ " 'Figure '\n",
+ " '2 '\n",
+ " ': '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'components '\n",
+ " 'The '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'is '\n",
+ " 'an '\n",
+ " 'exponentially '\n",
+ " 'weighted '\n",
+ " 'moving '\n",
+ " 'average '\n",
+ " '(EWMA) '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'busy '\n",
+ " 'threads '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'sampled '\n",
+ " 'every '\n",
+ " 'second. '\n",
+ " 'The '\n",
+ " 'EWMA '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'sampled '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'values '\n",
+ " 'is '\n",
+ " 'configured '\n",
+ " 'such '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'sampled '\n",
+ " 'values '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'past '\n",
+ " '10 '\n",
+ " 'seconds '\n",
+ " 'have '\n",
+ " 'the '\n",
+ " 'most '\n",
+ " 'effect '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'component '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'samples '\n",
+ " 'older '\n",
+ " 'than '\n",
+ " '60 '\n",
+ " 'seconds '\n",
+ " 'have '\n",
+ " 'very '\n",
+ " 'negligible '\n",
+ " 'impact. '\n",
+ " 'To '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'required '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'queued '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'we '\n",
+ " 'need '\n",
+ " 'to '\n",
+ " 'have '\n",
+ " 'an '\n",
+ " 'estimate '\n",
+ " 'for '\n",
+ " 'how '\n",
+ " 'long '\n",
+ " 'each '\n",
+ " 'queued '\n",
+ " 'task '\n",
+ " 'can '\n",
+ " 'take '\n",
+ " 'to '\n",
+ " 'execute. '\n",
+ " 'To '\n",
+ " 'achieve '\n",
+ " 'this, '\n",
+ " 'each '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'also '\n",
+ " 'provides '\n",
+ " 'an '\n",
+ " 'EWMA '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'request '\n",
+ " 'execution '\n",
+ " 'time. '\n",
+ " 'The '\n",
+ " 'request '\n",
+ " 'execution '\n",
+ " 'time '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'indexing '\n",
+ " 'request '\n",
+ " 'is '\n",
+ " 'the '\n",
+ " '(wall-clock) '\n",
+ " 'time '\n",
+ " 'taken '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'request '\n",
+ " 'to '\n",
+ " 'finish '\n",
+ " 'once '\n",
+ " 'it '\n",
+ " 'is '\n",
+ " 'out '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'queue '\n",
+ " 'and '\n",
+ " 'a '\n",
+ " 'worker '\n",
+ " 'thread '\n",
+ " 'starts '\n",
+ " 'executing '\n",
+ " 'it. '\n",
+ " 'As '\n",
+ " 'some '\n",
+ " 'queueing '\n",
+ " 'is '\n",
+ " 'acceptable '\n",
+ " 'and '\n",
+ " 'should '\n",
+ " 'be '\n",
+ " 'manageable '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'we '\n",
+ " 'try '\n",
+ " 'to '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'excess '\n",
+ " 'queueing. '\n",
+ " 'We '\n",
+ " 'consider '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " '30s '\n",
+ " 'worth '\n",
+ " 'of '\n",
+ " 'tasks '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'queue '\n",
+ " 'manageable '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'existing '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'workers '\n",
+ " 'and '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'extra '\n",
+ " 'thread '\n",
+ " 'proportional '\n",
+ " 'to '\n",
+ " 'this '\n",
+ " 'value. '\n",
+ " 'For '\n",
+ " 'example, '\n",
+ " 'if '\n",
+ " 'the '\n",
+ " 'average '\n",
+ " 'task '\n",
+ " 'execution '\n",
+ " 'time '\n",
+ " 'is '\n",
+ " '200ms, '\n",
+ " 'we '\n",
+ " 'estimate '\n",
+ " 'that'},\n",
+ " {'embeddings': {'##d': 0.06352329,\n",
+ " '##est': 0.89852107,\n",
+ " '##estinal': 0.13183321,\n",
+ " '##ima': 0.40056115,\n",
+ " '##ing': 0.61320734,\n",
+ " '##ion': 0.72260284,\n",
+ " '##ling': 0.8949169,\n",
+ " '##load': 0.57369965,\n",
+ " '##m': 0.23721623,\n",
+ " '##ma': 1.4438714,\n",
+ " '##mas': 0.24820994,\n",
+ " '##mat': 0.24343531,\n",
+ " '##sca': 0.92204034,\n",
+ " '##w': 1.6598973,\n",
+ " '##ws': 0.6782139,\n",
+ " '10': 0.7749067,\n",
+ " '150': 1.2471286,\n",
+ " '200': 0.58304185,\n",
+ " '30': 1.076181,\n",
+ " '60': 1.1588365,\n",
+ " '_': 0.17651597,\n",
+ " 'acceptable': 0.0395143,\n",
+ " 'access': 0.05357292,\n",
+ " 'accounting': 0.22549874,\n",
+ " 'achieve': 0.040418815,\n",
+ " 'algorithm': 0.9928478,\n",
+ " 'algorithms': 0.08838318,\n",
+ " 'allocation': 0.7647576,\n",
+ " 'analysis': 0.428812,\n",
+ " 'apache': 0.5859765,\n",
+ " 'api': 0.016843364,\n",
+ " 'approximate': 0.21684457,\n",
+ " 'arithmetic': 0.053462975,\n",
+ " 'array': 0.066098064,\n",
+ " 'auto': 0.53497416,\n",
+ " 'automatic': 0.20355695,\n",
+ " 'availability': 0.6690054,\n",
+ " 'average': 1.0341543,\n",
+ " 'blocking': 0.1431715,\n",
+ " 'buffer': 0.46087772,\n",
+ " 'bug': 0.23163809,\n",
+ " 'busy': 1.3082193,\n",
+ " 'calculate': 0.2015065,\n",
+ " 'calculation': 0.71491575,\n",
+ " 'capacity': 0.8027149,\n",
+ " 'checkpoint': 0.10162155,\n",
+ " 'chess': 0.26765594,\n",
+ " 'class': 0.5377411,\n",
+ " 'client': 0.028412435,\n",
+ " 'clock': 0.81897706,\n",
+ " 'cluster': 0.6336233,\n",
+ " 'component': 1.2550238,\n",
+ " 'components': 1.4753778,\n",
+ " 'computation': 0.5360401,\n",
+ " 'compute': 0.09496682,\n",
+ " 'computer': 0.48583803,\n",
+ " 'computers': 0.082595915,\n",
+ " 'computing': 0.0053236387,\n",
+ " 'concept': 0.09244595,\n",
+ " 'concurrency': 0.080570355,\n",
+ " 'configuration': 0.63552403,\n",
+ " 'configured': 0.49945095,\n",
+ " 'constant': 0.15874276,\n",
+ " 'consumption': 0.3705247,\n",
+ " 'count': 0.15291668,\n",
+ " 'cpu': 0.4727478,\n",
+ " 'data': 0.5534523,\n",
+ " 'database': 0.24513115,\n",
+ " 'definition': 0.25252765,\n",
+ " 'dew': 0.027248075,\n",
+ " 'disadvantage': 0.043538865,\n",
+ " 'disk': 1.0258542,\n",
+ " 'during': 0.024176076,\n",
+ " 'e': 1.3067937,\n",
+ " 'each': 0.01788934,\n",
+ " 'ec': 0.5695534,\n",
+ " 'ee': 0.08090695,\n",
+ " 'effect': 0.33151782,\n",
+ " 'employee': 0.14918438,\n",
+ " 'employees': 0.026578736,\n",
+ " 'equation': 0.42684066,\n",
+ " 'es': 0.18498634,\n",
+ " 'est': 0.098570675,\n",
+ " 'estimate': 0.83097947,\n",
+ " 'estimated': 0.19130428,\n",
+ " 'estimates': 0.04933924,\n",
+ " 'every': 0.384432,\n",
+ " 'excess': 0.44124436,\n",
+ " 'execute': 0.56965685,\n",
+ " 'execution': 1.092663,\n",
+ " 'exponential': 1.2772857,\n",
+ " 'extra': 0.3341091,\n",
+ " 'finish': 0.47172138,\n",
+ " 'finished': 0.5516902,\n",
+ " 'flow': 0.1065439,\n",
+ " 'fra': 0.5131407,\n",
+ " 'gage': 0.41627494,\n",
+ " 'group': 0.40121686,\n",
+ " 'handle': 0.76723486,\n",
+ " 'handling': 0.8265911,\n",
+ " 'hardware': 0.007931168,\n",
+ " 'heap': 0.055197764,\n",
+ " 'hours': 0.5783272,\n",
+ " 'http': 0.16334121,\n",
+ " 'implement': 0.20851848,\n",
+ " 'improve': 0.033503063,\n",
+ " 'index': 1.351592,\n",
+ " 'indexed': 1.2516088,\n",
+ " 'ing': 1.2539797,\n",
+ " 'inventory': 0.26884475,\n",
+ " 'io': 0.49151403,\n",
+ " 'is': 0.67021686,\n",
+ " 'items': 0.30828458,\n",
+ " 'java': 1.233984,\n",
+ " 'lake': 0.37700737,\n",
+ " 'lane': 0.35798323,\n",
+ " 'lang': 0.11334816,\n",
+ " 'length': 0.39039937,\n",
+ " 'library': 0.0020271246,\n",
+ " 'load': 1.839116,\n",
+ " 'loading': 0.52925104,\n",
+ " 'log': 0.026120221,\n",
+ " 'ma': 0.37466413,\n",
+ " 'machine': 0.41295668,\n",
+ " 'managed': 0.016499385,\n",
+ " 'management': 0.24261811,\n",
+ " 'many': 0.0001822544,\n",
+ " 'map': 0.16712263,\n",
+ " 'mat': 0.08338378,\n",
+ " 'math': 0.69625205,\n",
+ " 'maximum': 0.34880605,\n",
+ " 'mb': 0.37918818,\n",
+ " 'measure': 0.14309268,\n",
+ " 'memory': 0.58699423,\n",
+ " 'metric': 0.113157846,\n",
+ " 'mill': 0.087879546,\n",
+ " 'minimum': 0.042228475,\n",
+ " 'mining': 0.31173173,\n",
+ " 'minute': 0.2855463,\n",
+ " 'minutes': 0.037687548,\n",
+ " 'mm': 0.04705554,\n",
+ " 'move': 0.24638273,\n",
+ " 'moving': 1.068798,\n",
+ " 'mp': 0.339956,\n",
+ " 'mt': 0.18115476,\n",
+ " 'multi': 0.045562405,\n",
+ " 'multiple': 0.2256053,\n",
+ " 'n': 0.20722932,\n",
+ " 'network': 0.2870649,\n",
+ " 'node': 0.74391615,\n",
+ " 'nodes': 0.40956134,\n",
+ " 'number': 0.5414315,\n",
+ " 'object': 0.36274558,\n",
+ " 'old': 0.026420968,\n",
+ " 'older': 0.14505674,\n",
+ " 'operation': 0.137978,\n",
+ " 'optimal': 0.03703803,\n",
+ " 'par': 0.0058114612,\n",
+ " 'parts': 0.011510156,\n",
+ " 'past': 0.25731233,\n",
+ " 'percent': 0.35817072,\n",
+ " 'performance': 0.801656,\n",
+ " 'pool': 1.8708751,\n",
+ " 'poole': 0.2727913,\n",
+ " 'pools': 1.2964886,\n",
+ " 'population': 0.11810607,\n",
+ " 'predict': 0.18177378,\n",
+ " 'probe': 0.21369988,\n",
+ " 'processing': 0.4105097,\n",
+ " 'proportional': 0.6098035,\n",
+ " 'q': 0.13568267,\n",
+ " 'queue': 1.2824515,\n",
+ " 'rank': 0.40675223,\n",
+ " 'rate': 0.46714726,\n",
+ " 'request': 0.949167,\n",
+ " 'requests': 0.6644938,\n",
+ " 'requirements': 0.3288823,\n",
+ " 'resource': 0.4609863,\n",
+ " 'resources': 0.9455237,\n",
+ " 'routing': 0.18650433,\n",
+ " 'sample': 1.0472832,\n",
+ " 'sampled': 0.8309003,\n",
+ " 'samples': 1.1415888,\n",
+ " 'sampling': 0.45636305,\n",
+ " 'scala': 0.12271185,\n",
+ " 'scale': 0.3144392,\n",
+ " 'second': 0.49777645,\n",
+ " 'seconds': 0.7695267,\n",
+ " 'sequence': 0.21608938,\n",
+ " 'serial': 0.049026124,\n",
+ " 'server': 0.37191278,\n",
+ " 'share': 0.19251333,\n",
+ " 'si': 0.020900367,\n",
+ " 'sid': 0.41317028,\n",
+ " 'size': 0.7470095,\n",
+ " 'sizes': 0.060290556,\n",
+ " 'small': 0.015217632,\n",
+ " 'speed': 0.21846266,\n",
+ " 'sql': 0.39542097,\n",
+ " 'stack': 0.047259662,\n",
+ " 'start': 0.15702806,\n",
+ " 'statistical': 0.031916108,\n",
+ " 'statistics': 0.08593676,\n",
+ " 'storage': 0.034532573,\n",
+ " 'store': 0.053150244,\n",
+ " 'survey': 0.1747176,\n",
+ " 'system': 0.08567025,\n",
+ " 'table': 0.006464522,\n",
+ " 'task': 1.1504556,\n",
+ " 'tasks': 0.7951614,\n",
+ " 'taylor': 0.14394312,\n",
+ " 'term': 0.63525033,\n",
+ " 'thirty': 0.26077473,\n",
+ " 'thread': 2.0543768,\n",
+ " 'threads': 1.1089593,\n",
+ " 'tier': 1.207179,\n",
+ " 'time': 0.68932414,\n",
+ " 'timer': 0.14907645,\n",
+ " 'times': 0.32087305,\n",
+ " 'total': 0.22359692,\n",
+ " 'traffic': 0.26179498,\n",
+ " 'trial': 0.2198535,\n",
+ " 'u': 0.064360306,\n",
+ " 'unit': 0.13278264,\n",
+ " 'usage': 0.6241088,\n",
+ " 'utilization': 1.6971744,\n",
+ " 'value': 0.66488856,\n",
+ " 'values': 0.2064584,\n",
+ " 'w': 0.81893605,\n",
+ " 'wait': 0.103130125,\n",
+ " 'wall': 1.0635448,\n",
+ " 'weaving': 0.07162173,\n",
+ " 'web': 0.23646998,\n",
+ " 'weight': 0.030211551,\n",
+ " 'weighted': 1.2184887,\n",
+ " 'work': 0.23164386,\n",
+ " 'worker': 0.7420831,\n",
+ " 'workers': 1.0619413,\n",
+ " 'ze': 0.40276462},\n",
+ " 'text': 'load '\n",
+ " 'components '\n",
+ " 'The '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'is '\n",
+ " 'an '\n",
+ " 'exponentially '\n",
+ " 'weighted '\n",
+ " 'moving '\n",
+ " 'average '\n",
+ " '(EWMA) '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'busy '\n",
+ " 'threads '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'sampled '\n",
+ " 'every '\n",
+ " 'second. '\n",
+ " 'The '\n",
+ " 'EWMA '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'sampled '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'values '\n",
+ " 'is '\n",
+ " 'configured '\n",
+ " 'such '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'sampled '\n",
+ " 'values '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'past '\n",
+ " '10 '\n",
+ " 'seconds '\n",
+ " 'have '\n",
+ " 'the '\n",
+ " 'most '\n",
+ " 'effect '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'utilization '\n",
+ " 'component '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'samples '\n",
+ " 'older '\n",
+ " 'than '\n",
+ " '60 '\n",
+ " 'seconds '\n",
+ " 'have '\n",
+ " 'very '\n",
+ " 'negligible '\n",
+ " 'impact. '\n",
+ " 'To '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'required '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'queued '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'we '\n",
+ " 'need '\n",
+ " 'to '\n",
+ " 'have '\n",
+ " 'an '\n",
+ " 'estimate '\n",
+ " 'for '\n",
+ " 'how '\n",
+ " 'long '\n",
+ " 'each '\n",
+ " 'queued '\n",
+ " 'task '\n",
+ " 'can '\n",
+ " 'take '\n",
+ " 'to '\n",
+ " 'execute. '\n",
+ " 'To '\n",
+ " 'achieve '\n",
+ " 'this, '\n",
+ " 'each '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'also '\n",
+ " 'provides '\n",
+ " 'an '\n",
+ " 'EWMA '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'request '\n",
+ " 'execution '\n",
+ " 'time. '\n",
+ " 'The '\n",
+ " 'request '\n",
+ " 'execution '\n",
+ " 'time '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'indexing '\n",
+ " 'request '\n",
+ " 'is '\n",
+ " 'the '\n",
+ " '(wall-clock) '\n",
+ " 'time '\n",
+ " 'taken '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'request '\n",
+ " 'to '\n",
+ " 'finish '\n",
+ " 'once '\n",
+ " 'it '\n",
+ " 'is '\n",
+ " 'out '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'queue '\n",
+ " 'and '\n",
+ " 'a '\n",
+ " 'worker '\n",
+ " 'thread '\n",
+ " 'starts '\n",
+ " 'executing '\n",
+ " 'it. '\n",
+ " 'As '\n",
+ " 'some '\n",
+ " 'queueing '\n",
+ " 'is '\n",
+ " 'acceptable '\n",
+ " 'and '\n",
+ " 'should '\n",
+ " 'be '\n",
+ " 'manageable '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'thread '\n",
+ " 'pool, '\n",
+ " 'we '\n",
+ " 'try '\n",
+ " 'to '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'excess '\n",
+ " 'queueing. '\n",
+ " 'We '\n",
+ " 'consider '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " '30s '\n",
+ " 'worth '\n",
+ " 'of '\n",
+ " 'tasks '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'queue '\n",
+ " 'manageable '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'existing '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'workers '\n",
+ " 'and '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'extra '\n",
+ " 'thread '\n",
+ " 'proportional '\n",
+ " 'to '\n",
+ " 'this '\n",
+ " 'value. '\n",
+ " 'For '\n",
+ " 'example, '\n",
+ " 'if '\n",
+ " 'the '\n",
+ " 'average '\n",
+ " 'task '\n",
+ " 'execution '\n",
+ " 'time '\n",
+ " 'is '\n",
+ " '200ms, '\n",
+ " 'we '\n",
+ " 'estimate '\n",
+ " 'that '\n",
+ " 'each '\n",
+ " 'thread '\n",
+ " 'is '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " '150 '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'within '\n",
+ " '30s, '\n",
+ " 'and '\n",
+ " 'therefore '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'one '\n",
+ " 'extra '\n",
+ " 'thread '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " '150 '\n",
+ " 'queued '\n",
+ " 'items. '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 's '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'e '\n",
+ " '× '\n",
+ " 'a '\n",
+ " 'v '\n",
+ " 'e '\n",
+ " 'r '\n",
+ " 'a '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'r '\n",
+ " 'e '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " '_ '\n",
+ " 'e '\n",
+ " 'x '\n",
+ " 'e '\n",
+ " 'c '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'm '\n",
+ " 'e '\n",
+ " '30 '\n",
+ " 's '\n",
+ " '\\\\small '\n",
+ " 'queued\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\frac{queue\\\\_size '\n",
+ " '\\\\times '\n",
+ " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '30 '\n",
+ " 's '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 's '\n",
+ " 'i '\n",
+ " 'ze '\n",
+ " '× '\n",
+ " 'a '\n",
+ " 'v '\n",
+ " 'er '\n",
+ " 'a '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 're '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'es '\n",
+ " 't '\n",
+ " '_ '\n",
+ " 'e '\n",
+ " 'x '\n",
+ " 'ec '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 't '\n",
+ " 'im '\n",
+ " 'e '\n",
+ " '\\u200b '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'since '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'nodes '\n",
+ " 'rely '\n",
+ " 'on '\n",
+ " 'pushing '\n",
+ " 'indexed '\n",
+ " 'data '\n",
+ " 'into '\n",
+ " 'the '\n",
+ " 'object '\n",
+ " 'store '\n",
+ " 'periodically, '\n",
+ " 'we '\n",
+ " 'do '\n",
+ " 'not '\n",
+ " 'need '\n",
+ " 'to '\n",
+ " 'scale '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'size '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexed '\n",
+ " 'data. '\n",
+ " 'However, '\n",
+ " 'the '\n",
+ " 'disk '\n",
+ " 'IO '\n",
+ " 'requirements '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload '\n",
+ " 'needs '\n",
+ " 'to '\n",
+ " 'be '\n",
+ " 'considered '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'decisions. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'represents'},\n",
+ " {'embeddings': {'##d': 0.38506436,\n",
+ " '##est': 0.8363302,\n",
+ " '##frame': 0.039107077,\n",
+ " '##ing': 1.0441189,\n",
+ " '##ion': 1.1721121,\n",
+ " '##ler': 1.0595164,\n",
+ " '##ling': 0.99718106,\n",
+ " '##load': 0.8622203,\n",
+ " '##s': 0.26257822,\n",
+ " '##sca': 1.4883617,\n",
+ " '(': 0.04112861,\n",
+ " '120': 0.10787471,\n",
+ " '150': 1.5649581,\n",
+ " '200': 0.78864884,\n",
+ " '30': 1.3745978,\n",
+ " '300': 0.21148267,\n",
+ " '50': 0.031711366,\n",
+ " '500': 0.8493792,\n",
+ " '_': 0.24777141,\n",
+ " 'accounting': 0.64968836,\n",
+ " 'additional': 0.3232339,\n",
+ " 'algorithm': 1.0360106,\n",
+ " 'algorithms': 0.20798434,\n",
+ " 'analysis': 0.25909927,\n",
+ " 'analyze': 0.18533573,\n",
+ " 'apache': 0.8096589,\n",
+ " 'api': 1.3224775,\n",
+ " 'approximate': 0.0154337585,\n",
+ " 'array': 0.23401959,\n",
+ " 'auto': 1.4535567,\n",
+ " 'automatic': 0.7868701,\n",
+ " 'availability': 0.21982048,\n",
+ " 'available': 0.030020691,\n",
+ " 'average': 0.098859586,\n",
+ " 'basic': 0.2743477,\n",
+ " 'blocking': 0.10501332,\n",
+ " 'bot': 0.07765888,\n",
+ " 'buffer': 0.36042303,\n",
+ " 'calculate': 0.21506485,\n",
+ " 'calculation': 0.81758976,\n",
+ " 'capacity': 0.58354694,\n",
+ " 'cassandra': 0.22208737,\n",
+ " 'checkpoint': 0.031537656,\n",
+ " 'chess': 0.6237735,\n",
+ " 'class': 0.439471,\n",
+ " 'clock': 0.54654706,\n",
+ " 'cluster': 1.4933486,\n",
+ " 'cod': 0.12783043,\n",
+ " 'computation': 0.39954206,\n",
+ " 'compute': 0.042445127,\n",
+ " 'computer': 0.13797997,\n",
+ " 'constant': 0.2067099,\n",
+ " 'cpu': 0.5182024,\n",
+ " 'crawl': 0.22104222,\n",
+ " 'data': 0.51176333,\n",
+ " 'database': 0.440294,\n",
+ " 'determined': 0.23795621,\n",
+ " 'disk': 0.5893501,\n",
+ " 'e': 0.05990428,\n",
+ " 'each': 0.46478215,\n",
+ " 'equation': 0.008288982,\n",
+ " 'er': 0.43452957,\n",
+ " 'es': 0.14311427,\n",
+ " 'estimate': 0.25439763,\n",
+ " 'every': 0.1305604,\n",
+ " 'execution': 0.7186893,\n",
+ " 'exposed': 0.23602542,\n",
+ " 'extra': 0.7385199,\n",
+ " 'fixed': 0.11877214,\n",
+ " 'forum': 0.3137529,\n",
+ " 'fra': 1.0726693,\n",
+ " 'fragment': 0.030604606,\n",
+ " 'g': 0.026902322,\n",
+ " 'gage': 0.12548852,\n",
+ " 'guild': 0.27722847,\n",
+ " 'handle': 0.8976072,\n",
+ " 'handling': 0.69513077,\n",
+ " 'heap': 0.26846212,\n",
+ " 'hours': 0.7121461,\n",
+ " 'http': 0.10318518,\n",
+ " 'index': 1.6740144,\n",
+ " 'indexed': 1.1180266,\n",
+ " 'indices': 0.88624585,\n",
+ " 'ing': 1.10228,\n",
+ " 'integer': 0.2208937,\n",
+ " 'inventory': 0.44952998,\n",
+ " 'io': 0.85926545,\n",
+ " 'item': 0.48019466,\n",
+ " 'items': 0.7935411,\n",
+ " 'java': 1.237859,\n",
+ " 'lane': 0.39564016,\n",
+ " 'length': 0.47680393,\n",
+ " 'limit': 0.4967848,\n",
+ " 'load': 1.2765044,\n",
+ " 'loading': 0.25379905,\n",
+ " 'm': 0.06343312,\n",
+ " 'machine': 0.19301167,\n",
+ " 'maintenance': 0.23043938,\n",
+ " 'map': 0.07359305,\n",
+ " 'mass': 0.08436136,\n",
+ " 'master': 1.1724675,\n",
+ " 'matching': 0.044185776,\n",
+ " 'math': 0.71257645,\n",
+ " 'max': 0.16343911,\n",
+ " 'maximum': 0.8216195,\n",
+ " 'mb': 0.74474645,\n",
+ " 'measure': 0.22327076,\n",
+ " 'memory': 1.4785702,\n",
+ " 'metadata': 0.8341058,\n",
+ " 'metric': 0.9043063,\n",
+ " 'minimal': 0.36312523,\n",
+ " 'minimum': 1.0762551,\n",
+ " 'mining': 0.6374103,\n",
+ " 'mp': 0.18194582,\n",
+ " 'multi': 0.19790418,\n",
+ " 'multiple': 0.08082614,\n",
+ " 'n': 0.2315838,\n",
+ " 'network': 0.5508067,\n",
+ " 'node': 1.3963627,\n",
+ " 'nodes': 0.73737425,\n",
+ " 'number': 0.082121976,\n",
+ " 'o': 0.11493757,\n",
+ " 'object': 0.5812754,\n",
+ " 'par': 0.023205614,\n",
+ " 'per': 0.23101303,\n",
+ " 'performance': 0.23446344,\n",
+ " 'pool': 0.8049336,\n",
+ " 'pools': 0.15594147,\n",
+ " 'predict': 0.024841096,\n",
+ " 'processing': 0.36487442,\n",
+ " 'pushing': 0.20726342,\n",
+ " 'q': 0.8291657,\n",
+ " 'quarterly': 0.13623458,\n",
+ " 'queue': 1.481917,\n",
+ " 'rail': 0.078313634,\n",
+ " 'ram': 0.28152135,\n",
+ " 'rank': 0.3435108,\n",
+ " 'ratio': 0.06241234,\n",
+ " 're': 0.2784615,\n",
+ " 'regional': 0.34884617,\n",
+ " 'request': 0.99899644,\n",
+ " 'requests': 0.99197084,\n",
+ " 'requirement': 0.62241584,\n",
+ " 'requirements': 0.674187,\n",
+ " 'resolution': 0.02591185,\n",
+ " 'routing': 0.19566713,\n",
+ " 'scala': 0.17918167,\n",
+ " 'scale': 0.15746343,\n",
+ " 'seconds': 0.13917202,\n",
+ " 'semi': 0.23686175,\n",
+ " 'sequence': 0.5461212,\n",
+ " 'ser': 0.08773902,\n",
+ " 'serial': 0.29184434,\n",
+ " 'server': 0.5091232,\n",
+ " 'shards': 1.1462573,\n",
+ " 'sid': 0.5460215,\n",
+ " 'size': 0.5671189,\n",
+ " 'small': 0.1666983,\n",
+ " 'sort': 0.20719269,\n",
+ " 'sql': 0.21473138,\n",
+ " 'stack': 0.042597417,\n",
+ " 'statistics': 0.019139726,\n",
+ " 'storage': 0.11576759,\n",
+ " 'strategy': 0.06358851,\n",
+ " 'swarm': 0.08892168,\n",
+ " 't': 0.15734711,\n",
+ " 'task': 0.2625412,\n",
+ " 'taylor': 0.059171513,\n",
+ " 'thirty': 0.59235644,\n",
+ " 'thread': 1.7254765,\n",
+ " 'threads': 1.1326298,\n",
+ " 'tier': 2.0103586,\n",
+ " 'time': 0.5197543,\n",
+ " 'times': 0.19328791,\n",
+ " 'total': 0.9341554,\n",
+ " 'trial': 1.0915743,\n",
+ " 'ur': 0.041876547,\n",
+ " 'value': 0.39162463,\n",
+ " 'values': 0.10083909,\n",
+ " 'wall': 0.93653333,\n",
+ " 'web': 0.1397472,\n",
+ " 'weeks': 0.027450949,\n",
+ " 'within': 0.38789856,\n",
+ " 'work': 0.1474287,\n",
+ " 'workers': 0.30503651,\n",
+ " 'write': 0.33134767,\n",
+ " 'x': 0.027046092,\n",
+ " 'z': 0.06591661,\n",
+ " 'ze': 0.69916034},\n",
+ " 'text': 'each '\n",
+ " 'thread '\n",
+ " 'is '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " '150 '\n",
+ " 'indexing '\n",
+ " 'requests '\n",
+ " 'within '\n",
+ " '30s, '\n",
+ " 'and '\n",
+ " 'therefore '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'one '\n",
+ " 'extra '\n",
+ " 'thread '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " '150 '\n",
+ " 'queued '\n",
+ " 'items. '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'i '\n",
+ " 'n '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 's '\n",
+ " 'i '\n",
+ " 'z '\n",
+ " 'e '\n",
+ " '× '\n",
+ " 'a '\n",
+ " 'v '\n",
+ " 'e '\n",
+ " 'r '\n",
+ " 'a '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 'r '\n",
+ " 'e '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 's '\n",
+ " 't '\n",
+ " '_ '\n",
+ " 'e '\n",
+ " 'x '\n",
+ " 'e '\n",
+ " 'c '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'm '\n",
+ " 'e '\n",
+ " '30 '\n",
+ " 's '\n",
+ " '\\\\small '\n",
+ " 'queued\\\\_ingestion\\\\_load '\n",
+ " '= '\n",
+ " '\\\\frac{queue\\\\_size '\n",
+ " '\\\\times '\n",
+ " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'd '\n",
+ " '_ '\n",
+ " 'in '\n",
+ " 'g '\n",
+ " 'es '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 'l '\n",
+ " 'o '\n",
+ " 'a '\n",
+ " 'd '\n",
+ " '= '\n",
+ " '30 '\n",
+ " 's '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " 'u '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 's '\n",
+ " 'i '\n",
+ " 'ze '\n",
+ " '× '\n",
+ " 'a '\n",
+ " 'v '\n",
+ " 'er '\n",
+ " 'a '\n",
+ " 'g '\n",
+ " 'e '\n",
+ " '_ '\n",
+ " 're '\n",
+ " 'q '\n",
+ " 'u '\n",
+ " 'es '\n",
+ " 't '\n",
+ " '_ '\n",
+ " 'e '\n",
+ " 'x '\n",
+ " 'ec '\n",
+ " 'u '\n",
+ " 't '\n",
+ " 'i '\n",
+ " 'o '\n",
+ " 'n '\n",
+ " '_ '\n",
+ " 't '\n",
+ " 'im '\n",
+ " 'e '\n",
+ " '\\u200b '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'since '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'nodes '\n",
+ " 'rely '\n",
+ " 'on '\n",
+ " 'pushing '\n",
+ " 'indexed '\n",
+ " 'data '\n",
+ " 'into '\n",
+ " 'the '\n",
+ " 'object '\n",
+ " 'store '\n",
+ " 'periodically, '\n",
+ " 'we '\n",
+ " 'do '\n",
+ " 'not '\n",
+ " 'need '\n",
+ " 'to '\n",
+ " 'scale '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'size '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexed '\n",
+ " 'data. '\n",
+ " 'However, '\n",
+ " 'the '\n",
+ " 'disk '\n",
+ " 'IO '\n",
+ " 'requirements '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload '\n",
+ " 'needs '\n",
+ " 'to '\n",
+ " 'be '\n",
+ " 'considered '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'decisions. '\n",
+ " 'The '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'represents '\n",
+ " 'both '\n",
+ " 'CPU '\n",
+ " 'requirements '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'nodes '\n",
+ " 'as '\n",
+ " 'well '\n",
+ " 'as '\n",
+ " 'disk '\n",
+ " 'IO '\n",
+ " 'since '\n",
+ " 'both '\n",
+ " 'CPU '\n",
+ " 'and '\n",
+ " 'IO '\n",
+ " 'work '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'workers '\n",
+ " 'and '\n",
+ " 'we '\n",
+ " 'rely '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'wall '\n",
+ " 'clock '\n",
+ " 'time '\n",
+ " 'to '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'required '\n",
+ " 'time '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'queued '\n",
+ " 'requests. '\n",
+ " 'Each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'calculates '\n",
+ " 'its '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'publishes '\n",
+ " 'this '\n",
+ " 'value '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'periodically. '\n",
+ " 'The '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'serves '\n",
+ " 'the '\n",
+ " 'per '\n",
+ " 'node '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'values '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'autoscaler. '\n",
+ " 'Memory '\n",
+ " 'The '\n",
+ " 'memory '\n",
+ " 'metrics '\n",
+ " 'exposed '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'are '\n",
+ " 'node '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'tier '\n",
+ " 'memory. '\n",
+ " 'The '\n",
+ " 'node '\n",
+ " 'memory '\n",
+ " 'represents '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'memory '\n",
+ " 'requirement '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster. '\n",
+ " 'The '\n",
+ " 'tier '\n",
+ " 'memory '\n",
+ " 'metric '\n",
+ " 'represents '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'total '\n",
+ " 'memory '\n",
+ " 'that '\n",
+ " 'should '\n",
+ " 'be '\n",
+ " 'available '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'these '\n",
+ " 'values '\n",
+ " 'only '\n",
+ " 'indicate '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'to '\n",
+ " 'ensure '\n",
+ " 'that '\n",
+ " 'each '\n",
+ " 'node '\n",
+ " 'is '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'basic '\n",
+ " 'indexing '\n",
+ " 'workload '\n",
+ " 'and '\n",
+ " 'hold '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'indices '\n",
+ " 'metadata, '\n",
+ " 'while '\n",
+ " 'ensuring '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'tier '\n",
+ " 'includes '\n",
+ " 'enough '\n",
+ " 'nodes '\n",
+ " 'to '\n",
+ " 'accommodate '\n",
+ " 'all '\n",
+ " 'index '\n",
+ " 'shards. '\n",
+ " 'Node '\n",
+ " 'memory '\n",
+ " 'must '\n",
+ " 'have '\n",
+ " 'a '\n",
+ " 'minimum '\n",
+ " 'of '\n",
+ " '500MB '\n",
+ " 'to '\n",
+ " 'be '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'indexing '\n",
+ " 'workloads '\n",
+ " ', '\n",
+ " 'as '\n",
+ " 'well '\n",
+ " 'as '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'amount '\n",
+ " 'of '\n",
+ " 'memory '\n",
+ " 'per '\n",
+ " 'each '\n",
+ " 'index '\n",
+ " '. '\n",
+ " 'This '\n",
+ " 'ensures '\n",
+ " 'all '\n",
+ " 'nodes '\n",
+ " 'can '\n",
+ " 'hold '\n",
+ " 'metadata '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'cluster, '\n",
+ " 'which '\n",
+ " 'includes '\n",
+ " 'metadata '\n",
+ " 'for '\n",
+ " 'every '\n",
+ " 'index. '\n",
+ " 'Tier '\n",
+ " 'memory '\n",
+ " 'is '\n",
+ " 'determined '\n",
+ " 'by '\n",
+ " 'accounting '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'memory'},\n",
+ " {'embeddings': {'##d': 0.055720266,\n",
+ " '##est': 0.87620574,\n",
+ " '##ging': 0.12167851,\n",
+ " '##id': 0.007303444,\n",
+ " '##ing': 1.0664626,\n",
+ " '##ion': 0.5800176,\n",
+ " '##ler': 1.1925261,\n",
+ " '##ling': 1.0163201,\n",
+ " '##load': 0.81047934,\n",
+ " '##mb': 0.41285288,\n",
+ " '##rch': 0.9021695,\n",
+ " '##rd': 1.5396098,\n",
+ " '##rds': 0.47700712,\n",
+ " '##s': 0.033316635,\n",
+ " '##sca': 1.5766962,\n",
+ " '##sea': 1.0991455,\n",
+ " '500': 0.8151243,\n",
+ " '6': 0.5519658,\n",
+ " 'accounting': 0.74103206,\n",
+ " 'algorithm': 1.0231093,\n",
+ " 'algorithms': 0.065428115,\n",
+ " 'allocated': 0.19617477,\n",
+ " 'amazon': 0.31502825,\n",
+ " 'analysis': 0.5597703,\n",
+ " 'analyze': 0.30770445,\n",
+ " 'apache': 0.8908353,\n",
+ " 'api': 1.1461797,\n",
+ " 'approximate': 0.21645284,\n",
+ " 'archive': 0.013153568,\n",
+ " 'array': 0.047213156,\n",
+ " 'auto': 1.3802772,\n",
+ " 'automatic': 0.7499421,\n",
+ " 'availability': 0.10610637,\n",
+ " 'basic': 0.5700848,\n",
+ " 'blocking': 0.03154505,\n",
+ " 'bot': 0.2956401,\n",
+ " 'brain': 0.13824557,\n",
+ " 'brick': 0.34880513,\n",
+ " 'broken': 0.1587869,\n",
+ " 'buffer': 0.27810082,\n",
+ " 'bug': 0.019329984,\n",
+ " 'cad': 0.010832788,\n",
+ " 'calculate': 0.71264565,\n",
+ " 'calculated': 0.19991197,\n",
+ " 'calculation': 0.90854484,\n",
+ " 'capacity': 0.13310817,\n",
+ " 'cassandra': 0.269642,\n",
+ " 'checkpoint': 0.33004454,\n",
+ " 'chess': 0.6517597,\n",
+ " 'class': 0.40205157,\n",
+ " 'clock': 1.2123855,\n",
+ " 'cluster': 1.5899432,\n",
+ " 'clusters': 0.21755162,\n",
+ " 'computation': 0.3360238,\n",
+ " 'compute': 0.15521479,\n",
+ " 'computer': 0.4586727,\n",
+ " 'computers': 0.09730453,\n",
+ " 'core': 0.18051882,\n",
+ " 'cores': 0.54003507,\n",
+ " 'cpu': 1.4255431,\n",
+ " 'data': 0.7048903,\n",
+ " 'database': 0.5640705,\n",
+ " 'depend': 0.08640857,\n",
+ " 'deploy': 0.116062716,\n",
+ " 'deployed': 0.16281521,\n",
+ " 'deployment': 1.375697,\n",
+ " 'dev': 0.16744493,\n",
+ " 'disk': 1.2671278,\n",
+ " 'display': 0.10427013,\n",
+ " 'done': 0.057584852,\n",
+ " 'each': 0.44890955,\n",
+ " 'elastic': 1.3546548,\n",
+ " 'estimate': 1.1541563,\n",
+ " 'estimated': 0.4820726,\n",
+ " 'estimates': 0.68956727,\n",
+ " 'execution': 0.025004579,\n",
+ " 'expose': 0.3791655,\n",
+ " 'exposed': 1.4152902,\n",
+ " 'exposing': 0.2018034,\n",
+ " 'exposure': 0.22712028,\n",
+ " 'field': 0.43335024,\n",
+ " 'fixed': 0.3727484,\n",
+ " 'fragment': 0.3541149,\n",
+ " 'fragments': 0.19871251,\n",
+ " 'framework': 0.0067325183,\n",
+ " 'gage': 0.062432837,\n",
+ " 'gb': 0.23573099,\n",
+ " 'guild': 0.06864197,\n",
+ " 'handle': 0.6664566,\n",
+ " 'handling': 0.79544353,\n",
+ " 'hardware': 0.15463935,\n",
+ " 'hash': 0.056183893,\n",
+ " 'host': 0.49334934,\n",
+ " 'hours': 0.23847345,\n",
+ " 'hu': 0.12027907,\n",
+ " 'index': 1.84248,\n",
+ " 'indexed': 0.5543888,\n",
+ " 'indices': 0.8364849,\n",
+ " 'ing': 1.1731079,\n",
+ " 'integration': 0.43307945,\n",
+ " 'interface': 0.13424914,\n",
+ " 'inventory': 0.43660846,\n",
+ " 'io': 1.1710184,\n",
+ " 'java': 1.1948129,\n",
+ " 'kb': 0.275635,\n",
+ " 'lane': 0.065143116,\n",
+ " 'lang': 0.07760714,\n",
+ " 'length': 0.19545008,\n",
+ " 'limit': 0.14939034,\n",
+ " 'load': 1.068046,\n",
+ " 'loading': 0.3452746,\n",
+ " 'machine': 0.28579098,\n",
+ " 'maintenance': 0.24792214,\n",
+ " 'management': 0.016834572,\n",
+ " 'mandatory': 0.09757359,\n",
+ " 'map': 0.33999705,\n",
+ " 'mapped': 0.4253768,\n",
+ " 'mapping': 0.7739739,\n",
+ " 'master': 1.514614,\n",
+ " 'math': 0.62235314,\n",
+ " 'maximum': 0.4592383,\n",
+ " 'mb': 0.8386821,\n",
+ " 'measure': 0.35868418,\n",
+ " 'memory': 1.4037786,\n",
+ " 'metadata': 0.57345796,\n",
+ " 'metric': 1.0478114,\n",
+ " 'minimal': 0.55310273,\n",
+ " 'minimum': 1.1779544,\n",
+ " 'mining': 0.60987383,\n",
+ " 'monitor': 0.41601682,\n",
+ " 'monitoring': 0.80379987,\n",
+ " 'multiple': 0.0046412363,\n",
+ " 'need': 0.13691676,\n",
+ " 'needs': 0.09020152,\n",
+ " 'network': 0.5226748,\n",
+ " 'node': 1.5207812,\n",
+ " 'nodes': 0.9873411,\n",
+ " 'number': 0.08917359,\n",
+ " 'o': 0.47437057,\n",
+ " 'open': 0.9998891,\n",
+ " 'operation': 0.059715636,\n",
+ " 'parameters': 0.06929999,\n",
+ " 'per': 1.2698478,\n",
+ " 'performance': 0.27903107,\n",
+ " 'pool': 1.1343037,\n",
+ " 'pools': 0.5005684,\n",
+ " 'predict': 0.15172759,\n",
+ " 'processing': 0.34928247,\n",
+ " 'processor': 0.06942589,\n",
+ " 'provided': 0.33421612,\n",
+ " 'published': 0.35502988,\n",
+ " 'queue': 1.4328028,\n",
+ " 'ram': 0.07832895,\n",
+ " 'rank': 0.09849679,\n",
+ " 'regional': 0.023943441,\n",
+ " 'request': 0.58130133,\n",
+ " 'requests': 0.4985438,\n",
+ " 'require': 0.054292977,\n",
+ " 'required': 0.20457663,\n",
+ " 'requirement': 0.9255918,\n",
+ " 'requirements': 1.1021699,\n",
+ " 'resolution': 0.2503146,\n",
+ " 'resource': 0.22062841,\n",
+ " 'resources': 0.7977981,\n",
+ " 'scala': 0.046379413,\n",
+ " 'scale': 0.34393448,\n",
+ " 'scaling': 0.5871495,\n",
+ " 'script': 0.07091305,\n",
+ " 'search': 0.2748066,\n",
+ " 'semi': 0.19345926,\n",
+ " 'sequence': 0.2634719,\n",
+ " 'serial': 0.281783,\n",
+ " 'serve': 0.3122354,\n",
+ " 'server': 0.62030464,\n",
+ " 'sha': 1.412181,\n",
+ " 'shards': 1.2690446,\n",
+ " 'sid': 0.5395205,\n",
+ " 'size': 0.37528938,\n",
+ " 'software': 0.2301807,\n",
+ " 'sql': 0.28173122,\n",
+ " 'storage': 0.17134488,\n",
+ " 'sum': 0.48667532,\n",
+ " 'swarm': 0.09873215,\n",
+ " 'task': 0.15503421,\n",
+ " 'thread': 1.2720325,\n",
+ " 'threads': 0.5098314,\n",
+ " 'tier': 2.0405457,\n",
+ " 'time': 0.691699,\n",
+ " 'timer': 0.3272765,\n",
+ " 'total': 0.853305,\n",
+ " 'trial': 0.75489986,\n",
+ " 'value': 0.55824566,\n",
+ " 'values': 0.18979663,\n",
+ " 'wall': 1.5562296,\n",
+ " 'walls': 0.57668746,\n",
+ " 'web': 0.12833436,\n",
+ " 'workers': 0.30275372,\n",
+ " 'write': 0.8986184},\n",
+ " 'text': 'both '\n",
+ " 'CPU '\n",
+ " 'requirements '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'nodes '\n",
+ " 'as '\n",
+ " 'well '\n",
+ " 'as '\n",
+ " 'disk '\n",
+ " 'IO '\n",
+ " 'since '\n",
+ " 'both '\n",
+ " 'CPU '\n",
+ " 'and '\n",
+ " 'IO '\n",
+ " 'work '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'write '\n",
+ " 'thread '\n",
+ " 'pool '\n",
+ " 'workers '\n",
+ " 'and '\n",
+ " 'we '\n",
+ " 'rely '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'wall '\n",
+ " 'clock '\n",
+ " 'time '\n",
+ " 'to '\n",
+ " 'estimate '\n",
+ " 'the '\n",
+ " 'required '\n",
+ " 'time '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'queued '\n",
+ " 'requests. '\n",
+ " 'Each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'calculates '\n",
+ " 'its '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'publishes '\n",
+ " 'this '\n",
+ " 'value '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'periodically. '\n",
+ " 'The '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'serves '\n",
+ " 'the '\n",
+ " 'per '\n",
+ " 'node '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'values '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'autoscaler. '\n",
+ " 'Memory '\n",
+ " 'The '\n",
+ " 'memory '\n",
+ " 'metrics '\n",
+ " 'exposed '\n",
+ " 'by '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'are '\n",
+ " 'node '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'tier '\n",
+ " 'memory. '\n",
+ " 'The '\n",
+ " 'node '\n",
+ " 'memory '\n",
+ " 'represents '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'memory '\n",
+ " 'requirement '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'indexing '\n",
+ " 'node '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster. '\n",
+ " 'The '\n",
+ " 'tier '\n",
+ " 'memory '\n",
+ " 'metric '\n",
+ " 'represents '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'total '\n",
+ " 'memory '\n",
+ " 'that '\n",
+ " 'should '\n",
+ " 'be '\n",
+ " 'available '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'these '\n",
+ " 'values '\n",
+ " 'only '\n",
+ " 'indicate '\n",
+ " 'the '\n",
+ " 'minimum '\n",
+ " 'to '\n",
+ " 'ensure '\n",
+ " 'that '\n",
+ " 'each '\n",
+ " 'node '\n",
+ " 'is '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'basic '\n",
+ " 'indexing '\n",
+ " 'workload '\n",
+ " 'and '\n",
+ " 'hold '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'indices '\n",
+ " 'metadata, '\n",
+ " 'while '\n",
+ " 'ensuring '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'tier '\n",
+ " 'includes '\n",
+ " 'enough '\n",
+ " 'nodes '\n",
+ " 'to '\n",
+ " 'accommodate '\n",
+ " 'all '\n",
+ " 'index '\n",
+ " 'shards. '\n",
+ " 'Node '\n",
+ " 'memory '\n",
+ " 'must '\n",
+ " 'have '\n",
+ " 'a '\n",
+ " 'minimum '\n",
+ " 'of '\n",
+ " '500MB '\n",
+ " 'to '\n",
+ " 'be '\n",
+ " 'able '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'indexing '\n",
+ " 'workloads '\n",
+ " ', '\n",
+ " 'as '\n",
+ " 'well '\n",
+ " 'as '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'amount '\n",
+ " 'of '\n",
+ " 'memory '\n",
+ " 'per '\n",
+ " 'each '\n",
+ " 'index '\n",
+ " '. '\n",
+ " 'This '\n",
+ " 'ensures '\n",
+ " 'all '\n",
+ " 'nodes '\n",
+ " 'can '\n",
+ " 'hold '\n",
+ " 'metadata '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'cluster, '\n",
+ " 'which '\n",
+ " 'includes '\n",
+ " 'metadata '\n",
+ " 'for '\n",
+ " 'every '\n",
+ " 'index. '\n",
+ " 'Tier '\n",
+ " 'memory '\n",
+ " 'is '\n",
+ " 'determined '\n",
+ " 'by '\n",
+ " 'accounting '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'memory '\n",
+ " 'overhead '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'field '\n",
+ " 'mappings '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indices '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'amount '\n",
+ " 'of '\n",
+ " 'memory '\n",
+ " 'needed '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'open '\n",
+ " 'shard '\n",
+ " 'allocated '\n",
+ " 'on '\n",
+ " 'a '\n",
+ " 'node '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster. '\n",
+ " 'Currently, '\n",
+ " 'the '\n",
+ " 'per-shard '\n",
+ " 'memory '\n",
+ " 'requirement '\n",
+ " 'uses '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'estimate '\n",
+ " 'of '\n",
+ " '6MB. '\n",
+ " 'We '\n",
+ " 'plan '\n",
+ " 'to '\n",
+ " 'refine '\n",
+ " 'this '\n",
+ " 'value. '\n",
+ " 'The '\n",
+ " 'estimate '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'memory '\n",
+ " 'requirements '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'mappings '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'index '\n",
+ " 'is '\n",
+ " 'calculated '\n",
+ " 'by '\n",
+ " 'one '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'data '\n",
+ " 'nodes '\n",
+ " 'that '\n",
+ " 'hosts '\n",
+ " 'a '\n",
+ " 'shard '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'index. '\n",
+ " 'The '\n",
+ " 'calculated '\n",
+ " 'estimates '\n",
+ " 'are '\n",
+ " 'sent '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node. '\n",
+ " 'Whenever '\n",
+ " 'there '\n",
+ " 'is '\n",
+ " 'a '\n",
+ " 'mapping '\n",
+ " 'change '\n",
+ " 'this '\n",
+ " 'estimate '\n",
+ " 'is '\n",
+ " 'updated '\n",
+ " 'and '\n",
+ " 'published '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'again. '\n",
+ " 'The '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'serves '\n",
+ " 'the '\n",
+ " 'node '\n",
+ " 'and '\n",
+ " 'total '\n",
+ " 'memory '\n",
+ " 'metrics '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'these '\n",
+ " 'information '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'autoscaler. '\n",
+ " 'Scaling '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'The '\n",
+ " 'autoscaler '\n",
+ " 'is '\n",
+ " 'responsible '\n",
+ " 'for '\n",
+ " 'monitoring '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'exposed '\n",
+ " 'metrics, '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'desirable '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'to '\n",
+ " 'adapt '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload, '\n",
+ " 'and '\n",
+ " 'updating '\n",
+ " 'the '\n",
+ " 'deployment '\n",
+ " 'accordingly. '\n",
+ " 'This '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'required '\n",
+ " 'CPU '\n",
+ " 'and '\n",
+ " 'memory '\n",
+ " 'resources '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'memory '\n",
+ " 'metrics. '\n",
+ " 'The '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'all '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'per '\n",
+ " 'node '\n",
+ " 'values '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'CPU '\n",
+ " 'cores '\n",
+ " 'needed '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier. '\n",
+ " 'The '\n",
+ " 'calculated '\n",
+ " 'CPU '\n",
+ " 'requirement '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'provided '\n",
+ " 'minimum '\n",
+ " 'node '\n",
+ " 'and '\n",
+ " 'tier '\n",
+ " 'memory '\n",
+ " 'resources '\n",
+ " 'are '\n",
+ " 'mapped '\n",
+ " 'to '\n",
+ " 'a '\n",
+ " 'predetermined '\n",
+ " 'set'},\n",
+ " {'embeddings': {'##ber': 0.9460652,\n",
+ " '##d': 0.10023495,\n",
+ " '##es': 0.14341043,\n",
+ " '##gb': 0.6906553,\n",
+ " '##ine': 0.9458122,\n",
+ " '##ing': 0.42145026,\n",
+ " '##ler': 1.2356958,\n",
+ " '##ling': 0.63835293,\n",
+ " '##load': 0.2904571,\n",
+ " '##mb': 0.6970242,\n",
+ " '##net': 0.7010928,\n",
+ " '##pu': 1.0257086,\n",
+ " '##rch': 1.0700952,\n",
+ " '##rd': 1.6493205,\n",
+ " '##rds': 0.6754141,\n",
+ " '##rt': 0.12942569,\n",
+ " '##sca': 1.4853197,\n",
+ " '##sea': 1.4192088,\n",
+ " '##vc': 1.405061,\n",
+ " '100': 0.26849923,\n",
+ " '16': 0.19268984,\n",
+ " '160': 0.2302431,\n",
+ " '1600': 0.8732733,\n",
+ " '32': 1.2120824,\n",
+ " '6': 0.70548016,\n",
+ " '64': 1.202607,\n",
+ " 'algorithm': 0.937971,\n",
+ " 'allocated': 0.73692024,\n",
+ " 'allocation': 0.4625666,\n",
+ " 'amazon': 0.86137766,\n",
+ " 'analysis': 0.58160084,\n",
+ " 'analyze': 0.023657316,\n",
+ " 'apache': 0.85805637,\n",
+ " 'api': 0.9369967,\n",
+ " 'approximate': 0.15172462,\n",
+ " 'auto': 1.225151,\n",
+ " 'automatic': 0.7224918,\n",
+ " 'availability': 0.3053787,\n",
+ " 'bot': 0.33649588,\n",
+ " 'brick': 0.28021842,\n",
+ " 'buffer': 0.27807808,\n",
+ " 'bug': 0.12689802,\n",
+ " 'calculate': 0.56475216,\n",
+ " 'calculated': 0.2805605,\n",
+ " 'calculating': 0.18157567,\n",
+ " 'calculation': 1.0562031,\n",
+ " 'capacity': 0.19689727,\n",
+ " 'certification': 0.030283952,\n",
+ " 'checkpoint': 0.1251825,\n",
+ " 'chess': 0.38721076,\n",
+ " 'class': 0.044428803,\n",
+ " 'closed': 0.20298174,\n",
+ " 'cluster': 1.8217679,\n",
+ " 'clusters': 0.40412048,\n",
+ " 'computation': 0.27228907,\n",
+ " 'compute': 0.157462,\n",
+ " 'computer': 0.07424284,\n",
+ " 'cores': 0.28018573,\n",
+ " 'cpu': 0.874331,\n",
+ " 'criteria': 0.20424062,\n",
+ " 'cube': 0.078070216,\n",
+ " 'currently': 0.26391146,\n",
+ " 'data': 0.57366157,\n",
+ " 'database': 0.5346718,\n",
+ " 'deploy': 0.31853938,\n",
+ " 'deployed': 0.23235346,\n",
+ " 'deployment': 1.38996,\n",
+ " 'desirable': 0.25084683,\n",
+ " 'desired': 0.05757945,\n",
+ " 'determine': 0.07967118,\n",
+ " 'determined': 0.38774973,\n",
+ " 'dimensions': 0.3834306,\n",
+ " 'disk': 0.7686433,\n",
+ " 'display': 0.044948753,\n",
+ " 'domain': 0.05484484,\n",
+ " 'each': 0.026949435,\n",
+ " 'elastic': 1.7217911,\n",
+ " 'equation': 0.07899539,\n",
+ " 'estimate': 1.0816743,\n",
+ " 'estimated': 0.2908085,\n",
+ " 'estimates': 0.7743369,\n",
+ " 'existing': 0.50358754,\n",
+ " 'exposed': 0.91814655,\n",
+ " 'field': 1.4176838,\n",
+ " 'fields': 0.56111515,\n",
+ " 'fixed': 0.653671,\n",
+ " 'forest': 0.088545434,\n",
+ " 'gage': 0.23066506,\n",
+ " 'gb': 0.7216355,\n",
+ " 'hardware': 0.5457616,\n",
+ " 'honey': 0.13710178,\n",
+ " 'host': 0.32896483,\n",
+ " 'hu': 0.022061992,\n",
+ " 'implement': 0.19801763,\n",
+ " 'index': 1.5813339,\n",
+ " 'indexed': 0.33440682,\n",
+ " 'indicator': 0.07646061,\n",
+ " 'indices': 1.0497515,\n",
+ " 'ing': 0.44711637,\n",
+ " 'integration': 0.38794386,\n",
+ " 'inventory': 0.55072165,\n",
+ " 'java': 1.0091366,\n",
+ " 'kb': 0.31603098,\n",
+ " 'ku': 1.2214607,\n",
+ " 'largest': 0.55517995,\n",
+ " 'length': 0.1961873,\n",
+ " 'limit': 0.12602727,\n",
+ " 'linear': 0.13019355,\n",
+ " 'load': 0.7046929,\n",
+ " 'map': 0.6723943,\n",
+ " 'mapped': 0.6155787,\n",
+ " 'mapping': 0.95820665,\n",
+ " 'maps': 0.19839133,\n",
+ " 'master': 1.3583598,\n",
+ " 'math': 0.52316844,\n",
+ " 'maximum': 0.17016214,\n",
+ " 'mb': 0.8793483,\n",
+ " 'measure': 0.37326512,\n",
+ " 'memory': 1.3331418,\n",
+ " 'metric': 0.9261499,\n",
+ " 'minimum': 0.4176075,\n",
+ " 'mining': 0.42999497,\n",
+ " 'monitor': 0.34513482,\n",
+ " 'monitoring': 0.6307714,\n",
+ " 'multi': 0.3034215,\n",
+ " 'network': 0.67814016,\n",
+ " 'node': 1.2861586,\n",
+ " 'nodes': 0.6710798,\n",
+ " 'open': 1.3986069,\n",
+ " 'optimal': 0.0624708,\n",
+ " 'overhead': 0.69991654,\n",
+ " 'parameters': 0.11732358,\n",
+ " 'pattern': 0.005440311,\n",
+ " 'per': 1.2889819,\n",
+ " 'performance': 0.14103872,\n",
+ " 'poll': 0.52450436,\n",
+ " 'polling': 0.3777002,\n",
+ " 'polls': 0.60389787,\n",
+ " 'predict': 0.038165692,\n",
+ " 'published': 0.06970011,\n",
+ " 'radar': 0.004892402,\n",
+ " 'ram': 0.1705884,\n",
+ " 'rank': 0.1464829,\n",
+ " 'ratio': 0.6063533,\n",
+ " 'reconciliation': 0.4469912,\n",
+ " 'ref': 0.5476266,\n",
+ " 'requirement': 0.92776734,\n",
+ " 'requirements': 1.1151919,\n",
+ " 'resolution': 0.34558743,\n",
+ " 'resource': 0.21023308,\n",
+ " 'resources': 0.925664,\n",
+ " 'scale': 1.1254972,\n",
+ " 'scaled': 0.25958243,\n",
+ " 'scaling': 1.3571583,\n",
+ " 'scope': 0.007439173,\n",
+ " 'script': 0.108936414,\n",
+ " 'search': 0.4840181,\n",
+ " 'serial': 0.38776705,\n",
+ " 'server': 0.36229628,\n",
+ " 'sha': 1.6222633,\n",
+ " 'sid': 0.4845318,\n",
+ " 'since': 0.0958648,\n",
+ " 'size': 1.1212213,\n",
+ " 'sizes': 0.8831621,\n",
+ " 'software': 0.10655975,\n",
+ " 'sort': 0.23242046,\n",
+ " 'specification': 0.36318856,\n",
+ " 'specifications': 0.36570984,\n",
+ " 'storage': 0.16639474,\n",
+ " 'swarm': 0.012647891,\n",
+ " 'target': 0.097013876,\n",
+ " 'tier': 1.3347368,\n",
+ " 'total': 0.2700686,\n",
+ " 'trial': 0.48382765,\n",
+ " 'up': 0.009041203,\n",
+ " 'value': 0.5148574,\n",
+ " 'version': 0.00331044,\n",
+ " 'vote': 0.19521642,\n",
+ " 'voting': 0.32694972,\n",
+ " 'web': 0.43445045,\n",
+ " 'which': 0.22146864},\n",
+ " 'text': 'overhead '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'field '\n",
+ " 'mappings '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'indices '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'amount '\n",
+ " 'of '\n",
+ " 'memory '\n",
+ " 'needed '\n",
+ " 'for '\n",
+ " 'each '\n",
+ " 'open '\n",
+ " 'shard '\n",
+ " 'allocated '\n",
+ " 'on '\n",
+ " 'a '\n",
+ " 'node '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster. '\n",
+ " 'Currently, '\n",
+ " 'the '\n",
+ " 'per-shard '\n",
+ " 'memory '\n",
+ " 'requirement '\n",
+ " 'uses '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'estimate '\n",
+ " 'of '\n",
+ " '6MB. '\n",
+ " 'We '\n",
+ " 'plan '\n",
+ " 'to '\n",
+ " 'refine '\n",
+ " 'this '\n",
+ " 'value. '\n",
+ " 'The '\n",
+ " 'estimate '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'memory '\n",
+ " 'requirements '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'mappings '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'index '\n",
+ " 'is '\n",
+ " 'calculated '\n",
+ " 'by '\n",
+ " 'one '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'data '\n",
+ " 'nodes '\n",
+ " 'that '\n",
+ " 'hosts '\n",
+ " 'a '\n",
+ " 'shard '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'index. '\n",
+ " 'The '\n",
+ " 'calculated '\n",
+ " 'estimates '\n",
+ " 'are '\n",
+ " 'sent '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node. '\n",
+ " 'Whenever '\n",
+ " 'there '\n",
+ " 'is '\n",
+ " 'a '\n",
+ " 'mapping '\n",
+ " 'change '\n",
+ " 'this '\n",
+ " 'estimate '\n",
+ " 'is '\n",
+ " 'updated '\n",
+ " 'and '\n",
+ " 'published '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'again. '\n",
+ " 'The '\n",
+ " 'master '\n",
+ " 'node '\n",
+ " 'serves '\n",
+ " 'the '\n",
+ " 'node '\n",
+ " 'and '\n",
+ " 'total '\n",
+ " 'memory '\n",
+ " 'metrics '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'these '\n",
+ " 'information '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'autoscaling '\n",
+ " 'metrics '\n",
+ " 'API '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'autoscaler. '\n",
+ " 'Scaling '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'The '\n",
+ " 'autoscaler '\n",
+ " 'is '\n",
+ " 'responsible '\n",
+ " 'for '\n",
+ " 'monitoring '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'via '\n",
+ " 'the '\n",
+ " 'exposed '\n",
+ " 'metrics, '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'desirable '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'to '\n",
+ " 'adapt '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload, '\n",
+ " 'and '\n",
+ " 'updating '\n",
+ " 'the '\n",
+ " 'deployment '\n",
+ " 'accordingly. '\n",
+ " 'This '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'required '\n",
+ " 'CPU '\n",
+ " 'and '\n",
+ " 'memory '\n",
+ " 'resources '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'and '\n",
+ " 'memory '\n",
+ " 'metrics. '\n",
+ " 'The '\n",
+ " 'sum '\n",
+ " 'of '\n",
+ " 'all '\n",
+ " 'the '\n",
+ " 'ingestion '\n",
+ " 'load '\n",
+ " 'per '\n",
+ " 'node '\n",
+ " 'values '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'total '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'CPU '\n",
+ " 'cores '\n",
+ " 'needed '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier. '\n",
+ " 'The '\n",
+ " 'calculated '\n",
+ " 'CPU '\n",
+ " 'requirement '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'provided '\n",
+ " 'minimum '\n",
+ " 'node '\n",
+ " 'and '\n",
+ " 'tier '\n",
+ " 'memory '\n",
+ " 'resources '\n",
+ " 'are '\n",
+ " 'mapped '\n",
+ " 'to '\n",
+ " 'a '\n",
+ " 'predetermined '\n",
+ " 'set '\n",
+ " 'of '\n",
+ " 'cluster '\n",
+ " 'sizes. '\n",
+ " 'Each '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'nodes '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'CPU, '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'disk '\n",
+ " 'size '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'node. '\n",
+ " 'All '\n",
+ " 'nodes '\n",
+ " 'within '\n",
+ " 'a '\n",
+ " 'certain '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'have '\n",
+ " 'the '\n",
+ " 'same '\n",
+ " 'hardware '\n",
+ " 'specification. '\n",
+ " 'There '\n",
+ " 'is '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'ratio '\n",
+ " 'between '\n",
+ " 'CPU, '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'disk, '\n",
+ " 'thus '\n",
+ " 'always '\n",
+ " 'scaling '\n",
+ " 'all '\n",
+ " '3 '\n",
+ " 'resources '\n",
+ " 'linearly. '\n",
+ " 'The '\n",
+ " 'existing '\n",
+ " 'cluster '\n",
+ " 'sizes '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier '\n",
+ " 'are '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'node '\n",
+ " 'sizes '\n",
+ " 'starting '\n",
+ " 'from '\n",
+ " '4GB/2vCPU/100GB '\n",
+ " 'disk '\n",
+ " 'to '\n",
+ " '64GB/32vCPU/1600GB '\n",
+ " 'disk. '\n",
+ " 'Once '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'scales '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'largest '\n",
+ " 'node '\n",
+ " 'size '\n",
+ " '(64GB '\n",
+ " 'memory), '\n",
+ " 'any '\n",
+ " 'further '\n",
+ " 'scale-up '\n",
+ " 'adds '\n",
+ " 'new '\n",
+ " '64GB '\n",
+ " 'nodes, '\n",
+ " 'allowing '\n",
+ " 'a '\n",
+ " 'cluster '\n",
+ " 'to '\n",
+ " 'scale '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " '32 '\n",
+ " 'nodes '\n",
+ " 'of '\n",
+ " '64GB. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'this '\n",
+ " 'is '\n",
+ " 'not '\n",
+ " 'a '\n",
+ " 'hard '\n",
+ " 'upper '\n",
+ " 'bound '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'Elasticsearch '\n",
+ " 'nodes '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'can '\n",
+ " 'be '\n",
+ " 'increased '\n",
+ " 'if '\n",
+ " 'necessary. '\n",
+ " 'Every '\n",
+ " '5 '\n",
+ " 'seconds '\n",
+ " 'the '\n",
+ " 'autoscaler '\n",
+ " 'polls '\n",
+ " 'metrics '\n",
+ " 'from '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node, '\n",
+ " 'calculates '\n",
+ " 'the '\n",
+ " 'desirable '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'and '\n",
+ " 'if '\n",
+ " 'it '\n",
+ " 'is '\n",
+ " 'different '\n",
+ " 'from '\n",
+ " 'the '\n",
+ " 'current '\n",
+ " 'cluster '\n",
+ " 'size, '\n",
+ " 'it '\n",
+ " 'updates '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'Kubernetes '\n",
+ " 'Deployment '\n",
+ " 'accordingly. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'actual '\n",
+ " 'reconciliation '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'deployment '\n",
+ " 'towards '\n",
+ " 'the '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'and '\n",
+ " 'adding '\n",
+ " 'and '\n",
+ " 'removing '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'nodes '\n",
+ " 'to '\n",
+ " 'achieve '\n",
+ " 'this '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'Kubernetes. '\n",
+ " 'In '\n",
+ " 'order '\n",
+ " 'to '\n",
+ " 'avoid '\n",
+ " 'very '\n",
+ " 'short-lived '\n",
+ " 'changes '\n",
+ " 'to '\n",
+ " 'the'},\n",
+ " {'embeddings': {'##ber': 0.03804658,\n",
+ " '##es': 0.1512185,\n",
+ " '##gb': 0.6443679,\n",
+ " '##hi': 0.36000288,\n",
+ " '##ika': 0.07467539,\n",
+ " '##ing': 0.6129379,\n",
+ " '##ler': 1.1574837,\n",
+ " '##less': 0.5735957,\n",
+ " '##ling': 1.1661593,\n",
+ " '##load': 0.62337583,\n",
+ " '##net': 0.58226395,\n",
+ " '##oya': 1.7074469,\n",
+ " '##pu': 1.1345644,\n",
+ " '##rch': 1.0119687,\n",
+ " '##sca': 1.5153302,\n",
+ " '##sea': 1.4253823,\n",
+ " '##vc': 1.4631956,\n",
+ " '100': 0.55265766,\n",
+ " '15': 0.052379817,\n",
+ " '16': 0.33394203,\n",
+ " '160': 0.118766,\n",
+ " '1600': 0.8028694,\n",
+ " '32': 1.1772103,\n",
+ " '4': 0.16181825,\n",
+ " '64': 1.4588842,\n",
+ " 'algorithm': 0.94727564,\n",
+ " 'always': 0.38941032,\n",
+ " 'amazon': 0.89331883,\n",
+ " 'analysis': 0.4050502,\n",
+ " 'analyze': 0.023668261,\n",
+ " 'andersen': 0.49676144,\n",
+ " 'apache': 0.80054885,\n",
+ " 'ariel': 0.4422102,\n",
+ " 'auto': 1.2729144,\n",
+ " 'automatic': 0.7698037,\n",
+ " 'automatically': 0.04643825,\n",
+ " 'availability': 0.49544457,\n",
+ " 'available': 0.19981025,\n",
+ " 'blog': 0.50581634,\n",
+ " 'boat': 0.4211383,\n",
+ " 'bot': 0.44343898,\n",
+ " 'bug': 0.16439897,\n",
+ " 'calculate': 0.44946215,\n",
+ " 'calculating': 0.21078831,\n",
+ " 'calculation': 0.91136605,\n",
+ " 'calculations': 0.35172287,\n",
+ " 'capacity': 0.32551798,\n",
+ " 'certification': 0.96537966,\n",
+ " 'certified': 0.86568826,\n",
+ " 'change': 0.091490604,\n",
+ " 'checkpoint': 0.13703609,\n",
+ " 'chess': 0.30361477,\n",
+ " 'class': 0.12189255,\n",
+ " 'cloud': 0.36273655,\n",
+ " 'cluster': 2.1554685,\n",
+ " 'clusters': 0.84253734,\n",
+ " 'competition': 0.0070358375,\n",
+ " 'component': 0.16093102,\n",
+ " 'components': 0.688979,\n",
+ " 'computation': 0.0109849,\n",
+ " 'computer': 0.37449652,\n",
+ " 'computers': 0.29611063,\n",
+ " 'constant': 0.21192689,\n",
+ " 'cpu': 0.9483953,\n",
+ " 'crawl': 0.061979044,\n",
+ " 'data': 0.29847682,\n",
+ " 'database': 0.53361094,\n",
+ " 'define': 0.30592072,\n",
+ " 'deployment': 1.1050912,\n",
+ " 'desirable': 0.28776327,\n",
+ " 'determination': 0.25265238,\n",
+ " 'determine': 0.4538456,\n",
+ " 'determined': 0.5666302,\n",
+ " 'determines': 0.02666208,\n",
+ " 'dimensions': 0.43506965,\n",
+ " 'disadvantage': 0.40544793,\n",
+ " 'disk': 1.0043706,\n",
+ " 'domain': 0.08386699,\n",
+ " 'down': 1.1079221,\n",
+ " 'each': 0.20502539,\n",
+ " 'elastic': 2.0313072,\n",
+ " 'engineer': 0.41261968,\n",
+ " 'engineering': 0.43656224,\n",
+ " 'existing': 0.82118076,\n",
+ " 'expensive': 0.10213457,\n",
+ " 'factors': 0.04067958,\n",
+ " 'fernandez': 1.1611929,\n",
+ " 'fixed': 0.6458474,\n",
+ " 'forest': 0.07132318,\n",
+ " 'francisco': 1.0563725,\n",
+ " 'garcia': 0.13344267,\n",
+ " 'gb': 0.6862939,\n",
+ " 'global': 0.0054082987,\n",
+ " 'hardware': 0.7944886,\n",
+ " 'hen': 0.9853478,\n",
+ " 'honey': 0.081156164,\n",
+ " 'hour': 0.0074544367,\n",
+ " 'hours': 0.24539681,\n",
+ " 'hu': 0.06941744,\n",
+ " 'implement': 0.23772681,\n",
+ " 'implementation': 0.07986039,\n",
+ " 'improve': 0.2981144,\n",
+ " 'increase': 0.7570058,\n",
+ " 'increasing': 0.25063965,\n",
+ " 'index': 1.358504,\n",
+ " 'indexed': 0.29916498,\n",
+ " 'ing': 0.49232894,\n",
+ " 'integration': 0.20372295,\n",
+ " 'inventory': 0.49392712,\n",
+ " 'java': 0.96544707,\n",
+ " 'jose': 0.014233379,\n",
+ " 'ku': 1.0064884,\n",
+ " 'large': 0.009199611,\n",
+ " 'largest': 0.5853634,\n",
+ " 'latest': 0.075750045,\n",
+ " 'learning': 0.14278692,\n",
+ " 'length': 0.2575359,\n",
+ " 'limit': 0.27284575,\n",
+ " 'linear': 0.99686086,\n",
+ " 'load': 0.78078943,\n",
+ " 'loading': 0.09809506,\n",
+ " 'log': 0.053032227,\n",
+ " 'lopez': 0.37077188,\n",
+ " 'machine': 0.1154489,\n",
+ " 'maintenance': 0.24795005,\n",
+ " 'management': 0.28454626,\n",
+ " 'map': 0.12368915,\n",
+ " 'master': 1.0599743,\n",
+ " 'math': 0.39245087,\n",
+ " 'maximum': 0.37043598,\n",
+ " 'mb': 0.65867126,\n",
+ " 'measure': 0.401138,\n",
+ " 'mechanism': 0.5363481,\n",
+ " 'memory': 1.0781962,\n",
+ " 'metric': 0.9361899,\n",
+ " 'mining': 0.4610803,\n",
+ " 'minute': 0.7122368,\n",
+ " 'minutes': 0.03330799,\n",
+ " 'multiple': 0.28440112,\n",
+ " 'network': 0.70334154,\n",
+ " 'new': 0.36585885,\n",
+ " 'node': 1.1508181,\n",
+ " 'nodes': 0.6786249,\n",
+ " 'number': 0.46848533,\n",
+ " 'online': 0.10060778,\n",
+ " 'operation': 0.013929884,\n",
+ " 'optimal': 0.052087568,\n",
+ " 'overhead': 0.12910955,\n",
+ " 'performance': 0.10508823,\n",
+ " 'po': 0.030801829,\n",
+ " 'poll': 0.032789562,\n",
+ " 'polling': 0.08606442,\n",
+ " 'polls': 0.31255096,\n",
+ " 'predict': 0.038815167,\n",
+ " 'process': 0.32648584,\n",
+ " 'processing': 0.13010792,\n",
+ " 'quan': 0.30870175,\n",
+ " 'rank': 0.23912333,\n",
+ " 'ratio': 1.1149174,\n",
+ " 'ratios': 0.17480499,\n",
+ " 'ready': 0.7220055,\n",
+ " 'reconciliation': 0.03476886,\n",
+ " 'reduce': 0.48650545,\n",
+ " 'regulation': 0.14490134,\n",
+ " 'requirements': 0.26383802,\n",
+ " 'resource': 0.48044914,\n",
+ " 'resources': 0.99925154,\n",
+ " 'sale': 0.23320372,\n",
+ " 'same': 0.04602473,\n",
+ " 'scala': 0.34763098,\n",
+ " 'scale': 1.3520039,\n",
+ " 'scaled': 0.373489,\n",
+ " 'scales': 0.23150739,\n",
+ " 'scaling': 1.3547646,\n",
+ " 'scope': 0.24351352,\n",
+ " 'sea': 0.012636473,\n",
+ " 'search': 0.5437506,\n",
+ " 'seconds': 0.21717648,\n",
+ " 'serial': 0.084758565,\n",
+ " 'server': 0.66100806,\n",
+ " 'si': 0.13631321,\n",
+ " 'sid': 0.4065147,\n",
+ " 'size': 1.4813008,\n",
+ " 'sizes': 1.1315687,\n",
+ " 'software': 0.053653706,\n",
+ " 'sort': 0.34857363,\n",
+ " 'specification': 0.47748893,\n",
+ " 'specifications': 0.54209507,\n",
+ " 'square': 0.0464906,\n",
+ " 'storage': 0.2826658,\n",
+ " 'strategy': 0.105019435,\n",
+ " 'swarm': 0.08799058,\n",
+ " 'three': 0.0456386,\n",
+ " 'tier': 1.2590698,\n",
+ " 'torre': 0.033106416,\n",
+ " 'total': 0.15115097,\n",
+ " 'trainer': 0.28730983,\n",
+ " 'training': 0.91525143,\n",
+ " 'trial': 0.40092948,\n",
+ " 'unit': 0.12670164,\n",
+ " 'up': 0.48489103,\n",
+ " 'user': 0.5006898,\n",
+ " 'users': 0.35868,\n",
+ " 'vote': 0.16288216,\n",
+ " 'voting': 0.2478986,\n",
+ " 'web': 0.44947043},\n",
+ " 'text': 'of '\n",
+ " 'cluster '\n",
+ " 'sizes. '\n",
+ " 'Each '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'determines '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'nodes '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'CPU, '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'disk '\n",
+ " 'size '\n",
+ " 'of '\n",
+ " 'each '\n",
+ " 'node. '\n",
+ " 'All '\n",
+ " 'nodes '\n",
+ " 'within '\n",
+ " 'a '\n",
+ " 'certain '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'have '\n",
+ " 'the '\n",
+ " 'same '\n",
+ " 'hardware '\n",
+ " 'specification. '\n",
+ " 'There '\n",
+ " 'is '\n",
+ " 'a '\n",
+ " 'fixed '\n",
+ " 'ratio '\n",
+ " 'between '\n",
+ " 'CPU, '\n",
+ " 'memory '\n",
+ " 'and '\n",
+ " 'disk, '\n",
+ " 'thus '\n",
+ " 'always '\n",
+ " 'scaling '\n",
+ " 'all '\n",
+ " '3 '\n",
+ " 'resources '\n",
+ " 'linearly. '\n",
+ " 'The '\n",
+ " 'existing '\n",
+ " 'cluster '\n",
+ " 'sizes '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'tier '\n",
+ " 'are '\n",
+ " 'based '\n",
+ " 'on '\n",
+ " 'node '\n",
+ " 'sizes '\n",
+ " 'starting '\n",
+ " 'from '\n",
+ " '4GB/2vCPU/100GB '\n",
+ " 'disk '\n",
+ " 'to '\n",
+ " '64GB/32vCPU/1600GB '\n",
+ " 'disk. '\n",
+ " 'Once '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'scales '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'largest '\n",
+ " 'node '\n",
+ " 'size '\n",
+ " '(64GB '\n",
+ " 'memory), '\n",
+ " 'any '\n",
+ " 'further '\n",
+ " 'scale-up '\n",
+ " 'adds '\n",
+ " 'new '\n",
+ " '64GB '\n",
+ " 'nodes, '\n",
+ " 'allowing '\n",
+ " 'a '\n",
+ " 'cluster '\n",
+ " 'to '\n",
+ " 'scale '\n",
+ " 'up '\n",
+ " 'to '\n",
+ " '32 '\n",
+ " 'nodes '\n",
+ " 'of '\n",
+ " '64GB. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'this '\n",
+ " 'is '\n",
+ " 'not '\n",
+ " 'a '\n",
+ " 'hard '\n",
+ " 'upper '\n",
+ " 'bound '\n",
+ " 'on '\n",
+ " 'the '\n",
+ " 'number '\n",
+ " 'of '\n",
+ " 'Elasticsearch '\n",
+ " 'nodes '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'can '\n",
+ " 'be '\n",
+ " 'increased '\n",
+ " 'if '\n",
+ " 'necessary. '\n",
+ " 'Every '\n",
+ " '5 '\n",
+ " 'seconds '\n",
+ " 'the '\n",
+ " 'autoscaler '\n",
+ " 'polls '\n",
+ " 'metrics '\n",
+ " 'from '\n",
+ " 'the '\n",
+ " 'master '\n",
+ " 'node, '\n",
+ " 'calculates '\n",
+ " 'the '\n",
+ " 'desirable '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'and '\n",
+ " 'if '\n",
+ " 'it '\n",
+ " 'is '\n",
+ " 'different '\n",
+ " 'from '\n",
+ " 'the '\n",
+ " 'current '\n",
+ " 'cluster '\n",
+ " 'size, '\n",
+ " 'it '\n",
+ " 'updates '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'Kubernetes '\n",
+ " 'Deployment '\n",
+ " 'accordingly. '\n",
+ " 'Note '\n",
+ " 'that '\n",
+ " 'the '\n",
+ " 'actual '\n",
+ " 'reconciliation '\n",
+ " 'of '\n",
+ " 'the '\n",
+ " 'deployment '\n",
+ " 'towards '\n",
+ " 'the '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'and '\n",
+ " 'adding '\n",
+ " 'and '\n",
+ " 'removing '\n",
+ " 'the '\n",
+ " 'Elasticsearch '\n",
+ " 'nodes '\n",
+ " 'to '\n",
+ " 'achieve '\n",
+ " 'this '\n",
+ " 'is '\n",
+ " 'done '\n",
+ " 'by '\n",
+ " 'Kubernetes. '\n",
+ " 'In '\n",
+ " 'order '\n",
+ " 'to '\n",
+ " 'avoid '\n",
+ " 'very '\n",
+ " 'short-lived '\n",
+ " 'changes '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'size, '\n",
+ " 'we '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'a '\n",
+ " '10% '\n",
+ " 'headroom '\n",
+ " 'when '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'during '\n",
+ " 'a '\n",
+ " 'scale '\n",
+ " 'down '\n",
+ " 'and '\n",
+ " 'a '\n",
+ " 'scale '\n",
+ " 'down '\n",
+ " 'takes '\n",
+ " 'effect '\n",
+ " 'only '\n",
+ " 'if '\n",
+ " 'all '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'calculations '\n",
+ " 'within '\n",
+ " 'the '\n",
+ " 'past '\n",
+ " '15 '\n",
+ " 'minute '\n",
+ " 'have '\n",
+ " 'indicated '\n",
+ " 'a '\n",
+ " 'scale-down. '\n",
+ " 'Currently, '\n",
+ " 'the '\n",
+ " 'time '\n",
+ " 'that '\n",
+ " 'it '\n",
+ " 'takes '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'increase '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'metrics '\n",
+ " 'to '\n",
+ " 'lead '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'first '\n",
+ " 'Elasticsearch '\n",
+ " 'node '\n",
+ " 'being '\n",
+ " 'added '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'ready '\n",
+ " 'to '\n",
+ " 'process '\n",
+ " 'indexing '\n",
+ " 'load '\n",
+ " 'is '\n",
+ " 'under '\n",
+ " '1 '\n",
+ " 'minute. '\n",
+ " 'Conclusion '\n",
+ " 'In '\n",
+ " 'this '\n",
+ " 'blog '\n",
+ " 'post, '\n",
+ " 'we '\n",
+ " 'explained '\n",
+ " 'how '\n",
+ " 'ingest '\n",
+ " 'autoscaling '\n",
+ " 'works '\n",
+ " 'in '\n",
+ " 'Elasticsearch, '\n",
+ " 'the '\n",
+ " 'different '\n",
+ " 'components '\n",
+ " 'involved, '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'metrics '\n",
+ " 'used '\n",
+ " 'to '\n",
+ " 'quantify '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload. '\n",
+ " 'We '\n",
+ " 'believe '\n",
+ " 'that '\n",
+ " 'such '\n",
+ " 'an '\n",
+ " 'autoscaling '\n",
+ " 'mechanism '\n",
+ " 'is '\n",
+ " 'crucial '\n",
+ " 'to '\n",
+ " 'reduce '\n",
+ " 'the '\n",
+ " 'operational '\n",
+ " 'overhead '\n",
+ " 'of '\n",
+ " 'an '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'users '\n",
+ " 'by '\n",
+ " 'automatically '\n",
+ " 'increasing '\n",
+ " 'the '\n",
+ " 'available '\n",
+ " 'resources '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'when '\n",
+ " 'necessary. '\n",
+ " 'Furthermore, '\n",
+ " 'it '\n",
+ " 'leads '\n",
+ " 'to '\n",
+ " 'cost '\n",
+ " 'reduction '\n",
+ " 'by '\n",
+ " 'scaling '\n",
+ " 'down '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'when '\n",
+ " 'the '\n",
+ " 'available '\n",
+ " 'resources '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'are '\n",
+ " 'not '\n",
+ " 'required '\n",
+ " 'anymore. '\n",
+ " 'Ready '\n",
+ " 'to '\n",
+ " 'try '\n",
+ " 'this '\n",
+ " 'out '\n",
+ " 'on '\n",
+ " 'your '\n",
+ " 'own? '\n",
+ " 'Start '\n",
+ " 'a '\n",
+ " 'free '\n",
+ " 'trial '\n",
+ " '. '\n",
+ " 'Want '\n",
+ " 'to '\n",
+ " 'get '\n",
+ " 'Elastic '\n",
+ " 'certified? '\n",
+ " 'Find '\n",
+ " 'out '\n",
+ " 'when '\n",
+ " 'the '\n",
+ " 'next '\n",
+ " 'Elasticsearch '\n",
+ " 'Engineer '\n",
+ " 'training '\n",
+ " 'is '\n",
+ " 'running! '\n",
+ " 'Pooya '\n",
+ " 'Salehi '\n",
+ " 'Henning '\n",
+ " 'Andersen '\n",
+ " 'Francisco '\n",
+ " 'Fernández '\n",
+ " 'Castaño '\n",
+ " '11 '\n",
+ " 'min '\n",
+ " 'read '\n",
+ " '29 '\n",
+ " 'July '\n",
+ " '2024 '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless '\n",
+ " 'Share '\n",
+ " 'Twitter '\n",
+ " 'Facebook '\n",
+ " 'LinkedIn '\n",
+ " 'Recommended '\n",
+ " 'Articles '\n",
+ " 'Elastic '\n",
+ " 'Cloud'},\n",
+ " {'embeddings': {'##4': 0.5609497,\n",
+ " '##down': 0.011559885,\n",
+ " '##est': 1.1421111,\n",
+ " '##hi': 0.0060656513,\n",
+ " '##ing': 0.48465544,\n",
+ " '##ler': 0.12595108,\n",
+ " '##less': 1.3963115,\n",
+ " '##lessly': 0.76121324,\n",
+ " '##ling': 1.03232,\n",
+ " '##load': 0.6918682,\n",
+ " '##oya': 0.56508857,\n",
+ " '##rch': 0.94580704,\n",
+ " '##room': 1.397477,\n",
+ " '##sca': 1.4164101,\n",
+ " '##sea': 1.4075159,\n",
+ " '10': 0.005647892,\n",
+ " '15': 1.0004816,\n",
+ " '16': 0.0726173,\n",
+ " '202': 0.79451597,\n",
+ " 'account': 0.054787852,\n",
+ " 'accounting': 0.2977837,\n",
+ " 'advantage': 0.13797385,\n",
+ " 'after': 0.04746113,\n",
+ " 'algorithm': 0.84724355,\n",
+ " 'amazon': 0.7599511,\n",
+ " 'analysis': 0.4048887,\n",
+ " 'analyze': 0.12881227,\n",
+ " 'andersen': 0.091110215,\n",
+ " 'anya': 0.031511437,\n",
+ " 'apache': 0.8387389,\n",
+ " 'architect': 0.57877886,\n",
+ " 'archive': 0.027499544,\n",
+ " 'august': 0.523268,\n",
+ " 'auto': 1.4506402,\n",
+ " 'automatic': 0.94025064,\n",
+ " 'availability': 0.348747,\n",
+ " 'available': 0.05306761,\n",
+ " 'blog': 0.8397168,\n",
+ " 'bot': 0.38508278,\n",
+ " 'bug': 0.1267487,\n",
+ " 'build': 0.776895,\n",
+ " 'building': 0.7504958,\n",
+ " 'built': 0.19563165,\n",
+ " 'calculate': 0.3598465,\n",
+ " 'calculating': 0.11605539,\n",
+ " 'calculation': 0.8540975,\n",
+ " 'calculations': 0.57275534,\n",
+ " 'capacity': 0.3109483,\n",
+ " 'cave': 0.29021654,\n",
+ " 'certification': 0.64684826,\n",
+ " 'certified': 0.26541537,\n",
+ " 'checkpoint': 0.06267695,\n",
+ " 'chess': 0.22270066,\n",
+ " 'class': 0.044449553,\n",
+ " 'client': 0.05088419,\n",
+ " 'cloud': 0.9856347,\n",
+ " 'cluster': 1.8377897,\n",
+ " 'clustered': 0.18159664,\n",
+ " 'clusters': 0.79538465,\n",
+ " 'collapse': 0.29267746,\n",
+ " 'component': 0.012821147,\n",
+ " 'components': 0.50653857,\n",
+ " 'computer': 0.22416146,\n",
+ " 'cost': 0.06086615,\n",
+ " 'crawl': 0.27863678,\n",
+ " 'data': 0.23600358,\n",
+ " 'database': 0.386357,\n",
+ " 'decrease': 0.29198787,\n",
+ " 'deployment': 0.4085412,\n",
+ " 'desired': 0.04168813,\n",
+ " 'development': 0.0050133946,\n",
+ " 'dimensions': 0.10934332,\n",
+ " 'disadvantage': 0.33458805,\n",
+ " 'domain': 0.16470446,\n",
+ " 'down': 1.343148,\n",
+ " 'downs': 0.2709486,\n",
+ " 'drop': 0.19782026,\n",
+ " 'during': 0.4177895,\n",
+ " 'effect': 0.39730436,\n",
+ " 'elastic': 1.9854976,\n",
+ " 'engineer': 0.58167315,\n",
+ " 'engineering': 0.5884908,\n",
+ " 'ensemble': 0.007619722,\n",
+ " 'facebook': 0.3225428,\n",
+ " 'fernandez': 0.42895493,\n",
+ " 'fifteen': 0.10546452,\n",
+ " 'first': 0.50220585,\n",
+ " 'forest': 0.14911638,\n",
+ " 'framework': 0.047809396,\n",
+ " 'free': 0.3561092,\n",
+ " 'global': 0.09408311,\n",
+ " 'group': 0.14574468,\n",
+ " 'handling': 0.30345336,\n",
+ " 'head': 0.117694445,\n",
+ " 'hour': 0.3250166,\n",
+ " 'hours': 0.70438623,\n",
+ " 'implement': 0.13235687,\n",
+ " 'implementation': 0.13236406,\n",
+ " 'important': 0.055658367,\n",
+ " 'improve': 0.2550515,\n",
+ " 'increase': 0.74923754,\n",
+ " 'increasing': 0.3597461,\n",
+ " 'index': 1.4273754,\n",
+ " 'indexed': 0.2932871,\n",
+ " 'ing': 1.2874681,\n",
+ " 'introduced': 0.10785041,\n",
+ " 'inventory': 0.65916276,\n",
+ " 'java': 0.88944626,\n",
+ " 'july': 0.14186577,\n",
+ " 'large': 0.06278902,\n",
+ " 'latest': 0.068817586,\n",
+ " 'learning': 0.12424224,\n",
+ " 'length': 0.030345708,\n",
+ " 'limit': 0.14073928,\n",
+ " 'load': 1.0610044,\n",
+ " 'loading': 0.39865428,\n",
+ " 'loss': 0.11432742,\n",
+ " 'machine': 0.029201662,\n",
+ " 'maintenance': 0.15768714,\n",
+ " 'management': 0.31734702,\n",
+ " 'math': 0.406777,\n",
+ " 'maximum': 0.13483465,\n",
+ " 'measure': 0.5081328,\n",
+ " 'mechanism': 0.8204686,\n",
+ " 'memory': 1.0461255,\n",
+ " 'metric': 0.9943368,\n",
+ " 'mining': 0.5402124,\n",
+ " 'minute': 0.92393905,\n",
+ " 'minutes': 0.3759728,\n",
+ " 'moment': 0.11160666,\n",
+ " 'morris': 0.060925715,\n",
+ " 'network': 0.51853234,\n",
+ " 'node': 0.99145895,\n",
+ " 'online': 0.36771652,\n",
+ " 'operation': 0.28533393,\n",
+ " 'overhead': 0.086819395,\n",
+ " 'patience': 0.11310515,\n",
+ " 'perfect': 0.12382903,\n",
+ " 'performance': 0.06312573,\n",
+ " 'process': 0.5356137,\n",
+ " 'processing': 0.55718875,\n",
+ " 'production': 0.05736718,\n",
+ " 'project': 0.14496073,\n",
+ " 'prototype': 0.31378728,\n",
+ " 'quan': 0.22408743,\n",
+ " 'ready': 0.25202373,\n",
+ " 'reduce': 0.5264253,\n",
+ " 'reduction': 0.037918843,\n",
+ " 'research': 0.0142833255,\n",
+ " 'resource': 0.09839988,\n",
+ " 'resources': 0.7532266,\n",
+ " 'rights': 0.08338795,\n",
+ " 'room': 0.84089494,\n",
+ " 'rs': 0.47752637,\n",
+ " 'scala': 0.17796026,\n",
+ " 'scale': 1.6349432,\n",
+ " 'scaled': 0.39957505,\n",
+ " 'scales': 0.24761787,\n",
+ " 'scaling': 1.3751862,\n",
+ " 'scope': 0.009172562,\n",
+ " 'search': 0.6669978,\n",
+ " 'seconds': 0.11594447,\n",
+ " 'serial': 0.21314114,\n",
+ " 'server': 1.1875997,\n",
+ " 'servers': 0.3761195,\n",
+ " 'share': 0.21588095,\n",
+ " 'shrink': 0.08177304,\n",
+ " 'si': 0.039096646,\n",
+ " 'sid': 0.26323187,\n",
+ " 'site': 0.27832702,\n",
+ " 'size': 1.2518198,\n",
+ " 'sizes': 0.68347317,\n",
+ " 'small': 0.021309003,\n",
+ " 'software': 0.21712899,\n",
+ " 'sort': 0.46309024,\n",
+ " 'step': 0.13614927,\n",
+ " 'storage': 0.33423752,\n",
+ " 'strategy': 0.2746019,\n",
+ " 'swarm': 0.18959516,\n",
+ " 'task': 0.12210263,\n",
+ " 'time': 0.3716685,\n",
+ " 'traffic': 0.0044686934,\n",
+ " 'training': 0.56078845,\n",
+ " 'trial': 0.30781624,\n",
+ " 'tutor': 0.18126883,\n",
+ " 'twitter': 0.7352328,\n",
+ " 'useful': 0.07486964,\n",
+ " 'user': 0.61840165,\n",
+ " 'users': 0.5178945,\n",
+ " 'wait': 0.12994274,\n",
+ " 'weaving': 0.09568315,\n",
+ " 'web': 0.3402482,\n",
+ " 'website': 0.17116618,\n",
+ " 'work': 0.38590312,\n",
+ " 'working': 0.040917397,\n",
+ " 'works': 0.2640411,\n",
+ " 'years': 0.057129644},\n",
+ " 'text': 'cluster '\n",
+ " 'size, '\n",
+ " 'we '\n",
+ " 'account '\n",
+ " 'for '\n",
+ " 'a '\n",
+ " '10% '\n",
+ " 'headroom '\n",
+ " 'when '\n",
+ " 'calculating '\n",
+ " 'the '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'during '\n",
+ " 'a '\n",
+ " 'scale '\n",
+ " 'down '\n",
+ " 'and '\n",
+ " 'a '\n",
+ " 'scale '\n",
+ " 'down '\n",
+ " 'takes '\n",
+ " 'effect '\n",
+ " 'only '\n",
+ " 'if '\n",
+ " 'all '\n",
+ " 'desired '\n",
+ " 'cluster '\n",
+ " 'size '\n",
+ " 'calculations '\n",
+ " 'within '\n",
+ " 'the '\n",
+ " 'past '\n",
+ " '15 '\n",
+ " 'minute '\n",
+ " 'have '\n",
+ " 'indicated '\n",
+ " 'a '\n",
+ " 'scale-down. '\n",
+ " 'Currently, '\n",
+ " 'the '\n",
+ " 'time '\n",
+ " 'that '\n",
+ " 'it '\n",
+ " 'takes '\n",
+ " 'for '\n",
+ " 'an '\n",
+ " 'increase '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'metrics '\n",
+ " 'to '\n",
+ " 'lead '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'first '\n",
+ " 'Elasticsearch '\n",
+ " 'node '\n",
+ " 'being '\n",
+ " 'added '\n",
+ " 'to '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'and '\n",
+ " 'ready '\n",
+ " 'to '\n",
+ " 'process '\n",
+ " 'indexing '\n",
+ " 'load '\n",
+ " 'is '\n",
+ " 'under '\n",
+ " '1 '\n",
+ " 'minute. '\n",
+ " 'Conclusion '\n",
+ " 'In '\n",
+ " 'this '\n",
+ " 'blog '\n",
+ " 'post, '\n",
+ " 'we '\n",
+ " 'explained '\n",
+ " 'how '\n",
+ " 'ingest '\n",
+ " 'autoscaling '\n",
+ " 'works '\n",
+ " 'in '\n",
+ " 'Elasticsearch, '\n",
+ " 'the '\n",
+ " 'different '\n",
+ " 'components '\n",
+ " 'involved, '\n",
+ " 'and '\n",
+ " 'the '\n",
+ " 'metrics '\n",
+ " 'used '\n",
+ " 'to '\n",
+ " 'quantify '\n",
+ " 'the '\n",
+ " 'resources '\n",
+ " 'needed '\n",
+ " 'to '\n",
+ " 'handle '\n",
+ " 'the '\n",
+ " 'indexing '\n",
+ " 'workload. '\n",
+ " 'We '\n",
+ " 'believe '\n",
+ " 'that '\n",
+ " 'such '\n",
+ " 'an '\n",
+ " 'autoscaling '\n",
+ " 'mechanism '\n",
+ " 'is '\n",
+ " 'crucial '\n",
+ " 'to '\n",
+ " 'reduce '\n",
+ " 'the '\n",
+ " 'operational '\n",
+ " 'overhead '\n",
+ " 'of '\n",
+ " 'an '\n",
+ " 'Elasticsearch '\n",
+ " 'cluster '\n",
+ " 'for '\n",
+ " 'the '\n",
+ " 'users '\n",
+ " 'by '\n",
+ " 'automatically '\n",
+ " 'increasing '\n",
+ " 'the '\n",
+ " 'available '\n",
+ " 'resources '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'when '\n",
+ " 'necessary. '\n",
+ " 'Furthermore, '\n",
+ " 'it '\n",
+ " 'leads '\n",
+ " 'to '\n",
+ " 'cost '\n",
+ " 'reduction '\n",
+ " 'by '\n",
+ " 'scaling '\n",
+ " 'down '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'when '\n",
+ " 'the '\n",
+ " 'available '\n",
+ " 'resources '\n",
+ " 'in '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'are '\n",
+ " 'not '\n",
+ " 'required '\n",
+ " 'anymore. '\n",
+ " 'Ready '\n",
+ " 'to '\n",
+ " 'try '\n",
+ " 'this '\n",
+ " 'out '\n",
+ " 'on '\n",
+ " 'your '\n",
+ " 'own? '\n",
+ " 'Start '\n",
+ " 'a '\n",
+ " 'free '\n",
+ " 'trial '\n",
+ " '. '\n",
+ " 'Want '\n",
+ " 'to '\n",
+ " 'get '\n",
+ " 'Elastic '\n",
+ " 'certified? '\n",
+ " 'Find '\n",
+ " 'out '\n",
+ " 'when '\n",
+ " 'the '\n",
+ " 'next '\n",
+ " 'Elasticsearch '\n",
+ " 'Engineer '\n",
+ " 'training '\n",
+ " 'is '\n",
+ " 'running! '\n",
+ " 'Pooya '\n",
+ " 'Salehi '\n",
+ " 'Henning '\n",
+ " 'Andersen '\n",
+ " 'Francisco '\n",
+ " 'Fernández '\n",
+ " 'Castaño '\n",
+ " '11 '\n",
+ " 'min '\n",
+ " 'read '\n",
+ " '29 '\n",
+ " 'July '\n",
+ " '2024 '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless '\n",
+ " 'Share '\n",
+ " 'Twitter '\n",
+ " 'Facebook '\n",
+ " 'LinkedIn '\n",
+ " 'Recommended '\n",
+ " 'Articles '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless '\n",
+ " '• '\n",
+ " '15 '\n",
+ " 'May '\n",
+ " '2024 '\n",
+ " 'Building '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless '\n",
+ " 'Explore '\n",
+ " 'the '\n",
+ " 'architectural '\n",
+ " 'decisions '\n",
+ " 'we '\n",
+ " 'made '\n",
+ " 'along '\n",
+ " 'the '\n",
+ " 'journey '\n",
+ " 'of '\n",
+ " 'building '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless. '\n",
+ " 'Jason '\n",
+ " 'Tedor '\n",
+ " 'Pooya '\n",
+ " 'Salehi '\n",
+ " 'Henning '\n",
+ " 'Andersen '\n",
+ " 'Francisco '\n",
+ " 'Fernández '\n",
+ " 'Castaño '\n",
+ " '11 '\n",
+ " 'min '\n",
+ " 'read '\n",
+ " '29 '\n",
+ " 'July '\n",
+ " '2024 '\n",
+ " 'Elastic '\n",
+ " 'Cloud '\n",
+ " 'Serverless '\n",
+ " 'Share '\n",
+ " 'Twitter '\n",
+ " 'Facebook '\n",
+ " 'LinkedIn '\n",
+ " 'Jump '\n",
+ " 'to '\n",
+ " 'Ingest '\n",
+ " 'autoscaling '\n",
+ " 'overview '\n",
+ " 'Metrics '\n",
+ " 'Ingestion '\n",
+ " 'load '\n",
+ " 'Memory '\n",
+ " 'Scaling '\n",
+ " 'the '\n",
+ " 'cluster '\n",
+ " 'Show '\n",
+ " 'more '\n",
+ " 'Sitemap '\n",
+ " 'RSS '\n",
+ " 'Feed '\n",
+ " 'Search '\n",
+ " 'Labs '\n",
+ " 'Repo '\n",
+ " 'Elastic.co '\n",
+ " '©2024. '\n",
+ " 'Elasticsearch '\n",
+ " 'B.V. '\n",
+ " 'All '\n",
+ " 'Rights '\n",
+ " 'Reserved.'}],\n",
+ " 'inference_id': 'my-elser-model',\n",
+ " 'model_settings': {'task_type': 'sparse_embedding'}}},\n",
+ " 'title': 'Elasticsearch ingest autoscaling — '\n",
+ " 'Search Labs',\n",
+ " 'url': 'https://www.elastic.co/search-labs/blog/elasticsearch-ingest-autoscaling',\n",
+ " 'url_host': 'www.elastic.co',\n",
+ " 'url_path': '/search-labs/blog/elasticsearch-ingest-autoscaling',\n",
+ " 'url_path_dir1': 'search-labs',\n",
+ " 'url_path_dir2': 'blog',\n",
+ " 'url_path_dir3': 'elasticsearch-ingest-autoscaling',\n",
+ " 'url_port': 443,\n",
+ " 'url_scheme': 'https'}}],\n",
+ " 'max_score': 1.2861483,\n",
+ " 'total': {'relation': 'eq', 'value': 228}},\n",
+ " 'timed_out': False,\n",
+ " 'took': 2}\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file