From 75e14a783e15befe6938f9ea11f0e882ebf99000 Mon Sep 17 00:00:00 2001 From: Jeff Vestal <53237856+jeffvestal@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:31:16 -0500 Subject: [PATCH] fixed ext and link --- ...The_RAG_Really_Tied_the_App_Together.ipynb | 5597 ++++++++++++++++ ..._The_RAG_Really_Tied_the_App_Togetheripynb | 5624 ----------------- 2 files changed, 5597 insertions(+), 5624 deletions(-) create mode 100644 supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Together.ipynb delete mode 100644 supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb diff --git a/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Together.ipynb b/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Together.ipynb new file mode 100644 index 00000000..daf835f2 --- /dev/null +++ b/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Together.ipynb @@ -0,0 +1,5597 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# ChatGPT and Elasticsearch: The RAG Really Tied the App Together\n", + "\n", + "\n", + "## This notebook will show you how to:\n", + " - Create an Elastic Serverless Project\n", + "- Set up an Inference API\n", + " - This will download and deploy ELSER for embedding inference\n", + "- Create an index template\n", + " - This will use `semantic_text` which will auto-chunk and embed the body of text\n", + "- Use the Elastic Open Crawler to crawl the Elastic Search/Observability/Security Labs\n", + "
\n", + "
\n", + "\n", + "## The [accompanying blog](https://www.elastic.co/search-labs/blog/app/search-labs/blog/chatgpt-elasticsearch-rag-enhancements) takes it further by showing you how to:\n", + "- Use Playground to test chat prompts and configurations\n", + " - Then generate queries for our RAG app\n", + "- Use the queries from Playground to finish out a RAG Chatbot app\n", + " - Python FastAPI backend with React frontend" + ], + "metadata": { + "id": "_ebYbHHh_0hI" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install elasticsearch" + ], + "metadata": { + "id": "_DmXlQWsGNeM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import requests\n", + "import getpass\n", + "from pprint import pprint\n", + "from elasticsearch import Elasticsearch\n", + "from elasticsearch.exceptions import ConnectionTimeout\n", + "from time import sleep\n", + "from IPython.display import clear_output" + ], + "metadata": { + "id": "cuomUVE-zYjB" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Project Setup" + ], + "metadata": { + "id": "HOOv0igTKjMS" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Enter your Cloud API Key\n", + "\n", + "Generate your secret API key at https://cloud.elastic.co/account/keys" + ], + "metadata": { + "id": "yWSg_D91x9mF" + } + }, + { + "cell_type": "code", + "source": [ + "# Prompt the user for input while masking it for security\n", + "api_key = getpass.getpass(\"Enter your API key: \")\n", + "\n", + "print(\"API key successfully entered!\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bidHlfsy2OPf", + "outputId": "ba8305a6-85d7-4173-fde4-166e77c4971e" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Enter your API key: ··········\n", + "API key successfully entered!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## 
Create Elasticsearch project\n", + "[Serverless API Docs](https://www.elastic.co/docs/api/doc/elastic-cloud-serverless/operation/operation-createelasticsearchproject#operation-createelasticsearchproject-body-application-json-optimized_for)" + ], + "metadata": { + "id": "mt4_kL0b0E75" + } + }, + { + "cell_type": "code", + "source": [ + "url = \"https://api.elastic-cloud.com/api/v1/serverless/projects/elasticsearch\"\n", + "\n", + "project_data = {\n", + " \"name\": \"The RAG Really Tied the App Together\",\n", + " \"region_id\": \"aws-us-east-1\",\n", + " \"optimized_for\": \"vector\",\n", + "}\n", + "\n", + "auth_header = f\"ApiKey {api_key}\"\n", + "headers = {\"Content-Type\": \"application/json\", \"Authorization\": auth_header}\n", + "\n", + "es_project = requests.post(url, json=project_data, headers=headers)\n", + "\n", + "if 200 <= es_project.status_code < 300:\n", + " es_project_keys = es_project.json()\n", + " prg_name = es_project_keys[\"name\"]\n", + " print(f\"Project {prg_name} creation started\")\n", + "\n", + " # wait for the project to be initialized and ready\n", + " project_id = es_project.json()[\"id\"]\n", + " print(\"Checking if project is created and ready\")\n", + " loop = 1\n", + " while True:\n", + " es_project_check = requests.get(url + f\"/{project_id}/status\", headers=headers)\n", + " if es_project_check.json()[\"phase\"] == \"initialized\":\n", + " break\n", + " else:\n", + " clear_output(wait=True)\n", + " print(\n", + " f\"Waiting for project to be ready. 
Current status:{es_project_check.json()['phase']} - Loop {loop} Sleeping 10 seconds\"\n", + " )\n", + " sleep(10)\n", + " loop += 1\n", + "\n", + " print(\"Project is ready\")\n", + "\n", + "else:\n", + " print(es_project.text)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lVkyA7KUyDEO", + "outputId": "8dd818ba-9c41-4e90-d718-319dbfcf6062" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Waiting for project to be ready. Current status:initializing - Loop 7 Sleeping 10 seconds\n", + "Project is ready\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Create elasticsearch client" + ], + "metadata": { + "id": "Uh0JpsnONMhv" + } + }, + { + "cell_type": "code", + "source": [ + "es = Elasticsearch(\n", + " es_project_keys[\"endpoints\"][\"elasticsearch\"],\n", + " basic_auth=(\n", + " es_project_keys[\"credentials\"][\"username\"],\n", + " es_project_keys[\"credentials\"][\"password\"],\n", + " ),\n", + ")" + ], + "metadata": { + "id": "KG01YrIwMdHz" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Project API Key\n", + "Create a [Project level API key](https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html)" + ], + "metadata": { + "id": "Xhu2U-YszbDe" + } + }, + { + "cell_type": "code", + "source": [ + "project_key_response = es.security.create_api_key(\n", + " name=\"full_access_key\",\n", + " metadata={\"description\": \"API key for full access\"},\n", + " expiration=\"14d\",\n", + ")\n", + "\n", + "project_api_key = project_key_response[\"encoded\"]\n", + "print(f\"{project_key_response['name']} has been created\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Puj1UWIKVtSv", + "outputId": "0f5d6937-1204-4f35-9f12-d9eb517cf675" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": 
"stdout", + "text": [ + "full_access_key has been created\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Inference API and Index Setup" + ], + "metadata": { + "id": "C-J2hDsVWIut" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Inference API\n", + "This will:\n", + "- Create an inference API endpoint\n", + "- Download ELSER model (if not already downloaded)\n", + "- Deploy ELSER model with `service_settings` configs\n", + "\n", + "Note - This will wait for ELSER to be downloaded and deployed" + ], + "metadata": { + "id": "AjGmk-jwXi_4" + } + }, + { + "cell_type": "code", + "source": [ + "model_config = {\n", + " \"service\": \"elser\",\n", + " \"service_settings\": {\"num_allocations\": 8, \"num_threads\": 1},\n", + "}\n", + "\n", + "inference_id = \"my-elser-model\"\n", + "\n", + "try:\n", + " create_endpoint = es.inference.put_model(\n", + " inference_id=inference_id, task_type=\"sparse_embedding\", body=model_config\n", + " )\n", + "\n", + "except ConnectionTimeout:\n", + " print(\n", + " \"Connection timed out. 
This can happen while waiting for the Inference model to fully deploy and start.\"\n", + " )\n", + "finally:\n", + " print(\"Waiting for inference model to be fully deployed\")\n", + " inf_info = es.inference.get_model(inference_id=inference_id)\n", + " model_id = inf_info.body[\"endpoints\"][0][\"service_settings\"][\"model_id\"]\n", + "\n", + " while True:\n", + " try:\n", + " model_stats = es.ml.get_trained_models_stats(model_id=model_id)\n", + " routing_state = model_stats.body[\"trained_model_stats\"][0][\n", + " \"deployment_stats\"\n", + " ][\"nodes\"][0][\"routing_state\"][\"routing_state\"]\n", + "\n", + " if routing_state == \"started\":\n", + " print(\"Inference API created and Inference model is fully deployed.\")\n", + " break\n", + " else:\n", + " clear_output(wait=True)\n", + " print(\"Waiting for inference model to be fully deployed\")\n", + " sleep(5)\n", + " except (IndexError, KeyError): # Handle missing data in the response\n", + " clear_output(wait=True)\n", + " print(\"Still waiting for model deployment...\")\n", + " sleep(5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yblutX5J1LT1", + "outputId": "9810ce99-1d6f-413e-d4ba-89ed6b4391e4" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Waiting for inference model to be fully deployed\n", + "Inference API created and Inference model is fully deployed.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Create index template\n", + "The two key fields here are:\n", + "- body\n", + " - the field with the body of text and we use that as the source to copy to our semantic text field `semantic_body`\n", + "- semantic_body\n", + " - This field will automatically handle chunking and generating embeddings" + ], + "metadata": { + "id": "hixAZWcxkBkZ" + } + }, + { + "cell_type": "code", + "source": [ + "template_body = {\n", + " \"index_patterns\": [\"elastic-labs*\"],\n", + " 
\"template\": {\n", + " \"mappings\": {\n", + " \"properties\": {\n", + " \"body\": {\"type\": \"text\", \"copy_to\": \"semantic_body\"},\n", + " \"semantic_body\": {\n", + " \"type\": \"semantic_text\",\n", + " \"inference_id\": \"my-elser-model\",\n", + " },\n", + " \"headings\": {\"type\": \"text\"},\n", + " \"id\": {\"type\": \"keyword\"},\n", + " \"meta_description\": {\"type\": \"text\"},\n", + " \"title\": {\"type\": \"text\"},\n", + " }\n", + " }\n", + " },\n", + "}\n", + "\n", + "template_resp = es.indices.put_index_template(name=\"labs_template\", body=template_body)\n", + "\n", + "print(template_resp.body)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "EBEyEVg1kDh2", + "outputId": "3355ee8d-30c5-4675-f039-77675cae1ba3" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'acknowledged': True}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Crawl the docs" + ], + "metadata": { + "id": "hm65dUPTBTpb" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Open Crawler\n", + "This HAS TO BE RUN on a Linux/Mac/Windows host/vm NOT in colab\n", + "\n", + "The [blog details the steps](https://www.elastic.co/search-labs/blog/app/search-labs/blog/rag-ties-the-room-together#crawl-all-the-labs) below running on a Macbook\n", + "\n", + "You can also review the [Open Crawler setup](https://github.com/elastic/crawler?tab=readme-ov-file#setup)." + ], + "metadata": { + "id": "EJ5D8bh3BWX5" + } + }, + { + "cell_type": "markdown", + "source": [ + "## High level steps to configure and run crawler\n", + "*This HAS TO BE RUN on a Linux/Mac/Windows host/vm NOT in colab*\n", + "\n", + "- Clone the repo\n", + " - `git clone git@github.com:elastic/crawler.git`\n", + "- Build the Open Crawler Docker container\n", + " - `docker build -t crawler-image . 
&& docker run -i -d --name crawler crawler-image`\n", + "- Create a new config file\n", + " - `vi config/elastic-labs.yml`\n", + " - run the _generate config_ cell below then paste the output in the config file and save.\n", + "- Copy the new local config into the container\n", + " - `docker cp config/elastic-labs.yml crawler:/app/config/elastic-labs.yml`\n", + "- Run the crawler\n", + " - `docker exec -it crawler bin/crawler crawl config/elastic-labs.yml`" + ], + "metadata": { + "id": "WMjjJTXR_hhD" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Generate Config\n", + "Run the below cell to generate the yml config file" + ], + "metadata": { + "id": "2ZB6L76Y8thR" + } + }, + { + "cell_type": "code", + "source": [ + "config = f\"\"\"\n", + "domains:\n", + " - url: https://www.elastic.co\n", + " seed_urls:\n", + " - https://www.elastic.co/search-labs\n", + " - https://www.elastic.co/observability-labs\n", + " - https://www.elastic.co/security-labs\n", + " crawl_rules:\n", + " - policy: allow\n", + " type: begins\n", + " pattern: /search-labs\n", + " - policy: allow\n", + " type: begins\n", + " pattern: /observability-labs\n", + " - policy: allow\n", + " type: begins\n", + " pattern: /security-labs\n", + " - policy: deny\n", + " type: regex\n", + " pattern: .*/author/.*\n", + " - policy: deny\n", + " type: regex\n", + " pattern: .*\n", + "\n", + "output_sink: elasticsearch\n", + "output_index: elastic-labs\n", + "max_crawl_depth: 25\n", + "\n", + "elasticsearch:\n", + " host: \"{es_project.json()['endpoints']['elasticsearch']}\"\n", + " port: \"443\"\n", + " api_key: \"{project_api_key}\"\n", + " bulk_api.max_items: 10\n", + "\"\"\"\n", + "\n", + "print(config)" + ], + "metadata": { + "id": "2XDfDkdM85lN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Confirm the docs have been crawled" + ], + "metadata": { + "id": "v7wxtXHABiC8" + } + }, + { + "cell_type": "markdown", + "source": [ + "First look at 
the count of docs for each Labs' site" + ], + "metadata": { + "id": "a6mItejyBovz" + } + }, + { + "cell_type": "code", + "source": [ + "query = {\n", + " \"size\": 0,\n", + " \"aggs\": {\"url_path_dir1\": {\"terms\": {\"field\": \"url_path_dir1.keyword\"}}},\n", + "}\n", + "\n", + "response = es.search(index=\"elastic-labs\", body=query)\n", + "pprint(response.body)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_OZxWsx_BVy_", + "outputId": "e7333fa5-37b9-46c7-96f1-622d98e52521" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},\n", + " 'aggregations': {'url_path_dir1': {'buckets': [{'doc_count': 216,\n", + " 'key': 'search-labs'},\n", + " {'doc_count': 214,\n", + " 'key': 'security-labs'},\n", + " {'doc_count': 158,\n", + " 'key': 'observability-labs'}],\n", + " 'doc_count_error_upper_bound': 0,\n", + " 'sum_other_doc_count': 0}},\n", + " 'hits': {'hits': [],\n", + " 'max_score': None,\n", + " 'total': {'relation': 'eq', 'value': 588}},\n", + " 'timed_out': False,\n", + " 'took': 6}\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Next review a sample doc" + ], + "metadata": { + "id": "UnTStG_TCpp1" + } + }, + { + "cell_type": "code", + "source": [ + "query = {\"size\": 1, \"query\": {\"match\": {\"url_path_dir2\": \"blog\"}}}\n", + "\n", + "response = es.search(index=\"elastic-labs\", body=query)\n", + "pprint(response.body)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-1vWFv7cCuFu", + "outputId": "eaa4aa08-13a1-459a-dae0-9c8c1f0a69fc" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1;30;43mStreaming output truncated to the last 5000 lines.\u001b[0m\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'exposes '\n", + " 'a '\n", + " 'list '\n", + " 'of 
'\n", + " 'ingestion '\n", + " 'load '\n", + " 'values, '\n", + " 'one '\n", + " 'for '\n", + " 'each '\n", + " 'indexing '\n", + " 'node. '\n", + " 'Note '\n", + " 'that '\n", + " 'as '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pools '\n", + " '(which '\n", + " 'handle '\n", + " 'indexing '\n", + " 'requests) '\n", + " 'are '\n", + " 'sized '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'CPU '\n", + " 'cores '\n", + " 'on '\n", + " 'the '\n", + " 'node, '\n", + " 'this '\n", + " 'essentially '\n", + " 'determines '\n", + " 'the '\n", + " 'total '\n", + " 'number '\n", + " 'of '\n", + " 'cores '\n", + " 'that '\n", + " 'is '\n", + " 'needed '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload. '\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'on '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'consists '\n", + " 'of '\n", + " 'two '\n", + " 'components: '\n", + " 'Thread '\n", + " 'pool '\n", + " 'utilization: '\n", + " 'the '\n", + " 'average '\n", + " 'number '\n", + " 'of '\n", + " 'threads '\n", + " 'in '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pool '\n", + " 'processing '\n", + " 'indexing '\n", + " 'requests '\n", + " 'during '\n", + " 'that '\n", + " 'sampling '\n", + " 'period. '\n", + " 'Queued '\n", + " 'ingestion '\n", + " 'load: '\n", + " 'the '\n", + " 'estimated '\n", + " 'number '\n", + " 'of '\n", + " 'threads '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'queued '\n", + " 'write '\n", + " 'requests. 
'\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'is '\n", + " 'calculated '\n", + " 'as '\n", + " 'the '\n", + " 'sum '\n", + " 'of '\n", + " 'these '\n", + " 'two '\n", + " 'values '\n", + " 'for '\n", + " 'all '\n", + " 'the '\n", + " 'three '\n", + " 'write '\n", + " 'thread '\n", + " 'pools '\n", + " '. '\n", + " 'The '\n", + " 'total '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'is '\n", + " 'the '\n", + " 'sum '\n", + " 'of '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'the '\n", + " 'individual '\n", + " 'nodes. '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 't '\n", + " 'h'},\n", + " {'embeddings': {'##est': 1.3433179,\n", + " '##estinal': 0.5916747,\n", + " '##ical': 0.21335103,\n", + " '##ing': 0.66160166,\n", + " '##ion': 1.223692,\n", + " '##l': 0.06755174,\n", + " '##ler': 0.34178317,\n", + " '##line': 0.6707441,\n", + " '##ling': 1.0343578,\n", + " '##load': 0.9880499,\n", + " '##mat': 0.01314945,\n", + " '##rch': 1.3459072,\n", + " '##s': 0.25005433,\n", + " '##sca': 1.6867673,\n", + " '##scu': 0.028700678,\n", + " '##sea': 1.6748068,\n", + " '_': 0.28835136,\n", + " 'access': 0.116686985,\n", + " 'accounting': 0.15865436,\n", + " 'algorithm': 1.0487378,\n", + " 'algorithms': 0.2763102,\n", + " 'allocation': 0.1481772,\n", + " 'amazon': 0.9099395,\n", + " 'among': 0.04313716,\n", + " 'anal': 0.025087006,\n", + " 'analysis': 0.64178395,\n", + " 'analyze': 0.18673302,\n", + " 'and': 0.19101046,\n", + " 'apache': 0.6617465,\n", + " 'api': 1.4468017,\n", + " 'approximate': 0.026616694,\n", + " 'are': 0.19081613,\n", + " 
'arithmetic': 0.12217364,\n", + " 'ass': 0.12156314,\n", + " 'auto': 1.4633765,\n", + " 'automatic': 0.73048806,\n", + " 'availability': 0.20461462,\n", + " 'average': 0.58710635,\n", + " 'bot': 0.12357169,\n", + " 'buffer': 0.14556783,\n", + " 'calculate': 0.02387442,\n", + " 'calculated': 0.2452304,\n", + " 'calculation': 0.81089926,\n", + " 'called': 0.2972479,\n", + " 'capacity': 0.60224617,\n", + " 'catalog': 0.078262925,\n", + " 'category': 0.21683785,\n", + " 'checkpoint': 0.012995078,\n", + " 'chess': 0.41694775,\n", + " 'chip': 0.10178017,\n", + " 'class': 0.5914888,\n", + " 'classification': 0.17686933,\n", + " 'cluster': 1.4369037,\n", + " 'clusters': 0.21254443,\n", + " 'comply': 0.131236,\n", + " 'component': 0.37191656,\n", + " 'components': 0.87235415,\n", + " 'computation': 0.47024545,\n", + " 'compute': 0.14372817,\n", + " 'computer': 0.397558,\n", + " 'constant': 0.09540719,\n", + " 'consumption': 0.123454005,\n", + " 'cope': 0.7024604,\n", + " 'core': 0.62535626,\n", + " 'cores': 1.0230916,\n", + " 'cpu': 0.874175,\n", + " 'crawl': 0.23010625,\n", + " 'current': 0.5516459,\n", + " 'data': 0.25792596,\n", + " 'database': 0.4601695,\n", + " 'determine': 0.3844099,\n", + " 'determined': 0.41348428,\n", + " 'diagram': 0.025166756,\n", + " 'dimensions': 0.07042265,\n", + " 'disk': 0.07931721,\n", + " 'each': 0.22229394,\n", + " 'elastic': 1.8257822,\n", + " 'enter': 0.058845505,\n", + " 'equation': 0.43812877,\n", + " 'es': 0.8055687,\n", + " 'estimate': 0.03608101,\n", + " 'estimated': 0.46266982,\n", + " 'execution': 0.05638616,\n", + " 'factors': 0.12973839,\n", + " 'forest': 0.3904727,\n", + " 'formula': 0.016075172,\n", + " 'framework': 0.34186286,\n", + " 'g': 0.08017753,\n", + " 'gage': 0.30852094,\n", + " 'gene': 0.27250904,\n", + " 'handle': 0.9037246,\n", + " 'handling': 0.69093794,\n", + " 'implement': 0.053764082,\n", + " 'index': 1.3896008,\n", + " 'indexed': 0.25086805,\n", + " 'ing': 1.5002296,\n", + " 'integration': 0.20222682,\n", + " 
'interface': 0.25386703,\n", + " 'inventory': 0.5645011,\n", + " 'is': 0.05772473,\n", + " 'java': 1.2391971,\n", + " 'l': 0.048691455,\n", + " 'lake': 0.24773102,\n", + " 'lane': 0.25919613,\n", + " 'lang': 0.039321195,\n", + " 'learning': 0.033810128,\n", + " 'library': 0.14143226,\n", + " 'list': 0.10985089,\n", + " 'lists': 0.12752165,\n", + " 'load': 1.7350225,\n", + " 'loaded': 0.057171866,\n", + " 'loading': 0.75305617,\n", + " 'loads': 0.12072936,\n", + " 'log': 0.06388949,\n", + " 'machine': 0.47294563,\n", + " 'mass': 0.092697844,\n", + " 'math': 0.7472431,\n", + " 'matrix': 0.045127213,\n", + " 'maximum': 0.094020285,\n", + " 'measure': 0.32414404,\n", + " 'memories': 0.03024405,\n", + " 'memory': 1.2586498,\n", + " 'method': 0.016832462,\n", + " 'metric': 1.1439759,\n", + " 'mining': 0.40203753,\n", + " 'mp': 0.09331862,\n", + " 'multi': 0.031247457,\n", + " 'multiple': 0.38688186,\n", + " 'n': 0.33228758,\n", + " 'need': 0.19645856,\n", + " 'network': 0.42359397,\n", + " 'new': 0.041632555,\n", + " 'node': 1.3807943,\n", + " 'nodes': 0.63807905,\n", + " 'number': 0.4450389,\n", + " 'o': 0.50335085,\n", + " 'operation': 0.008523868,\n", + " 'order': 0.08601924,\n", + " 'pattern': 0.11067777,\n", + " 'percent': 0.13746342,\n", + " 'performance': 0.41614294,\n", + " 'period': 0.49507552,\n", + " 'pool': 1.3188534,\n", + " 'poole': 0.3433027,\n", + " 'pools': 1.2800426,\n", + " 'predict': 0.23377013,\n", + " 'processing': 1.0733001,\n", + " 'processor': 0.10840816,\n", + " 'pure': 0.11351536,\n", + " 'quantity': 0.109573685,\n", + " 'queue': 1.1129105,\n", + " 'ram': 0.14691876,\n", + " 'rank': 0.36504152,\n", + " 'ratio': 0.011385939,\n", + " 'read': 0.13304754,\n", + " 'represent': 0.42444453,\n", + " 'representation': 0.058323957,\n", + " 'request': 0.755568,\n", + " 'requests': 0.7039498,\n", + " 'routing': 0.060857404,\n", + " 'sample': 0.62170815,\n", + " 'sampling': 0.8610632,\n", + " 'scala': 0.25192302,\n", + " 'scale': 0.5968038,\n", + " 'sea': 
0.20613533,\n", + " 'search': 0.4318061,\n", + " 'semi': 0.33687106,\n", + " 'sequence': 0.23863083,\n", + " 'serial': 0.15801017,\n", + " 'server': 0.16233677,\n", + " 'si': 0.2002626,\n", + " 'sid': 0.44975162,\n", + " 'size': 0.8577202,\n", + " 'sized': 0.21010487,\n", + " 'sizes': 0.4059122,\n", + " 'small': 0.09116832,\n", + " 'software': 0.09232291,\n", + " 'sort': 0.35720947,\n", + " 'sorting': 0.06234357,\n", + " 'spectrum': 0.07792632,\n", + " 'sql': 0.116530605,\n", + " 'statistical': 0.0852167,\n", + " 'statistics': 0.22820702,\n", + " 'stomach': 0.018201118,\n", + " 'sum': 0.89766365,\n", + " 'swarm': 0.20437151,\n", + " 'table': 0.007837142,\n", + " 'task': 0.37974054,\n", + " 'taste': 0.053832427,\n", + " 'taylor': 0.10206632,\n", + " 'thread': 1.5052487,\n", + " 'threads': 1.2515007,\n", + " 'three': 0.27322263,\n", + " 'total': 0.64918166,\n", + " 'tree': 0.098200426,\n", + " 'unit': 0.15584692,\n", + " 'used': 0.56170344,\n", + " 'useful': 0.34977943,\n", + " 'utilization': 1.0091052,\n", + " 'value': 0.7453479,\n", + " 'values': 0.63835937,\n", + " 'vector': 0.3917736,\n", + " 'weaving': 0.11804886,\n", + " 'web': 0.46383187,\n", + " 'work': 0.29207155,\n", + " 'write': 1.1660185,\n", + " 'writing': 0.25973478,\n", + " 'z': 0.3776876},\n", + " 'text': 'that '\n", + " 'are '\n", + " 'used '\n", + " 'for '\n", + " 'ingest '\n", + " 'autoscaling '\n", + " 'in '\n", + " 'Elasticsearch '\n", + " 'are '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'memory. '\n", + " 'Ingestion '\n", + " 'load '\n", + " 'Ingestion '\n", + " 'load '\n", + " 'represents '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'threads '\n", + " 'that '\n", + " 'is '\n", + " 'needed '\n", + " 'to '\n", + " 'cope '\n", + " 'with '\n", + " 'the '\n", + " 'current '\n", + " 'indexing '\n", + " 'load. 
'\n", + " 'The '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'exposes '\n", + " 'a '\n", + " 'list '\n", + " 'of '\n", + " 'ingestion '\n", + " 'load '\n", + " 'values, '\n", + " 'one '\n", + " 'for '\n", + " 'each '\n", + " 'indexing '\n", + " 'node. '\n", + " 'Note '\n", + " 'that '\n", + " 'as '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pools '\n", + " '(which '\n", + " 'handle '\n", + " 'indexing '\n", + " 'requests) '\n", + " 'are '\n", + " 'sized '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'CPU '\n", + " 'cores '\n", + " 'on '\n", + " 'the '\n", + " 'node, '\n", + " 'this '\n", + " 'essentially '\n", + " 'determines '\n", + " 'the '\n", + " 'total '\n", + " 'number '\n", + " 'of '\n", + " 'cores '\n", + " 'that '\n", + " 'is '\n", + " 'needed '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload. '\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'on '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'consists '\n", + " 'of '\n", + " 'two '\n", + " 'components: '\n", + " 'Thread '\n", + " 'pool '\n", + " 'utilization: '\n", + " 'the '\n", + " 'average '\n", + " 'number '\n", + " 'of '\n", + " 'threads '\n", + " 'in '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pool '\n", + " 'processing '\n", + " 'indexing '\n", + " 'requests '\n", + " 'during '\n", + " 'that '\n", + " 'sampling '\n", + " 'period. '\n", + " 'Queued '\n", + " 'ingestion '\n", + " 'load: '\n", + " 'the '\n", + " 'estimated '\n", + " 'number '\n", + " 'of '\n", + " 'threads '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'queued '\n", + " 'write '\n", + " 'requests. 
'\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'is '\n", + " 'calculated '\n", + " 'as '\n", + " 'the '\n", + " 'sum '\n", + " 'of '\n", + " 'these '\n", + " 'two '\n", + " 'values '\n", + " 'for '\n", + " 'all '\n", + " 'the '\n", + " 'three '\n", + " 'write '\n", + " 'thread '\n", + " 'pools '\n", + " '. '\n", + " 'The '\n", + " 'total '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'is '\n", + " 'the '\n", + " 'sum '\n", + " 'of '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'of '\n", + " 'the '\n", + " 'individual '\n", + " 'nodes. '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 't '\n", + " 'h '\n", + " 'r '\n", + " 'e '\n", + " 'a '\n", + " 'd '\n", + " '_ '\n", + " 'p '\n", + " 'o '\n", + " 'o '\n", + " 'l '\n", + " '_ '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'l '\n", + " 'i '\n", + " 'z '\n", + " 'a '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '+ '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 't '\n", + " 'o '\n", + " 't '\n", + " 'a '\n", + " 'l '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g 
'\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " '\\\\small '\n", + " 'node\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\sum(thread\\\\_pool\\\\_utilization '\n", + " '+ '\n", + " 'queued\\\\_ingestion\\\\_load) '\n", + " '\\\\newline '\n", + " 'total\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\sum(node\\\\_ingestion\\\\_load) '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 't '\n", + " 'h '\n", + " 're '\n", + " 'a '\n", + " 'd '\n", + " '_ '\n", + " 'p '\n", + " 'oo '\n", + " 'l '\n", + " '_ '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'l '\n", + " 'i '\n", + " 'z '\n", + " 'a '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '+ '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 't '\n", + " 'o '\n", + " 't '\n", + " 'a '\n", + " 'l '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 'Figure '\n", + " '2 '\n", + " ': '\n", + " 'ingestion'},\n", + " {'embeddings': {'##able': 0.5624876,\n", + " '##ba': 0.10684605,\n", + " '##d': 0.12233314,\n", + " '##est': 0.84587747,\n", + " '##ima': 
0.2508807,\n", + " '##ing': 0.57414246,\n", + " '##ion': 1.1121849,\n", + " '##line': 1.1430916,\n", + " '##ma': 1.1706055,\n", + " '##w': 1.3673741,\n", + " '##ws': 0.33763555,\n", + " '10': 0.51392806,\n", + " '200': 0.73087466,\n", + " '30': 0.45019,\n", + " '60': 1.3045075,\n", + " '[UNK]': 0.2956499,\n", + " '_': 0.33742356,\n", + " 'acceptable': 0.29635867,\n", + " 'access': 0.23300913,\n", + " 'accounting': 0.1906402,\n", + " 'achieve': 0.19722655,\n", + " 'algorithm': 1.1037958,\n", + " 'algorithms': 0.26360378,\n", + " 'allocation': 0.53156596,\n", + " 'analysis': 0.41347402,\n", + " 'apache': 0.54295164,\n", + " 'api': 0.21713388,\n", + " 'approximate': 0.51163644,\n", + " 'arithmetic': 0.005784557,\n", + " 'availability': 0.4917338,\n", + " 'average': 0.8478212,\n", + " 'batch': 0.08666975,\n", + " 'blocking': 0.02501016,\n", + " 'bot': 0.06050198,\n", + " 'buffer': 0.40386045,\n", + " 'bug': 0.055751722,\n", + " 'busy': 1.3026394,\n", + " 'calculate': 0.26999432,\n", + " 'calculation': 0.74316484,\n", + " 'capacity': 0.6725085,\n", + " 'chess': 0.25134456,\n", + " 'class': 0.328252,\n", + " 'client': 0.23896244,\n", + " 'clock': 1.125488,\n", + " 'cluster': 0.5103067,\n", + " 'component': 0.2536751,\n", + " 'components': 0.78435194,\n", + " 'computation': 0.62016183,\n", + " 'compute': 0.06482519,\n", + " 'computer': 0.32330835,\n", + " 'concurrency': 0.011380989,\n", + " 'configuration': 0.6887391,\n", + " 'configured': 0.26263618,\n", + " 'constant': 0.29082793,\n", + " 'consumption': 0.16989039,\n", + " 'cpu': 0.3717718,\n", + " 'database': 0.13461274,\n", + " 'e': 0.7789312,\n", + " 'effect': 0.09419204,\n", + " 'effort': 0.055172946,\n", + " 'employee': 0.3274528,\n", + " 'employees': 0.14320064,\n", + " 'ensemble': 0.19942468,\n", + " 'equation': 0.3787911,\n", + " 'equivalent': 0.050270963,\n", + " 'error': 0.12898737,\n", + " 'es': 0.043630168,\n", + " 'est': 0.20599021,\n", + " 'estimate': 1.0792123,\n", + " 'estimated': 0.39457676,\n", + " 
'estimates': 0.465428,\n", + " 'estimation': 0.080784135,\n", + " 'every': 0.16873945,\n", + " 'excess': 1.0022457,\n", + " 'excessive': 0.451759,\n", + " 'execute': 0.59175754,\n", + " 'executing': 0.091966435,\n", + " 'execution': 1.3065349,\n", + " 'existing': 0.6437884,\n", + " 'exponential': 1.1467187,\n", + " 'extra': 0.26056916,\n", + " 'figure': 0.019528389,\n", + " 'finish': 0.012790194,\n", + " 'finished': 0.21236378,\n", + " 'flow': 0.10995065,\n", + " 'g': 0.43504617,\n", + " 'gage': 0.4229588,\n", + " 'group': 0.43960038,\n", + " 'guild': 0.014967873,\n", + " 'handle': 0.80899215,\n", + " 'handling': 0.7681083,\n", + " 'heap': 0.3867438,\n", + " 'hours': 0.7462872,\n", + " 'http': 0.20072725,\n", + " 'implement': 0.16245411,\n", + " 'implementation': 0.2408709,\n", + " 'improve': 0.10136651,\n", + " 'index': 1.2976965,\n", + " 'indexed': 0.10614389,\n", + " 'ing': 1.2063053,\n", + " 'inventory': 0.25356865,\n", + " 'java': 1.2153534,\n", + " 'l': 0.48968774,\n", + " 'lake': 0.27167574,\n", + " 'lane': 0.54473066,\n", + " 'length': 0.64622724,\n", + " 'library': 0.08392323,\n", + " 'line': 0.5581907,\n", + " 'load': 1.5088638,\n", + " 'loading': 0.5335804,\n", + " 'machine': 0.3173762,\n", + " 'manage': 0.5220977,\n", + " 'managed': 0.45824686,\n", + " 'management': 0.3230387,\n", + " 'mass': 0.15742503,\n", + " 'math': 0.81244004,\n", + " 'maximum': 0.34374076,\n", + " 'measure': 0.25600985,\n", + " 'memory': 0.5085309,\n", + " 'mining': 0.4451848,\n", + " 'minute': 0.39483455,\n", + " 'minutes': 0.22895378,\n", + " 'moving': 0.76410496,\n", + " 'mp': 0.046217,\n", + " 'multiple': 0.10666605,\n", + " 'n': 0.5416694,\n", + " 'network': 0.3097243,\n", + " 'new': 0.49582836,\n", + " 'node': 1.1907045,\n", + " 'number': 0.47905272,\n", + " 'o': 0.47123736,\n", + " 'operation': 0.19577809,\n", + " 'optimal': 0.1733028,\n", + " 'par': 0.09612937,\n", + " 'percent': 0.1152151,\n", + " 'performance': 0.74001515,\n", + " 'pool': 1.7006081,\n", + " 'poole': 
0.36192703,\n", + " 'pools': 1.0764378,\n", + " 'predict': 0.38117534,\n", + " 'probe': 0.2430691,\n", + " 'process': 0.12230635,\n", + " 'processing': 0.47061718,\n", + " 'proportion': 0.2145018,\n", + " 'proportional': 1.1204233,\n", + " 'proposal': 0.1401456,\n", + " 'q': 0.3259466,\n", + " 'queue': 1.580318,\n", + " 'r': 0.14266703,\n", + " 'rank': 0.13613336,\n", + " 'rate': 0.39469108,\n", + " 'request': 1.1001134,\n", + " 'requests': 0.63539153,\n", + " 'resolution': 0.055606272,\n", + " 'resource': 0.21417612,\n", + " 'resources': 0.7937882,\n", + " 'routing': 0.14261606,\n", + " 'sample': 1.0720835,\n", + " 'sampled': 1.0306277,\n", + " 'samples': 1.2079935,\n", + " 'sampling': 0.6740413,\n", + " 'scala': 0.07395835,\n", + " 'script': 0.10171158,\n", + " 'second': 0.18827602,\n", + " 'seconds': 0.817573,\n", + " 'sequence': 0.49634397,\n", + " 'serial': 0.033651996,\n", + " 'server': 0.32002103,\n", + " 'share': 0.27626935,\n", + " 'sid': 0.27850676,\n", + " 'size': 0.11843514,\n", + " 'small': 0.75451213,\n", + " 'speed': 0.30091006,\n", + " 'sql': 0.31397846,\n", + " 'statistical': 0.0100006005,\n", + " 'strategy': 0.08963276,\n", + " 'stream': 0.028335843,\n", + " 'sum': 1.1407199,\n", + " 'surplus': 0.15598625,\n", + " 'swarm': 0.054142684,\n", + " 'task': 1.2177191,\n", + " 'tasks': 1.0780356,\n", + " 'taylor': 0.24217507,\n", + " 'technique': 0.0030198945,\n", + " 'thread': 1.7842301,\n", + " 'threads': 0.9916815,\n", + " 'time': 0.9839317,\n", + " 'timer': 0.19039534,\n", + " 'times': 0.5299459,\n", + " 'total': 0.40682667,\n", + " 'traffic': 0.28910428,\n", + " 'universe': 0.013594781,\n", + " 'usage': 0.5520448,\n", + " 'utilization': 1.6104044,\n", + " 'value': 0.6036144,\n", + " 'values': 0.33944046,\n", + " 'w': 0.4972394,\n", + " 'wait': 0.005872378,\n", + " 'wall': 1.1351137,\n", + " 'weaving': 0.13777943,\n", + " 'web': 0.2821159,\n", + " 'weighted': 1.1533256,\n", + " 'worker': 1.0417976,\n", + " 'workers': 1.2245823,\n", + " 'z': 
0.29032487},\n", + " 'text': 'r '\n", + " 'e '\n", + " 'a '\n", + " 'd '\n", + " '_ '\n", + " 'p '\n", + " 'o '\n", + " 'o '\n", + " 'l '\n", + " '_ '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'l '\n", + " 'i '\n", + " 'z '\n", + " 'a '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '+ '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 't '\n", + " 'o '\n", + " 't '\n", + " 'a '\n", + " 'l '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " '\\\\small '\n", + " 'node\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\sum(thread\\\\_pool\\\\_utilization '\n", + " '+ '\n", + " 'queued\\\\_ingestion\\\\_load) '\n", + " '\\\\newline '\n", + " 'total\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\sum(node\\\\_ingestion\\\\_load) '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 't '\n", + " 'h '\n", + " 're '\n", + " 'a '\n", + " 'd '\n", + " '_ '\n", + " 'p '\n", + " 'oo '\n", + " 'l '\n", + " '_ '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'l '\n", + " 'i '\n", + " 'z '\n", + " 'a '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " 
'+ '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 't '\n", + " 'o '\n", + " 't '\n", + " 'a '\n", + " 'l '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '∑ '\n", + " '( '\n", + " 'n '\n", + " 'o '\n", + " 'd '\n", + " 'e '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " ') '\n", + " 'Figure '\n", + " '2 '\n", + " ': '\n", + " 'ingestion '\n", + " 'load '\n", + " 'components '\n", + " 'The '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'is '\n", + " 'an '\n", + " 'exponentially '\n", + " 'weighted '\n", + " 'moving '\n", + " 'average '\n", + " '(EWMA) '\n", + " 'of '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'busy '\n", + " 'threads '\n", + " 'in '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'sampled '\n", + " 'every '\n", + " 'second. 
'\n", + " 'The '\n", + " 'EWMA '\n", + " 'of '\n", + " 'the '\n", + " 'sampled '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'values '\n", + " 'is '\n", + " 'configured '\n", + " 'such '\n", + " 'that '\n", + " 'the '\n", + " 'sampled '\n", + " 'values '\n", + " 'of '\n", + " 'the '\n", + " 'past '\n", + " '10 '\n", + " 'seconds '\n", + " 'have '\n", + " 'the '\n", + " 'most '\n", + " 'effect '\n", + " 'on '\n", + " 'the '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'component '\n", + " 'of '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'samples '\n", + " 'older '\n", + " 'than '\n", + " '60 '\n", + " 'seconds '\n", + " 'have '\n", + " 'very '\n", + " 'negligible '\n", + " 'impact. '\n", + " 'To '\n", + " 'estimate '\n", + " 'the '\n", + " 'resources '\n", + " 'required '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'queued '\n", + " 'indexing '\n", + " 'requests '\n", + " 'in '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'we '\n", + " 'need '\n", + " 'to '\n", + " 'have '\n", + " 'an '\n", + " 'estimate '\n", + " 'for '\n", + " 'how '\n", + " 'long '\n", + " 'each '\n", + " 'queued '\n", + " 'task '\n", + " 'can '\n", + " 'take '\n", + " 'to '\n", + " 'execute. '\n", + " 'To '\n", + " 'achieve '\n", + " 'this, '\n", + " 'each '\n", + " 'thread '\n", + " 'pool '\n", + " 'also '\n", + " 'provides '\n", + " 'an '\n", + " 'EWMA '\n", + " 'of '\n", + " 'the '\n", + " 'request '\n", + " 'execution '\n", + " 'time. 
'\n", + " 'The '\n", + " 'request '\n", + " 'execution '\n", + " 'time '\n", + " 'for '\n", + " 'an '\n", + " 'indexing '\n", + " 'request '\n", + " 'is '\n", + " 'the '\n", + " '(wall-clock) '\n", + " 'time '\n", + " 'taken '\n", + " 'for '\n", + " 'the '\n", + " 'request '\n", + " 'to '\n", + " 'finish '\n", + " 'once '\n", + " 'it '\n", + " 'is '\n", + " 'out '\n", + " 'of '\n", + " 'the '\n", + " 'queue '\n", + " 'and '\n", + " 'a '\n", + " 'worker '\n", + " 'thread '\n", + " 'starts '\n", + " 'executing '\n", + " 'it. '\n", + " 'As '\n", + " 'some '\n", + " 'queueing '\n", + " 'is '\n", + " 'acceptable '\n", + " 'and '\n", + " 'should '\n", + " 'be '\n", + " 'manageable '\n", + " 'by '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'we '\n", + " 'try '\n", + " 'to '\n", + " 'estimate '\n", + " 'the '\n", + " 'resources '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'excess '\n", + " 'queueing. '\n", + " 'We '\n", + " 'consider '\n", + " 'up '\n", + " 'to '\n", + " '30s '\n", + " 'worth '\n", + " 'of '\n", + " 'tasks '\n", + " 'in '\n", + " 'the '\n", + " 'queue '\n", + " 'manageable '\n", + " 'by '\n", + " 'the '\n", + " 'existing '\n", + " 'number '\n", + " 'of '\n", + " 'workers '\n", + " 'and '\n", + " 'account '\n", + " 'for '\n", + " 'an '\n", + " 'extra '\n", + " 'thread '\n", + " 'proportional '\n", + " 'to '\n", + " 'this '\n", + " 'value. 
'\n", + " 'For '\n", + " 'example, '\n", + " 'if '\n", + " 'the '\n", + " 'average '\n", + " 'task '\n", + " 'execution '\n", + " 'time '\n", + " 'is '\n", + " '200ms, '\n", + " 'we '\n", + " 'estimate '\n", + " 'that'},\n", + " {'embeddings': {'##d': 0.06352329,\n", + " '##est': 0.89852107,\n", + " '##estinal': 0.13183321,\n", + " '##ima': 0.40056115,\n", + " '##ing': 0.61320734,\n", + " '##ion': 0.72260284,\n", + " '##ling': 0.8949169,\n", + " '##load': 0.57369965,\n", + " '##m': 0.23721623,\n", + " '##ma': 1.4438714,\n", + " '##mas': 0.24820994,\n", + " '##mat': 0.24343531,\n", + " '##sca': 0.92204034,\n", + " '##w': 1.6598973,\n", + " '##ws': 0.6782139,\n", + " '10': 0.7749067,\n", + " '150': 1.2471286,\n", + " '200': 0.58304185,\n", + " '30': 1.076181,\n", + " '60': 1.1588365,\n", + " '_': 0.17651597,\n", + " 'acceptable': 0.0395143,\n", + " 'access': 0.05357292,\n", + " 'accounting': 0.22549874,\n", + " 'achieve': 0.040418815,\n", + " 'algorithm': 0.9928478,\n", + " 'algorithms': 0.08838318,\n", + " 'allocation': 0.7647576,\n", + " 'analysis': 0.428812,\n", + " 'apache': 0.5859765,\n", + " 'api': 0.016843364,\n", + " 'approximate': 0.21684457,\n", + " 'arithmetic': 0.053462975,\n", + " 'array': 0.066098064,\n", + " 'auto': 0.53497416,\n", + " 'automatic': 0.20355695,\n", + " 'availability': 0.6690054,\n", + " 'average': 1.0341543,\n", + " 'blocking': 0.1431715,\n", + " 'buffer': 0.46087772,\n", + " 'bug': 0.23163809,\n", + " 'busy': 1.3082193,\n", + " 'calculate': 0.2015065,\n", + " 'calculation': 0.71491575,\n", + " 'capacity': 0.8027149,\n", + " 'checkpoint': 0.10162155,\n", + " 'chess': 0.26765594,\n", + " 'class': 0.5377411,\n", + " 'client': 0.028412435,\n", + " 'clock': 0.81897706,\n", + " 'cluster': 0.6336233,\n", + " 'component': 1.2550238,\n", + " 'components': 1.4753778,\n", + " 'computation': 0.5360401,\n", + " 'compute': 0.09496682,\n", + " 'computer': 0.48583803,\n", + " 'computers': 0.082595915,\n", + " 'computing': 0.0053236387,\n", + " 
'concept': 0.09244595,\n", + " 'concurrency': 0.080570355,\n", + " 'configuration': 0.63552403,\n", + " 'configured': 0.49945095,\n", + " 'constant': 0.15874276,\n", + " 'consumption': 0.3705247,\n", + " 'count': 0.15291668,\n", + " 'cpu': 0.4727478,\n", + " 'data': 0.5534523,\n", + " 'database': 0.24513115,\n", + " 'definition': 0.25252765,\n", + " 'dew': 0.027248075,\n", + " 'disadvantage': 0.043538865,\n", + " 'disk': 1.0258542,\n", + " 'during': 0.024176076,\n", + " 'e': 1.3067937,\n", + " 'each': 0.01788934,\n", + " 'ec': 0.5695534,\n", + " 'ee': 0.08090695,\n", + " 'effect': 0.33151782,\n", + " 'employee': 0.14918438,\n", + " 'employees': 0.026578736,\n", + " 'equation': 0.42684066,\n", + " 'es': 0.18498634,\n", + " 'est': 0.098570675,\n", + " 'estimate': 0.83097947,\n", + " 'estimated': 0.19130428,\n", + " 'estimates': 0.04933924,\n", + " 'every': 0.384432,\n", + " 'excess': 0.44124436,\n", + " 'execute': 0.56965685,\n", + " 'execution': 1.092663,\n", + " 'exponential': 1.2772857,\n", + " 'extra': 0.3341091,\n", + " 'finish': 0.47172138,\n", + " 'finished': 0.5516902,\n", + " 'flow': 0.1065439,\n", + " 'fra': 0.5131407,\n", + " 'gage': 0.41627494,\n", + " 'group': 0.40121686,\n", + " 'handle': 0.76723486,\n", + " 'handling': 0.8265911,\n", + " 'hardware': 0.007931168,\n", + " 'heap': 0.055197764,\n", + " 'hours': 0.5783272,\n", + " 'http': 0.16334121,\n", + " 'implement': 0.20851848,\n", + " 'improve': 0.033503063,\n", + " 'index': 1.351592,\n", + " 'indexed': 1.2516088,\n", + " 'ing': 1.2539797,\n", + " 'inventory': 0.26884475,\n", + " 'io': 0.49151403,\n", + " 'is': 0.67021686,\n", + " 'items': 0.30828458,\n", + " 'java': 1.233984,\n", + " 'lake': 0.37700737,\n", + " 'lane': 0.35798323,\n", + " 'lang': 0.11334816,\n", + " 'length': 0.39039937,\n", + " 'library': 0.0020271246,\n", + " 'load': 1.839116,\n", + " 'loading': 0.52925104,\n", + " 'log': 0.026120221,\n", + " 'ma': 0.37466413,\n", + " 'machine': 0.41295668,\n", + " 'managed': 0.016499385,\n", + " 
'management': 0.24261811,\n", + " 'many': 0.0001822544,\n", + " 'map': 0.16712263,\n", + " 'mat': 0.08338378,\n", + " 'math': 0.69625205,\n", + " 'maximum': 0.34880605,\n", + " 'mb': 0.37918818,\n", + " 'measure': 0.14309268,\n", + " 'memory': 0.58699423,\n", + " 'metric': 0.113157846,\n", + " 'mill': 0.087879546,\n", + " 'minimum': 0.042228475,\n", + " 'mining': 0.31173173,\n", + " 'minute': 0.2855463,\n", + " 'minutes': 0.037687548,\n", + " 'mm': 0.04705554,\n", + " 'move': 0.24638273,\n", + " 'moving': 1.068798,\n", + " 'mp': 0.339956,\n", + " 'mt': 0.18115476,\n", + " 'multi': 0.045562405,\n", + " 'multiple': 0.2256053,\n", + " 'n': 0.20722932,\n", + " 'network': 0.2870649,\n", + " 'node': 0.74391615,\n", + " 'nodes': 0.40956134,\n", + " 'number': 0.5414315,\n", + " 'object': 0.36274558,\n", + " 'old': 0.026420968,\n", + " 'older': 0.14505674,\n", + " 'operation': 0.137978,\n", + " 'optimal': 0.03703803,\n", + " 'par': 0.0058114612,\n", + " 'parts': 0.011510156,\n", + " 'past': 0.25731233,\n", + " 'percent': 0.35817072,\n", + " 'performance': 0.801656,\n", + " 'pool': 1.8708751,\n", + " 'poole': 0.2727913,\n", + " 'pools': 1.2964886,\n", + " 'population': 0.11810607,\n", + " 'predict': 0.18177378,\n", + " 'probe': 0.21369988,\n", + " 'processing': 0.4105097,\n", + " 'proportional': 0.6098035,\n", + " 'q': 0.13568267,\n", + " 'queue': 1.2824515,\n", + " 'rank': 0.40675223,\n", + " 'rate': 0.46714726,\n", + " 'request': 0.949167,\n", + " 'requests': 0.6644938,\n", + " 'requirements': 0.3288823,\n", + " 'resource': 0.4609863,\n", + " 'resources': 0.9455237,\n", + " 'routing': 0.18650433,\n", + " 'sample': 1.0472832,\n", + " 'sampled': 0.8309003,\n", + " 'samples': 1.1415888,\n", + " 'sampling': 0.45636305,\n", + " 'scala': 0.12271185,\n", + " 'scale': 0.3144392,\n", + " 'second': 0.49777645,\n", + " 'seconds': 0.7695267,\n", + " 'sequence': 0.21608938,\n", + " 'serial': 0.049026124,\n", + " 'server': 0.37191278,\n", + " 'share': 0.19251333,\n", + " 'si': 
0.020900367,\n", + " 'sid': 0.41317028,\n", + " 'size': 0.7470095,\n", + " 'sizes': 0.060290556,\n", + " 'small': 0.015217632,\n", + " 'speed': 0.21846266,\n", + " 'sql': 0.39542097,\n", + " 'stack': 0.047259662,\n", + " 'start': 0.15702806,\n", + " 'statistical': 0.031916108,\n", + " 'statistics': 0.08593676,\n", + " 'storage': 0.034532573,\n", + " 'store': 0.053150244,\n", + " 'survey': 0.1747176,\n", + " 'system': 0.08567025,\n", + " 'table': 0.006464522,\n", + " 'task': 1.1504556,\n", + " 'tasks': 0.7951614,\n", + " 'taylor': 0.14394312,\n", + " 'term': 0.63525033,\n", + " 'thirty': 0.26077473,\n", + " 'thread': 2.0543768,\n", + " 'threads': 1.1089593,\n", + " 'tier': 1.207179,\n", + " 'time': 0.68932414,\n", + " 'timer': 0.14907645,\n", + " 'times': 0.32087305,\n", + " 'total': 0.22359692,\n", + " 'traffic': 0.26179498,\n", + " 'trial': 0.2198535,\n", + " 'u': 0.064360306,\n", + " 'unit': 0.13278264,\n", + " 'usage': 0.6241088,\n", + " 'utilization': 1.6971744,\n", + " 'value': 0.66488856,\n", + " 'values': 0.2064584,\n", + " 'w': 0.81893605,\n", + " 'wait': 0.103130125,\n", + " 'wall': 1.0635448,\n", + " 'weaving': 0.07162173,\n", + " 'web': 0.23646998,\n", + " 'weight': 0.030211551,\n", + " 'weighted': 1.2184887,\n", + " 'work': 0.23164386,\n", + " 'worker': 0.7420831,\n", + " 'workers': 1.0619413,\n", + " 'ze': 0.40276462},\n", + " 'text': 'load '\n", + " 'components '\n", + " 'The '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'is '\n", + " 'an '\n", + " 'exponentially '\n", + " 'weighted '\n", + " 'moving '\n", + " 'average '\n", + " '(EWMA) '\n", + " 'of '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'busy '\n", + " 'threads '\n", + " 'in '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'sampled '\n", + " 'every '\n", + " 'second. 
'\n", + " 'The '\n", + " 'EWMA '\n", + " 'of '\n", + " 'the '\n", + " 'sampled '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'values '\n", + " 'is '\n", + " 'configured '\n", + " 'such '\n", + " 'that '\n", + " 'the '\n", + " 'sampled '\n", + " 'values '\n", + " 'of '\n", + " 'the '\n", + " 'past '\n", + " '10 '\n", + " 'seconds '\n", + " 'have '\n", + " 'the '\n", + " 'most '\n", + " 'effect '\n", + " 'on '\n", + " 'the '\n", + " 'thread '\n", + " 'pool '\n", + " 'utilization '\n", + " 'component '\n", + " 'of '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'samples '\n", + " 'older '\n", + " 'than '\n", + " '60 '\n", + " 'seconds '\n", + " 'have '\n", + " 'very '\n", + " 'negligible '\n", + " 'impact. '\n", + " 'To '\n", + " 'estimate '\n", + " 'the '\n", + " 'resources '\n", + " 'required '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'queued '\n", + " 'indexing '\n", + " 'requests '\n", + " 'in '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'we '\n", + " 'need '\n", + " 'to '\n", + " 'have '\n", + " 'an '\n", + " 'estimate '\n", + " 'for '\n", + " 'how '\n", + " 'long '\n", + " 'each '\n", + " 'queued '\n", + " 'task '\n", + " 'can '\n", + " 'take '\n", + " 'to '\n", + " 'execute. '\n", + " 'To '\n", + " 'achieve '\n", + " 'this, '\n", + " 'each '\n", + " 'thread '\n", + " 'pool '\n", + " 'also '\n", + " 'provides '\n", + " 'an '\n", + " 'EWMA '\n", + " 'of '\n", + " 'the '\n", + " 'request '\n", + " 'execution '\n", + " 'time. 
'\n", + " 'The '\n", + " 'request '\n", + " 'execution '\n", + " 'time '\n", + " 'for '\n", + " 'an '\n", + " 'indexing '\n", + " 'request '\n", + " 'is '\n", + " 'the '\n", + " '(wall-clock) '\n", + " 'time '\n", + " 'taken '\n", + " 'for '\n", + " 'the '\n", + " 'request '\n", + " 'to '\n", + " 'finish '\n", + " 'once '\n", + " 'it '\n", + " 'is '\n", + " 'out '\n", + " 'of '\n", + " 'the '\n", + " 'queue '\n", + " 'and '\n", + " 'a '\n", + " 'worker '\n", + " 'thread '\n", + " 'starts '\n", + " 'executing '\n", + " 'it. '\n", + " 'As '\n", + " 'some '\n", + " 'queueing '\n", + " 'is '\n", + " 'acceptable '\n", + " 'and '\n", + " 'should '\n", + " 'be '\n", + " 'manageable '\n", + " 'by '\n", + " 'the '\n", + " 'thread '\n", + " 'pool, '\n", + " 'we '\n", + " 'try '\n", + " 'to '\n", + " 'estimate '\n", + " 'the '\n", + " 'resources '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'excess '\n", + " 'queueing. '\n", + " 'We '\n", + " 'consider '\n", + " 'up '\n", + " 'to '\n", + " '30s '\n", + " 'worth '\n", + " 'of '\n", + " 'tasks '\n", + " 'in '\n", + " 'the '\n", + " 'queue '\n", + " 'manageable '\n", + " 'by '\n", + " 'the '\n", + " 'existing '\n", + " 'number '\n", + " 'of '\n", + " 'workers '\n", + " 'and '\n", + " 'account '\n", + " 'for '\n", + " 'an '\n", + " 'extra '\n", + " 'thread '\n", + " 'proportional '\n", + " 'to '\n", + " 'this '\n", + " 'value. 
'\n", + " 'For '\n", + " 'example, '\n", + " 'if '\n", + " 'the '\n", + " 'average '\n", + " 'task '\n", + " 'execution '\n", + " 'time '\n", + " 'is '\n", + " '200ms, '\n", + " 'we '\n", + " 'estimate '\n", + " 'that '\n", + " 'each '\n", + " 'thread '\n", + " 'is '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " '150 '\n", + " 'indexing '\n", + " 'requests '\n", + " 'within '\n", + " '30s, '\n", + " 'and '\n", + " 'therefore '\n", + " 'account '\n", + " 'for '\n", + " 'one '\n", + " 'extra '\n", + " 'thread '\n", + " 'for '\n", + " 'each '\n", + " '150 '\n", + " 'queued '\n", + " 'items. '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " '_ '\n", + " 's '\n", + " 'i '\n", + " 'z '\n", + " 'e '\n", + " '× '\n", + " 'a '\n", + " 'v '\n", + " 'e '\n", + " 'r '\n", + " 'a '\n", + " 'g '\n", + " 'e '\n", + " '_ '\n", + " 'r '\n", + " 'e '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " '_ '\n", + " 'e '\n", + " 'x '\n", + " 'e '\n", + " 'c '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 't '\n", + " 'i '\n", + " 'm '\n", + " 'e '\n", + " '30 '\n", + " 's '\n", + " '\\\\small '\n", + " 'queued\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\frac{queue\\\\_size '\n", + " '\\\\times '\n", + " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '30 '\n", + " 's '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e 
'\n", + " '_ '\n", + " 's '\n", + " 'i '\n", + " 'ze '\n", + " '× '\n", + " 'a '\n", + " 'v '\n", + " 'er '\n", + " 'a '\n", + " 'g '\n", + " 'e '\n", + " '_ '\n", + " 're '\n", + " 'q '\n", + " 'u '\n", + " 'es '\n", + " 't '\n", + " '_ '\n", + " 'e '\n", + " 'x '\n", + " 'ec '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 't '\n", + " 'im '\n", + " 'e '\n", + " '\\u200b '\n", + " 'Note '\n", + " 'that '\n", + " 'since '\n", + " 'the '\n", + " 'indexing '\n", + " 'nodes '\n", + " 'rely '\n", + " 'on '\n", + " 'pushing '\n", + " 'indexed '\n", + " 'data '\n", + " 'into '\n", + " 'the '\n", + " 'object '\n", + " 'store '\n", + " 'periodically, '\n", + " 'we '\n", + " 'do '\n", + " 'not '\n", + " 'need '\n", + " 'to '\n", + " 'scale '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'total '\n", + " 'size '\n", + " 'of '\n", + " 'the '\n", + " 'indexed '\n", + " 'data. '\n", + " 'However, '\n", + " 'the '\n", + " 'disk '\n", + " 'IO '\n", + " 'requirements '\n", + " 'of '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload '\n", + " 'needs '\n", + " 'to '\n", + " 'be '\n", + " 'considered '\n", + " 'for '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'decisions. 
'\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'represents'},\n", + " {'embeddings': {'##d': 0.38506436,\n", + " '##est': 0.8363302,\n", + " '##frame': 0.039107077,\n", + " '##ing': 1.0441189,\n", + " '##ion': 1.1721121,\n", + " '##ler': 1.0595164,\n", + " '##ling': 0.99718106,\n", + " '##load': 0.8622203,\n", + " '##s': 0.26257822,\n", + " '##sca': 1.4883617,\n", + " '(': 0.04112861,\n", + " '120': 0.10787471,\n", + " '150': 1.5649581,\n", + " '200': 0.78864884,\n", + " '30': 1.3745978,\n", + " '300': 0.21148267,\n", + " '50': 0.031711366,\n", + " '500': 0.8493792,\n", + " '_': 0.24777141,\n", + " 'accounting': 0.64968836,\n", + " 'additional': 0.3232339,\n", + " 'algorithm': 1.0360106,\n", + " 'algorithms': 0.20798434,\n", + " 'analysis': 0.25909927,\n", + " 'analyze': 0.18533573,\n", + " 'apache': 0.8096589,\n", + " 'api': 1.3224775,\n", + " 'approximate': 0.0154337585,\n", + " 'array': 0.23401959,\n", + " 'auto': 1.4535567,\n", + " 'automatic': 0.7868701,\n", + " 'availability': 0.21982048,\n", + " 'available': 0.030020691,\n", + " 'average': 0.098859586,\n", + " 'basic': 0.2743477,\n", + " 'blocking': 0.10501332,\n", + " 'bot': 0.07765888,\n", + " 'buffer': 0.36042303,\n", + " 'calculate': 0.21506485,\n", + " 'calculation': 0.81758976,\n", + " 'capacity': 0.58354694,\n", + " 'cassandra': 0.22208737,\n", + " 'checkpoint': 0.031537656,\n", + " 'chess': 0.6237735,\n", + " 'class': 0.439471,\n", + " 'clock': 0.54654706,\n", + " 'cluster': 1.4933486,\n", + " 'cod': 0.12783043,\n", + " 'computation': 0.39954206,\n", + " 'compute': 0.042445127,\n", + " 'computer': 0.13797997,\n", + " 'constant': 0.2067099,\n", + " 'cpu': 0.5182024,\n", + " 'crawl': 0.22104222,\n", + " 'data': 0.51176333,\n", + " 'database': 0.440294,\n", + " 'determined': 0.23795621,\n", + " 'disk': 0.5893501,\n", + " 'e': 0.05990428,\n", + " 'each': 0.46478215,\n", + " 'equation': 0.008288982,\n", + " 'er': 0.43452957,\n", + " 'es': 0.14311427,\n", + " 'estimate': 0.25439763,\n", + " 
'every': 0.1305604,\n", + " 'execution': 0.7186893,\n", + " 'exposed': 0.23602542,\n", + " 'extra': 0.7385199,\n", + " 'fixed': 0.11877214,\n", + " 'forum': 0.3137529,\n", + " 'fra': 1.0726693,\n", + " 'fragment': 0.030604606,\n", + " 'g': 0.026902322,\n", + " 'gage': 0.12548852,\n", + " 'guild': 0.27722847,\n", + " 'handle': 0.8976072,\n", + " 'handling': 0.69513077,\n", + " 'heap': 0.26846212,\n", + " 'hours': 0.7121461,\n", + " 'http': 0.10318518,\n", + " 'index': 1.6740144,\n", + " 'indexed': 1.1180266,\n", + " 'indices': 0.88624585,\n", + " 'ing': 1.10228,\n", + " 'integer': 0.2208937,\n", + " 'inventory': 0.44952998,\n", + " 'io': 0.85926545,\n", + " 'item': 0.48019466,\n", + " 'items': 0.7935411,\n", + " 'java': 1.237859,\n", + " 'lane': 0.39564016,\n", + " 'length': 0.47680393,\n", + " 'limit': 0.4967848,\n", + " 'load': 1.2765044,\n", + " 'loading': 0.25379905,\n", + " 'm': 0.06343312,\n", + " 'machine': 0.19301167,\n", + " 'maintenance': 0.23043938,\n", + " 'map': 0.07359305,\n", + " 'mass': 0.08436136,\n", + " 'master': 1.1724675,\n", + " 'matching': 0.044185776,\n", + " 'math': 0.71257645,\n", + " 'max': 0.16343911,\n", + " 'maximum': 0.8216195,\n", + " 'mb': 0.74474645,\n", + " 'measure': 0.22327076,\n", + " 'memory': 1.4785702,\n", + " 'metadata': 0.8341058,\n", + " 'metric': 0.9043063,\n", + " 'minimal': 0.36312523,\n", + " 'minimum': 1.0762551,\n", + " 'mining': 0.6374103,\n", + " 'mp': 0.18194582,\n", + " 'multi': 0.19790418,\n", + " 'multiple': 0.08082614,\n", + " 'n': 0.2315838,\n", + " 'network': 0.5508067,\n", + " 'node': 1.3963627,\n", + " 'nodes': 0.73737425,\n", + " 'number': 0.082121976,\n", + " 'o': 0.11493757,\n", + " 'object': 0.5812754,\n", + " 'par': 0.023205614,\n", + " 'per': 0.23101303,\n", + " 'performance': 0.23446344,\n", + " 'pool': 0.8049336,\n", + " 'pools': 0.15594147,\n", + " 'predict': 0.024841096,\n", + " 'processing': 0.36487442,\n", + " 'pushing': 0.20726342,\n", + " 'q': 0.8291657,\n", + " 'quarterly': 0.13623458,\n", + 
" 'queue': 1.481917,\n", + " 'rail': 0.078313634,\n", + " 'ram': 0.28152135,\n", + " 'rank': 0.3435108,\n", + " 'ratio': 0.06241234,\n", + " 're': 0.2784615,\n", + " 'regional': 0.34884617,\n", + " 'request': 0.99899644,\n", + " 'requests': 0.99197084,\n", + " 'requirement': 0.62241584,\n", + " 'requirements': 0.674187,\n", + " 'resolution': 0.02591185,\n", + " 'routing': 0.19566713,\n", + " 'scala': 0.17918167,\n", + " 'scale': 0.15746343,\n", + " 'seconds': 0.13917202,\n", + " 'semi': 0.23686175,\n", + " 'sequence': 0.5461212,\n", + " 'ser': 0.08773902,\n", + " 'serial': 0.29184434,\n", + " 'server': 0.5091232,\n", + " 'shards': 1.1462573,\n", + " 'sid': 0.5460215,\n", + " 'size': 0.5671189,\n", + " 'small': 0.1666983,\n", + " 'sort': 0.20719269,\n", + " 'sql': 0.21473138,\n", + " 'stack': 0.042597417,\n", + " 'statistics': 0.019139726,\n", + " 'storage': 0.11576759,\n", + " 'strategy': 0.06358851,\n", + " 'swarm': 0.08892168,\n", + " 't': 0.15734711,\n", + " 'task': 0.2625412,\n", + " 'taylor': 0.059171513,\n", + " 'thirty': 0.59235644,\n", + " 'thread': 1.7254765,\n", + " 'threads': 1.1326298,\n", + " 'tier': 2.0103586,\n", + " 'time': 0.5197543,\n", + " 'times': 0.19328791,\n", + " 'total': 0.9341554,\n", + " 'trial': 1.0915743,\n", + " 'ur': 0.041876547,\n", + " 'value': 0.39162463,\n", + " 'values': 0.10083909,\n", + " 'wall': 0.93653333,\n", + " 'web': 0.1397472,\n", + " 'weeks': 0.027450949,\n", + " 'within': 0.38789856,\n", + " 'work': 0.1474287,\n", + " 'workers': 0.30503651,\n", + " 'write': 0.33134767,\n", + " 'x': 0.027046092,\n", + " 'z': 0.06591661,\n", + " 'ze': 0.69916034},\n", + " 'text': 'each '\n", + " 'thread '\n", + " 'is '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " '150 '\n", + " 'indexing '\n", + " 'requests '\n", + " 'within '\n", + " '30s, '\n", + " 'and '\n", + " 'therefore '\n", + " 'account '\n", + " 'for '\n", + " 'one '\n", + " 'extra '\n", + " 'thread '\n", + " 'for '\n", + " 'each '\n", + " '150 '\n", + " 'queued '\n", 
+ " 'items. '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'i '\n", + " 'n '\n", + " 'g '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " '_ '\n", + " 's '\n", + " 'i '\n", + " 'z '\n", + " 'e '\n", + " '× '\n", + " 'a '\n", + " 'v '\n", + " 'e '\n", + " 'r '\n", + " 'a '\n", + " 'g '\n", + " 'e '\n", + " '_ '\n", + " 'r '\n", + " 'e '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 's '\n", + " 't '\n", + " '_ '\n", + " 'e '\n", + " 'x '\n", + " 'e '\n", + " 'c '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 't '\n", + " 'i '\n", + " 'm '\n", + " 'e '\n", + " '30 '\n", + " 's '\n", + " '\\\\small '\n", + " 'queued\\\\_ingestion\\\\_load '\n", + " '= '\n", + " '\\\\frac{queue\\\\_size '\n", + " '\\\\times '\n", + " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " 'd '\n", + " '_ '\n", + " 'in '\n", + " 'g '\n", + " 'es '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 'l '\n", + " 'o '\n", + " 'a '\n", + " 'd '\n", + " '= '\n", + " '30 '\n", + " 's '\n", + " 'q '\n", + " 'u '\n", + " 'e '\n", + " 'u '\n", + " 'e '\n", + " '_ '\n", + " 's '\n", + " 'i '\n", + " 'ze '\n", + " '× '\n", + " 'a '\n", + " 'v '\n", + " 'er '\n", + " 'a '\n", + " 'g '\n", + " 'e '\n", + " '_ '\n", + " 're '\n", + " 'q '\n", + " 'u '\n", + " 'es '\n", + " 't '\n", + " '_ '\n", + " 'e '\n", + " 'x '\n", + " 'ec '\n", + " 'u '\n", + " 't '\n", + " 'i '\n", + " 'o '\n", + " 'n '\n", + " '_ '\n", + " 't '\n", + " 'im '\n", + " 'e '\n", + " '\\u200b '\n", + " 'Note '\n", + " 'that '\n", + " 'since '\n", + " 'the '\n", + " 'indexing '\n", + " 'nodes '\n", + " 'rely '\n", + " 'on '\n", + " 'pushing '\n", + " 'indexed '\n", + " 
'data '\n", + " 'into '\n", + " 'the '\n", + " 'object '\n", + " 'store '\n", + " 'periodically, '\n", + " 'we '\n", + " 'do '\n", + " 'not '\n", + " 'need '\n", + " 'to '\n", + " 'scale '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'total '\n", + " 'size '\n", + " 'of '\n", + " 'the '\n", + " 'indexed '\n", + " 'data. '\n", + " 'However, '\n", + " 'the '\n", + " 'disk '\n", + " 'IO '\n", + " 'requirements '\n", + " 'of '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload '\n", + " 'needs '\n", + " 'to '\n", + " 'be '\n", + " 'considered '\n", + " 'for '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'decisions. '\n", + " 'The '\n", + " 'ingestion '\n", + " 'load '\n", + " 'represents '\n", + " 'both '\n", + " 'CPU '\n", + " 'requirements '\n", + " 'of '\n", + " 'the '\n", + " 'indexing '\n", + " 'nodes '\n", + " 'as '\n", + " 'well '\n", + " 'as '\n", + " 'disk '\n", + " 'IO '\n", + " 'since '\n", + " 'both '\n", + " 'CPU '\n", + " 'and '\n", + " 'IO '\n", + " 'work '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pool '\n", + " 'workers '\n", + " 'and '\n", + " 'we '\n", + " 'rely '\n", + " 'on '\n", + " 'the '\n", + " 'wall '\n", + " 'clock '\n", + " 'time '\n", + " 'to '\n", + " 'estimate '\n", + " 'the '\n", + " 'required '\n", + " 'time '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'queued '\n", + " 'requests. '\n", + " 'Each '\n", + " 'indexing '\n", + " 'node '\n", + " 'calculates '\n", + " 'its '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'publishes '\n", + " 'this '\n", + " 'value '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node '\n", + " 'periodically. 
'\n", + " 'The '\n", + " 'master '\n", + " 'node '\n", + " 'serves '\n", + " 'the '\n", + " 'per '\n", + " 'node '\n", + " 'ingestion '\n", + " 'load '\n", + " 'values '\n", + " 'via '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'to '\n", + " 'the '\n", + " 'autoscaler. '\n", + " 'Memory '\n", + " 'The '\n", + " 'memory '\n", + " 'metrics '\n", + " 'exposed '\n", + " 'by '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'are '\n", + " 'node '\n", + " 'memory '\n", + " 'and '\n", + " 'tier '\n", + " 'memory. '\n", + " 'The '\n", + " 'node '\n", + " 'memory '\n", + " 'represents '\n", + " 'the '\n", + " 'minimum '\n", + " 'memory '\n", + " 'requirement '\n", + " 'for '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'in '\n", + " 'the '\n", + " 'cluster. '\n", + " 'The '\n", + " 'tier '\n", + " 'memory '\n", + " 'metric '\n", + " 'represents '\n", + " 'the '\n", + " 'minimum '\n", + " 'total '\n", + " 'memory '\n", + " 'that '\n", + " 'should '\n", + " 'be '\n", + " 'available '\n", + " 'in '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier. '\n", + " 'Note '\n", + " 'that '\n", + " 'these '\n", + " 'values '\n", + " 'only '\n", + " 'indicate '\n", + " 'the '\n", + " 'minimum '\n", + " 'to '\n", + " 'ensure '\n", + " 'that '\n", + " 'each '\n", + " 'node '\n", + " 'is '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'basic '\n", + " 'indexing '\n", + " 'workload '\n", + " 'and '\n", + " 'hold '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'indices '\n", + " 'metadata, '\n", + " 'while '\n", + " 'ensuring '\n", + " 'that '\n", + " 'the '\n", + " 'tier '\n", + " 'includes '\n", + " 'enough '\n", + " 'nodes '\n", + " 'to '\n", + " 'accommodate '\n", + " 'all '\n", + " 'index '\n", + " 'shards. 
'\n", + " 'Node '\n", + " 'memory '\n", + " 'must '\n", + " 'have '\n", + " 'a '\n", + " 'minimum '\n", + " 'of '\n", + " '500MB '\n", + " 'to '\n", + " 'be '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " 'indexing '\n", + " 'workloads '\n", + " ', '\n", + " 'as '\n", + " 'well '\n", + " 'as '\n", + " 'a '\n", + " 'fixed '\n", + " 'amount '\n", + " 'of '\n", + " 'memory '\n", + " 'per '\n", + " 'each '\n", + " 'index '\n", + " '. '\n", + " 'This '\n", + " 'ensures '\n", + " 'all '\n", + " 'nodes '\n", + " 'can '\n", + " 'hold '\n", + " 'metadata '\n", + " 'for '\n", + " 'the '\n", + " 'cluster, '\n", + " 'which '\n", + " 'includes '\n", + " 'metadata '\n", + " 'for '\n", + " 'every '\n", + " 'index. '\n", + " 'Tier '\n", + " 'memory '\n", + " 'is '\n", + " 'determined '\n", + " 'by '\n", + " 'accounting '\n", + " 'for '\n", + " 'the '\n", + " 'memory'},\n", + " {'embeddings': {'##d': 0.055720266,\n", + " '##est': 0.87620574,\n", + " '##ging': 0.12167851,\n", + " '##id': 0.007303444,\n", + " '##ing': 1.0664626,\n", + " '##ion': 0.5800176,\n", + " '##ler': 1.1925261,\n", + " '##ling': 1.0163201,\n", + " '##load': 0.81047934,\n", + " '##mb': 0.41285288,\n", + " '##rch': 0.9021695,\n", + " '##rd': 1.5396098,\n", + " '##rds': 0.47700712,\n", + " '##s': 0.033316635,\n", + " '##sca': 1.5766962,\n", + " '##sea': 1.0991455,\n", + " '500': 0.8151243,\n", + " '6': 0.5519658,\n", + " 'accounting': 0.74103206,\n", + " 'algorithm': 1.0231093,\n", + " 'algorithms': 0.065428115,\n", + " 'allocated': 0.19617477,\n", + " 'amazon': 0.31502825,\n", + " 'analysis': 0.5597703,\n", + " 'analyze': 0.30770445,\n", + " 'apache': 0.8908353,\n", + " 'api': 1.1461797,\n", + " 'approximate': 0.21645284,\n", + " 'archive': 0.013153568,\n", + " 'array': 0.047213156,\n", + " 'auto': 1.3802772,\n", + " 'automatic': 0.7499421,\n", + " 'availability': 0.10610637,\n", + " 'basic': 0.5700848,\n", + " 'blocking': 0.03154505,\n", + " 'bot': 0.2956401,\n", + " 'brain': 0.13824557,\n", + " 
'brick': 0.34880513,\n", + " 'broken': 0.1587869,\n", + " 'buffer': 0.27810082,\n", + " 'bug': 0.019329984,\n", + " 'cad': 0.010832788,\n", + " 'calculate': 0.71264565,\n", + " 'calculated': 0.19991197,\n", + " 'calculation': 0.90854484,\n", + " 'capacity': 0.13310817,\n", + " 'cassandra': 0.269642,\n", + " 'checkpoint': 0.33004454,\n", + " 'chess': 0.6517597,\n", + " 'class': 0.40205157,\n", + " 'clock': 1.2123855,\n", + " 'cluster': 1.5899432,\n", + " 'clusters': 0.21755162,\n", + " 'computation': 0.3360238,\n", + " 'compute': 0.15521479,\n", + " 'computer': 0.4586727,\n", + " 'computers': 0.09730453,\n", + " 'core': 0.18051882,\n", + " 'cores': 0.54003507,\n", + " 'cpu': 1.4255431,\n", + " 'data': 0.7048903,\n", + " 'database': 0.5640705,\n", + " 'depend': 0.08640857,\n", + " 'deploy': 0.116062716,\n", + " 'deployed': 0.16281521,\n", + " 'deployment': 1.375697,\n", + " 'dev': 0.16744493,\n", + " 'disk': 1.2671278,\n", + " 'display': 0.10427013,\n", + " 'done': 0.057584852,\n", + " 'each': 0.44890955,\n", + " 'elastic': 1.3546548,\n", + " 'estimate': 1.1541563,\n", + " 'estimated': 0.4820726,\n", + " 'estimates': 0.68956727,\n", + " 'execution': 0.025004579,\n", + " 'expose': 0.3791655,\n", + " 'exposed': 1.4152902,\n", + " 'exposing': 0.2018034,\n", + " 'exposure': 0.22712028,\n", + " 'field': 0.43335024,\n", + " 'fixed': 0.3727484,\n", + " 'fragment': 0.3541149,\n", + " 'fragments': 0.19871251,\n", + " 'framework': 0.0067325183,\n", + " 'gage': 0.062432837,\n", + " 'gb': 0.23573099,\n", + " 'guild': 0.06864197,\n", + " 'handle': 0.6664566,\n", + " 'handling': 0.79544353,\n", + " 'hardware': 0.15463935,\n", + " 'hash': 0.056183893,\n", + " 'host': 0.49334934,\n", + " 'hours': 0.23847345,\n", + " 'hu': 0.12027907,\n", + " 'index': 1.84248,\n", + " 'indexed': 0.5543888,\n", + " 'indices': 0.8364849,\n", + " 'ing': 1.1731079,\n", + " 'integration': 0.43307945,\n", + " 'interface': 0.13424914,\n", + " 'inventory': 0.43660846,\n", + " 'io': 1.1710184,\n", + " 'java': 
1.1948129,\n", + " 'kb': 0.275635,\n", + " 'lane': 0.065143116,\n", + " 'lang': 0.07760714,\n", + " 'length': 0.19545008,\n", + " 'limit': 0.14939034,\n", + " 'load': 1.068046,\n", + " 'loading': 0.3452746,\n", + " 'machine': 0.28579098,\n", + " 'maintenance': 0.24792214,\n", + " 'management': 0.016834572,\n", + " 'mandatory': 0.09757359,\n", + " 'map': 0.33999705,\n", + " 'mapped': 0.4253768,\n", + " 'mapping': 0.7739739,\n", + " 'master': 1.514614,\n", + " 'math': 0.62235314,\n", + " 'maximum': 0.4592383,\n", + " 'mb': 0.8386821,\n", + " 'measure': 0.35868418,\n", + " 'memory': 1.4037786,\n", + " 'metadata': 0.57345796,\n", + " 'metric': 1.0478114,\n", + " 'minimal': 0.55310273,\n", + " 'minimum': 1.1779544,\n", + " 'mining': 0.60987383,\n", + " 'monitor': 0.41601682,\n", + " 'monitoring': 0.80379987,\n", + " 'multiple': 0.0046412363,\n", + " 'need': 0.13691676,\n", + " 'needs': 0.09020152,\n", + " 'network': 0.5226748,\n", + " 'node': 1.5207812,\n", + " 'nodes': 0.9873411,\n", + " 'number': 0.08917359,\n", + " 'o': 0.47437057,\n", + " 'open': 0.9998891,\n", + " 'operation': 0.059715636,\n", + " 'parameters': 0.06929999,\n", + " 'per': 1.2698478,\n", + " 'performance': 0.27903107,\n", + " 'pool': 1.1343037,\n", + " 'pools': 0.5005684,\n", + " 'predict': 0.15172759,\n", + " 'processing': 0.34928247,\n", + " 'processor': 0.06942589,\n", + " 'provided': 0.33421612,\n", + " 'published': 0.35502988,\n", + " 'queue': 1.4328028,\n", + " 'ram': 0.07832895,\n", + " 'rank': 0.09849679,\n", + " 'regional': 0.023943441,\n", + " 'request': 0.58130133,\n", + " 'requests': 0.4985438,\n", + " 'require': 0.054292977,\n", + " 'required': 0.20457663,\n", + " 'requirement': 0.9255918,\n", + " 'requirements': 1.1021699,\n", + " 'resolution': 0.2503146,\n", + " 'resource': 0.22062841,\n", + " 'resources': 0.7977981,\n", + " 'scala': 0.046379413,\n", + " 'scale': 0.34393448,\n", + " 'scaling': 0.5871495,\n", + " 'script': 0.07091305,\n", + " 'search': 0.2748066,\n", + " 'semi': 
0.19345926,\n", + " 'sequence': 0.2634719,\n", + " 'serial': 0.281783,\n", + " 'serve': 0.3122354,\n", + " 'server': 0.62030464,\n", + " 'sha': 1.412181,\n", + " 'shards': 1.2690446,\n", + " 'sid': 0.5395205,\n", + " 'size': 0.37528938,\n", + " 'software': 0.2301807,\n", + " 'sql': 0.28173122,\n", + " 'storage': 0.17134488,\n", + " 'sum': 0.48667532,\n", + " 'swarm': 0.09873215,\n", + " 'task': 0.15503421,\n", + " 'thread': 1.2720325,\n", + " 'threads': 0.5098314,\n", + " 'tier': 2.0405457,\n", + " 'time': 0.691699,\n", + " 'timer': 0.3272765,\n", + " 'total': 0.853305,\n", + " 'trial': 0.75489986,\n", + " 'value': 0.55824566,\n", + " 'values': 0.18979663,\n", + " 'wall': 1.5562296,\n", + " 'walls': 0.57668746,\n", + " 'web': 0.12833436,\n", + " 'workers': 0.30275372,\n", + " 'write': 0.8986184},\n", + " 'text': 'both '\n", + " 'CPU '\n", + " 'requirements '\n", + " 'of '\n", + " 'the '\n", + " 'indexing '\n", + " 'nodes '\n", + " 'as '\n", + " 'well '\n", + " 'as '\n", + " 'disk '\n", + " 'IO '\n", + " 'since '\n", + " 'both '\n", + " 'CPU '\n", + " 'and '\n", + " 'IO '\n", + " 'work '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'the '\n", + " 'write '\n", + " 'thread '\n", + " 'pool '\n", + " 'workers '\n", + " 'and '\n", + " 'we '\n", + " 'rely '\n", + " 'on '\n", + " 'the '\n", + " 'wall '\n", + " 'clock '\n", + " 'time '\n", + " 'to '\n", + " 'estimate '\n", + " 'the '\n", + " 'required '\n", + " 'time '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'queued '\n", + " 'requests. '\n", + " 'Each '\n", + " 'indexing '\n", + " 'node '\n", + " 'calculates '\n", + " 'its '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'publishes '\n", + " 'this '\n", + " 'value '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node '\n", + " 'periodically. 
'\n", + " 'The '\n", + " 'master '\n", + " 'node '\n", + " 'serves '\n", + " 'the '\n", + " 'per '\n", + " 'node '\n", + " 'ingestion '\n", + " 'load '\n", + " 'values '\n", + " 'via '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'to '\n", + " 'the '\n", + " 'autoscaler. '\n", + " 'Memory '\n", + " 'The '\n", + " 'memory '\n", + " 'metrics '\n", + " 'exposed '\n", + " 'by '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'are '\n", + " 'node '\n", + " 'memory '\n", + " 'and '\n", + " 'tier '\n", + " 'memory. '\n", + " 'The '\n", + " 'node '\n", + " 'memory '\n", + " 'represents '\n", + " 'the '\n", + " 'minimum '\n", + " 'memory '\n", + " 'requirement '\n", + " 'for '\n", + " 'each '\n", + " 'indexing '\n", + " 'node '\n", + " 'in '\n", + " 'the '\n", + " 'cluster. '\n", + " 'The '\n", + " 'tier '\n", + " 'memory '\n", + " 'metric '\n", + " 'represents '\n", + " 'the '\n", + " 'minimum '\n", + " 'total '\n", + " 'memory '\n", + " 'that '\n", + " 'should '\n", + " 'be '\n", + " 'available '\n", + " 'in '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier. '\n", + " 'Note '\n", + " 'that '\n", + " 'these '\n", + " 'values '\n", + " 'only '\n", + " 'indicate '\n", + " 'the '\n", + " 'minimum '\n", + " 'to '\n", + " 'ensure '\n", + " 'that '\n", + " 'each '\n", + " 'node '\n", + " 'is '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'basic '\n", + " 'indexing '\n", + " 'workload '\n", + " 'and '\n", + " 'hold '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'indices '\n", + " 'metadata, '\n", + " 'while '\n", + " 'ensuring '\n", + " 'that '\n", + " 'the '\n", + " 'tier '\n", + " 'includes '\n", + " 'enough '\n", + " 'nodes '\n", + " 'to '\n", + " 'accommodate '\n", + " 'all '\n", + " 'index '\n", + " 'shards. 
'\n", + " 'Node '\n", + " 'memory '\n", + " 'must '\n", + " 'have '\n", + " 'a '\n", + " 'minimum '\n", + " 'of '\n", + " '500MB '\n", + " 'to '\n", + " 'be '\n", + " 'able '\n", + " 'to '\n", + " 'handle '\n", + " 'indexing '\n", + " 'workloads '\n", + " ', '\n", + " 'as '\n", + " 'well '\n", + " 'as '\n", + " 'a '\n", + " 'fixed '\n", + " 'amount '\n", + " 'of '\n", + " 'memory '\n", + " 'per '\n", + " 'each '\n", + " 'index '\n", + " '. '\n", + " 'This '\n", + " 'ensures '\n", + " 'all '\n", + " 'nodes '\n", + " 'can '\n", + " 'hold '\n", + " 'metadata '\n", + " 'for '\n", + " 'the '\n", + " 'cluster, '\n", + " 'which '\n", + " 'includes '\n", + " 'metadata '\n", + " 'for '\n", + " 'every '\n", + " 'index. '\n", + " 'Tier '\n", + " 'memory '\n", + " 'is '\n", + " 'determined '\n", + " 'by '\n", + " 'accounting '\n", + " 'for '\n", + " 'the '\n", + " 'memory '\n", + " 'overhead '\n", + " 'of '\n", + " 'the '\n", + " 'field '\n", + " 'mappings '\n", + " 'of '\n", + " 'the '\n", + " 'indices '\n", + " 'and '\n", + " 'the '\n", + " 'amount '\n", + " 'of '\n", + " 'memory '\n", + " 'needed '\n", + " 'for '\n", + " 'each '\n", + " 'open '\n", + " 'shard '\n", + " 'allocated '\n", + " 'on '\n", + " 'a '\n", + " 'node '\n", + " 'in '\n", + " 'the '\n", + " 'cluster. '\n", + " 'Currently, '\n", + " 'the '\n", + " 'per-shard '\n", + " 'memory '\n", + " 'requirement '\n", + " 'uses '\n", + " 'a '\n", + " 'fixed '\n", + " 'estimate '\n", + " 'of '\n", + " '6MB. '\n", + " 'We '\n", + " 'plan '\n", + " 'to '\n", + " 'refine '\n", + " 'this '\n", + " 'value. 
'\n", + " 'The '\n", + " 'estimate '\n", + " 'for '\n", + " 'the '\n", + " 'memory '\n", + " 'requirements '\n", + " 'for '\n", + " 'the '\n", + " 'mappings '\n", + " 'of '\n", + " 'each '\n", + " 'index '\n", + " 'is '\n", + " 'calculated '\n", + " 'by '\n", + " 'one '\n", + " 'of '\n", + " 'the '\n", + " 'data '\n", + " 'nodes '\n", + " 'that '\n", + " 'hosts '\n", + " 'a '\n", + " 'shard '\n", + " 'of '\n", + " 'the '\n", + " 'index. '\n", + " 'The '\n", + " 'calculated '\n", + " 'estimates '\n", + " 'are '\n", + " 'sent '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node. '\n", + " 'Whenever '\n", + " 'there '\n", + " 'is '\n", + " 'a '\n", + " 'mapping '\n", + " 'change '\n", + " 'this '\n", + " 'estimate '\n", + " 'is '\n", + " 'updated '\n", + " 'and '\n", + " 'published '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node '\n", + " 'again. '\n", + " 'The '\n", + " 'master '\n", + " 'node '\n", + " 'serves '\n", + " 'the '\n", + " 'node '\n", + " 'and '\n", + " 'total '\n", + " 'memory '\n", + " 'metrics '\n", + " 'based '\n", + " 'on '\n", + " 'these '\n", + " 'information '\n", + " 'via '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'to '\n", + " 'the '\n", + " 'autoscaler. '\n", + " 'Scaling '\n", + " 'the '\n", + " 'cluster '\n", + " 'The '\n", + " 'autoscaler '\n", + " 'is '\n", + " 'responsible '\n", + " 'for '\n", + " 'monitoring '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'via '\n", + " 'the '\n", + " 'exposed '\n", + " 'metrics, '\n", + " 'calculating '\n", + " 'the '\n", + " 'desirable '\n", + " 'cluster '\n", + " 'size '\n", + " 'to '\n", + " 'adapt '\n", + " 'to '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload, '\n", + " 'and '\n", + " 'updating '\n", + " 'the '\n", + " 'deployment '\n", + " 'accordingly. 
'\n", + " 'This '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'calculating '\n", + " 'the '\n", + " 'total '\n", + " 'required '\n", + " 'CPU '\n", + " 'and '\n", + " 'memory '\n", + " 'resources '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'memory '\n", + " 'metrics. '\n", + " 'The '\n", + " 'sum '\n", + " 'of '\n", + " 'all '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'per '\n", + " 'node '\n", + " 'values '\n", + " 'determines '\n", + " 'the '\n", + " 'total '\n", + " 'number '\n", + " 'of '\n", + " 'CPU '\n", + " 'cores '\n", + " 'needed '\n", + " 'for '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier. '\n", + " 'The '\n", + " 'calculated '\n", + " 'CPU '\n", + " 'requirement '\n", + " 'and '\n", + " 'the '\n", + " 'provided '\n", + " 'minimum '\n", + " 'node '\n", + " 'and '\n", + " 'tier '\n", + " 'memory '\n", + " 'resources '\n", + " 'are '\n", + " 'mapped '\n", + " 'to '\n", + " 'a '\n", + " 'predetermined '\n", + " 'set'},\n", + " {'embeddings': {'##ber': 0.9460652,\n", + " '##d': 0.10023495,\n", + " '##es': 0.14341043,\n", + " '##gb': 0.6906553,\n", + " '##ine': 0.9458122,\n", + " '##ing': 0.42145026,\n", + " '##ler': 1.2356958,\n", + " '##ling': 0.63835293,\n", + " '##load': 0.2904571,\n", + " '##mb': 0.6970242,\n", + " '##net': 0.7010928,\n", + " '##pu': 1.0257086,\n", + " '##rch': 1.0700952,\n", + " '##rd': 1.6493205,\n", + " '##rds': 0.6754141,\n", + " '##rt': 0.12942569,\n", + " '##sca': 1.4853197,\n", + " '##sea': 1.4192088,\n", + " '##vc': 1.405061,\n", + " '100': 0.26849923,\n", + " '16': 0.19268984,\n", + " '160': 0.2302431,\n", + " '1600': 0.8732733,\n", + " '32': 1.2120824,\n", + " '6': 0.70548016,\n", + " '64': 1.202607,\n", + " 'algorithm': 0.937971,\n", + " 'allocated': 0.73692024,\n", + " 'allocation': 0.4625666,\n", + " 'amazon': 0.86137766,\n", + " 'analysis': 0.58160084,\n", + " 'analyze': 0.023657316,\n", + " 'apache': 0.85805637,\n", + " 'api': 
0.9369967,\n", + " 'approximate': 0.15172462,\n", + " 'auto': 1.225151,\n", + " 'automatic': 0.7224918,\n", + " 'availability': 0.3053787,\n", + " 'bot': 0.33649588,\n", + " 'brick': 0.28021842,\n", + " 'buffer': 0.27807808,\n", + " 'bug': 0.12689802,\n", + " 'calculate': 0.56475216,\n", + " 'calculated': 0.2805605,\n", + " 'calculating': 0.18157567,\n", + " 'calculation': 1.0562031,\n", + " 'capacity': 0.19689727,\n", + " 'certification': 0.030283952,\n", + " 'checkpoint': 0.1251825,\n", + " 'chess': 0.38721076,\n", + " 'class': 0.044428803,\n", + " 'closed': 0.20298174,\n", + " 'cluster': 1.8217679,\n", + " 'clusters': 0.40412048,\n", + " 'computation': 0.27228907,\n", + " 'compute': 0.157462,\n", + " 'computer': 0.07424284,\n", + " 'cores': 0.28018573,\n", + " 'cpu': 0.874331,\n", + " 'criteria': 0.20424062,\n", + " 'cube': 0.078070216,\n", + " 'currently': 0.26391146,\n", + " 'data': 0.57366157,\n", + " 'database': 0.5346718,\n", + " 'deploy': 0.31853938,\n", + " 'deployed': 0.23235346,\n", + " 'deployment': 1.38996,\n", + " 'desirable': 0.25084683,\n", + " 'desired': 0.05757945,\n", + " 'determine': 0.07967118,\n", + " 'determined': 0.38774973,\n", + " 'dimensions': 0.3834306,\n", + " 'disk': 0.7686433,\n", + " 'display': 0.044948753,\n", + " 'domain': 0.05484484,\n", + " 'each': 0.026949435,\n", + " 'elastic': 1.7217911,\n", + " 'equation': 0.07899539,\n", + " 'estimate': 1.0816743,\n", + " 'estimated': 0.2908085,\n", + " 'estimates': 0.7743369,\n", + " 'existing': 0.50358754,\n", + " 'exposed': 0.91814655,\n", + " 'field': 1.4176838,\n", + " 'fields': 0.56111515,\n", + " 'fixed': 0.653671,\n", + " 'forest': 0.088545434,\n", + " 'gage': 0.23066506,\n", + " 'gb': 0.7216355,\n", + " 'hardware': 0.5457616,\n", + " 'honey': 0.13710178,\n", + " 'host': 0.32896483,\n", + " 'hu': 0.022061992,\n", + " 'implement': 0.19801763,\n", + " 'index': 1.5813339,\n", + " 'indexed': 0.33440682,\n", + " 'indicator': 0.07646061,\n", + " 'indices': 1.0497515,\n", + " 'ing': 
0.44711637,\n", + " 'integration': 0.38794386,\n", + " 'inventory': 0.55072165,\n", + " 'java': 1.0091366,\n", + " 'kb': 0.31603098,\n", + " 'ku': 1.2214607,\n", + " 'largest': 0.55517995,\n", + " 'length': 0.1961873,\n", + " 'limit': 0.12602727,\n", + " 'linear': 0.13019355,\n", + " 'load': 0.7046929,\n", + " 'map': 0.6723943,\n", + " 'mapped': 0.6155787,\n", + " 'mapping': 0.95820665,\n", + " 'maps': 0.19839133,\n", + " 'master': 1.3583598,\n", + " 'math': 0.52316844,\n", + " 'maximum': 0.17016214,\n", + " 'mb': 0.8793483,\n", + " 'measure': 0.37326512,\n", + " 'memory': 1.3331418,\n", + " 'metric': 0.9261499,\n", + " 'minimum': 0.4176075,\n", + " 'mining': 0.42999497,\n", + " 'monitor': 0.34513482,\n", + " 'monitoring': 0.6307714,\n", + " 'multi': 0.3034215,\n", + " 'network': 0.67814016,\n", + " 'node': 1.2861586,\n", + " 'nodes': 0.6710798,\n", + " 'open': 1.3986069,\n", + " 'optimal': 0.0624708,\n", + " 'overhead': 0.69991654,\n", + " 'parameters': 0.11732358,\n", + " 'pattern': 0.005440311,\n", + " 'per': 1.2889819,\n", + " 'performance': 0.14103872,\n", + " 'poll': 0.52450436,\n", + " 'polling': 0.3777002,\n", + " 'polls': 0.60389787,\n", + " 'predict': 0.038165692,\n", + " 'published': 0.06970011,\n", + " 'radar': 0.004892402,\n", + " 'ram': 0.1705884,\n", + " 'rank': 0.1464829,\n", + " 'ratio': 0.6063533,\n", + " 'reconciliation': 0.4469912,\n", + " 'ref': 0.5476266,\n", + " 'requirement': 0.92776734,\n", + " 'requirements': 1.1151919,\n", + " 'resolution': 0.34558743,\n", + " 'resource': 0.21023308,\n", + " 'resources': 0.925664,\n", + " 'scale': 1.1254972,\n", + " 'scaled': 0.25958243,\n", + " 'scaling': 1.3571583,\n", + " 'scope': 0.007439173,\n", + " 'script': 0.108936414,\n", + " 'search': 0.4840181,\n", + " 'serial': 0.38776705,\n", + " 'server': 0.36229628,\n", + " 'sha': 1.6222633,\n", + " 'sid': 0.4845318,\n", + " 'since': 0.0958648,\n", + " 'size': 1.1212213,\n", + " 'sizes': 0.8831621,\n", + " 'software': 0.10655975,\n", + " 'sort': 
0.23242046,\n", + " 'specification': 0.36318856,\n", + " 'specifications': 0.36570984,\n", + " 'storage': 0.16639474,\n", + " 'swarm': 0.012647891,\n", + " 'target': 0.097013876,\n", + " 'tier': 1.3347368,\n", + " 'total': 0.2700686,\n", + " 'trial': 0.48382765,\n", + " 'up': 0.009041203,\n", + " 'value': 0.5148574,\n", + " 'version': 0.00331044,\n", + " 'vote': 0.19521642,\n", + " 'voting': 0.32694972,\n", + " 'web': 0.43445045,\n", + " 'which': 0.22146864},\n", + " 'text': 'overhead '\n", + " 'of '\n", + " 'the '\n", + " 'field '\n", + " 'mappings '\n", + " 'of '\n", + " 'the '\n", + " 'indices '\n", + " 'and '\n", + " 'the '\n", + " 'amount '\n", + " 'of '\n", + " 'memory '\n", + " 'needed '\n", + " 'for '\n", + " 'each '\n", + " 'open '\n", + " 'shard '\n", + " 'allocated '\n", + " 'on '\n", + " 'a '\n", + " 'node '\n", + " 'in '\n", + " 'the '\n", + " 'cluster. '\n", + " 'Currently, '\n", + " 'the '\n", + " 'per-shard '\n", + " 'memory '\n", + " 'requirement '\n", + " 'uses '\n", + " 'a '\n", + " 'fixed '\n", + " 'estimate '\n", + " 'of '\n", + " '6MB. '\n", + " 'We '\n", + " 'plan '\n", + " 'to '\n", + " 'refine '\n", + " 'this '\n", + " 'value. '\n", + " 'The '\n", + " 'estimate '\n", + " 'for '\n", + " 'the '\n", + " 'memory '\n", + " 'requirements '\n", + " 'for '\n", + " 'the '\n", + " 'mappings '\n", + " 'of '\n", + " 'each '\n", + " 'index '\n", + " 'is '\n", + " 'calculated '\n", + " 'by '\n", + " 'one '\n", + " 'of '\n", + " 'the '\n", + " 'data '\n", + " 'nodes '\n", + " 'that '\n", + " 'hosts '\n", + " 'a '\n", + " 'shard '\n", + " 'of '\n", + " 'the '\n", + " 'index. '\n", + " 'The '\n", + " 'calculated '\n", + " 'estimates '\n", + " 'are '\n", + " 'sent '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node. 
'\n", + " 'Whenever '\n", + " 'there '\n", + " 'is '\n", + " 'a '\n", + " 'mapping '\n", + " 'change '\n", + " 'this '\n", + " 'estimate '\n", + " 'is '\n", + " 'updated '\n", + " 'and '\n", + " 'published '\n", + " 'to '\n", + " 'the '\n", + " 'master '\n", + " 'node '\n", + " 'again. '\n", + " 'The '\n", + " 'master '\n", + " 'node '\n", + " 'serves '\n", + " 'the '\n", + " 'node '\n", + " 'and '\n", + " 'total '\n", + " 'memory '\n", + " 'metrics '\n", + " 'based '\n", + " 'on '\n", + " 'these '\n", + " 'information '\n", + " 'via '\n", + " 'the '\n", + " 'autoscaling '\n", + " 'metrics '\n", + " 'API '\n", + " 'to '\n", + " 'the '\n", + " 'autoscaler. '\n", + " 'Scaling '\n", + " 'the '\n", + " 'cluster '\n", + " 'The '\n", + " 'autoscaler '\n", + " 'is '\n", + " 'responsible '\n", + " 'for '\n", + " 'monitoring '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'via '\n", + " 'the '\n", + " 'exposed '\n", + " 'metrics, '\n", + " 'calculating '\n", + " 'the '\n", + " 'desirable '\n", + " 'cluster '\n", + " 'size '\n", + " 'to '\n", + " 'adapt '\n", + " 'to '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload, '\n", + " 'and '\n", + " 'updating '\n", + " 'the '\n", + " 'deployment '\n", + " 'accordingly. '\n", + " 'This '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'calculating '\n", + " 'the '\n", + " 'total '\n", + " 'required '\n", + " 'CPU '\n", + " 'and '\n", + " 'memory '\n", + " 'resources '\n", + " 'based '\n", + " 'on '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'and '\n", + " 'memory '\n", + " 'metrics. '\n", + " 'The '\n", + " 'sum '\n", + " 'of '\n", + " 'all '\n", + " 'the '\n", + " 'ingestion '\n", + " 'load '\n", + " 'per '\n", + " 'node '\n", + " 'values '\n", + " 'determines '\n", + " 'the '\n", + " 'total '\n", + " 'number '\n", + " 'of '\n", + " 'CPU '\n", + " 'cores '\n", + " 'needed '\n", + " 'for '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier. 
'\n", + " 'The '\n", + " 'calculated '\n", + " 'CPU '\n", + " 'requirement '\n", + " 'and '\n", + " 'the '\n", + " 'provided '\n", + " 'minimum '\n", + " 'node '\n", + " 'and '\n", + " 'tier '\n", + " 'memory '\n", + " 'resources '\n", + " 'are '\n", + " 'mapped '\n", + " 'to '\n", + " 'a '\n", + " 'predetermined '\n", + " 'set '\n", + " 'of '\n", + " 'cluster '\n", + " 'sizes. '\n", + " 'Each '\n", + " 'cluster '\n", + " 'size '\n", + " 'determines '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'nodes '\n", + " 'and '\n", + " 'the '\n", + " 'CPU, '\n", + " 'memory '\n", + " 'and '\n", + " 'disk '\n", + " 'size '\n", + " 'of '\n", + " 'each '\n", + " 'node. '\n", + " 'All '\n", + " 'nodes '\n", + " 'within '\n", + " 'a '\n", + " 'certain '\n", + " 'cluster '\n", + " 'size '\n", + " 'have '\n", + " 'the '\n", + " 'same '\n", + " 'hardware '\n", + " 'specification. '\n", + " 'There '\n", + " 'is '\n", + " 'a '\n", + " 'fixed '\n", + " 'ratio '\n", + " 'between '\n", + " 'CPU, '\n", + " 'memory '\n", + " 'and '\n", + " 'disk, '\n", + " 'thus '\n", + " 'always '\n", + " 'scaling '\n", + " 'all '\n", + " '3 '\n", + " 'resources '\n", + " 'linearly. '\n", + " 'The '\n", + " 'existing '\n", + " 'cluster '\n", + " 'sizes '\n", + " 'for '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier '\n", + " 'are '\n", + " 'based '\n", + " 'on '\n", + " 'node '\n", + " 'sizes '\n", + " 'starting '\n", + " 'from '\n", + " '4GB/2vCPU/100GB '\n", + " 'disk '\n", + " 'to '\n", + " '64GB/32vCPU/1600GB '\n", + " 'disk. 
'\n", + " 'Once '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'scales '\n", + " 'up '\n", + " 'to '\n", + " 'the '\n", + " 'largest '\n", + " 'node '\n", + " 'size '\n", + " '(64GB '\n", + " 'memory), '\n", + " 'any '\n", + " 'further '\n", + " 'scale-up '\n", + " 'adds '\n", + " 'new '\n", + " '64GB '\n", + " 'nodes, '\n", + " 'allowing '\n", + " 'a '\n", + " 'cluster '\n", + " 'to '\n", + " 'scale '\n", + " 'up '\n", + " 'to '\n", + " '32 '\n", + " 'nodes '\n", + " 'of '\n", + " '64GB. '\n", + " 'Note '\n", + " 'that '\n", + " 'this '\n", + " 'is '\n", + " 'not '\n", + " 'a '\n", + " 'hard '\n", + " 'upper '\n", + " 'bound '\n", + " 'on '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'Elasticsearch '\n", + " 'nodes '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'can '\n", + " 'be '\n", + " 'increased '\n", + " 'if '\n", + " 'necessary. '\n", + " 'Every '\n", + " '5 '\n", + " 'seconds '\n", + " 'the '\n", + " 'autoscaler '\n", + " 'polls '\n", + " 'metrics '\n", + " 'from '\n", + " 'the '\n", + " 'master '\n", + " 'node, '\n", + " 'calculates '\n", + " 'the '\n", + " 'desirable '\n", + " 'cluster '\n", + " 'size '\n", + " 'and '\n", + " 'if '\n", + " 'it '\n", + " 'is '\n", + " 'different '\n", + " 'from '\n", + " 'the '\n", + " 'current '\n", + " 'cluster '\n", + " 'size, '\n", + " 'it '\n", + " 'updates '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'Kubernetes '\n", + " 'Deployment '\n", + " 'accordingly. '\n", + " 'Note '\n", + " 'that '\n", + " 'the '\n", + " 'actual '\n", + " 'reconciliation '\n", + " 'of '\n", + " 'the '\n", + " 'deployment '\n", + " 'towards '\n", + " 'the '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'and '\n", + " 'adding '\n", + " 'and '\n", + " 'removing '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'nodes '\n", + " 'to '\n", + " 'achieve '\n", + " 'this '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'Kubernetes. 
'\n", + " 'In '\n", + " 'order '\n", + " 'to '\n", + " 'avoid '\n", + " 'very '\n", + " 'short-lived '\n", + " 'changes '\n", + " 'to '\n", + " 'the'},\n", + " {'embeddings': {'##ber': 0.03804658,\n", + " '##es': 0.1512185,\n", + " '##gb': 0.6443679,\n", + " '##hi': 0.36000288,\n", + " '##ika': 0.07467539,\n", + " '##ing': 0.6129379,\n", + " '##ler': 1.1574837,\n", + " '##less': 0.5735957,\n", + " '##ling': 1.1661593,\n", + " '##load': 0.62337583,\n", + " '##net': 0.58226395,\n", + " '##oya': 1.7074469,\n", + " '##pu': 1.1345644,\n", + " '##rch': 1.0119687,\n", + " '##sca': 1.5153302,\n", + " '##sea': 1.4253823,\n", + " '##vc': 1.4631956,\n", + " '100': 0.55265766,\n", + " '15': 0.052379817,\n", + " '16': 0.33394203,\n", + " '160': 0.118766,\n", + " '1600': 0.8028694,\n", + " '32': 1.1772103,\n", + " '4': 0.16181825,\n", + " '64': 1.4588842,\n", + " 'algorithm': 0.94727564,\n", + " 'always': 0.38941032,\n", + " 'amazon': 0.89331883,\n", + " 'analysis': 0.4050502,\n", + " 'analyze': 0.023668261,\n", + " 'andersen': 0.49676144,\n", + " 'apache': 0.80054885,\n", + " 'ariel': 0.4422102,\n", + " 'auto': 1.2729144,\n", + " 'automatic': 0.7698037,\n", + " 'automatically': 0.04643825,\n", + " 'availability': 0.49544457,\n", + " 'available': 0.19981025,\n", + " 'blog': 0.50581634,\n", + " 'boat': 0.4211383,\n", + " 'bot': 0.44343898,\n", + " 'bug': 0.16439897,\n", + " 'calculate': 0.44946215,\n", + " 'calculating': 0.21078831,\n", + " 'calculation': 0.91136605,\n", + " 'calculations': 0.35172287,\n", + " 'capacity': 0.32551798,\n", + " 'certification': 0.96537966,\n", + " 'certified': 0.86568826,\n", + " 'change': 0.091490604,\n", + " 'checkpoint': 0.13703609,\n", + " 'chess': 0.30361477,\n", + " 'class': 0.12189255,\n", + " 'cloud': 0.36273655,\n", + " 'cluster': 2.1554685,\n", + " 'clusters': 0.84253734,\n", + " 'competition': 0.0070358375,\n", + " 'component': 0.16093102,\n", + " 'components': 0.688979,\n", + " 'computation': 0.0109849,\n", + " 'computer': 
0.37449652,\n", + " 'computers': 0.29611063,\n", + " 'constant': 0.21192689,\n", + " 'cpu': 0.9483953,\n", + " 'crawl': 0.061979044,\n", + " 'data': 0.29847682,\n", + " 'database': 0.53361094,\n", + " 'define': 0.30592072,\n", + " 'deployment': 1.1050912,\n", + " 'desirable': 0.28776327,\n", + " 'determination': 0.25265238,\n", + " 'determine': 0.4538456,\n", + " 'determined': 0.5666302,\n", + " 'determines': 0.02666208,\n", + " 'dimensions': 0.43506965,\n", + " 'disadvantage': 0.40544793,\n", + " 'disk': 1.0043706,\n", + " 'domain': 0.08386699,\n", + " 'down': 1.1079221,\n", + " 'each': 0.20502539,\n", + " 'elastic': 2.0313072,\n", + " 'engineer': 0.41261968,\n", + " 'engineering': 0.43656224,\n", + " 'existing': 0.82118076,\n", + " 'expensive': 0.10213457,\n", + " 'factors': 0.04067958,\n", + " 'fernandez': 1.1611929,\n", + " 'fixed': 0.6458474,\n", + " 'forest': 0.07132318,\n", + " 'francisco': 1.0563725,\n", + " 'garcia': 0.13344267,\n", + " 'gb': 0.6862939,\n", + " 'global': 0.0054082987,\n", + " 'hardware': 0.7944886,\n", + " 'hen': 0.9853478,\n", + " 'honey': 0.081156164,\n", + " 'hour': 0.0074544367,\n", + " 'hours': 0.24539681,\n", + " 'hu': 0.06941744,\n", + " 'implement': 0.23772681,\n", + " 'implementation': 0.07986039,\n", + " 'improve': 0.2981144,\n", + " 'increase': 0.7570058,\n", + " 'increasing': 0.25063965,\n", + " 'index': 1.358504,\n", + " 'indexed': 0.29916498,\n", + " 'ing': 0.49232894,\n", + " 'integration': 0.20372295,\n", + " 'inventory': 0.49392712,\n", + " 'java': 0.96544707,\n", + " 'jose': 0.014233379,\n", + " 'ku': 1.0064884,\n", + " 'large': 0.009199611,\n", + " 'largest': 0.5853634,\n", + " 'latest': 0.075750045,\n", + " 'learning': 0.14278692,\n", + " 'length': 0.2575359,\n", + " 'limit': 0.27284575,\n", + " 'linear': 0.99686086,\n", + " 'load': 0.78078943,\n", + " 'loading': 0.09809506,\n", + " 'log': 0.053032227,\n", + " 'lopez': 0.37077188,\n", + " 'machine': 0.1154489,\n", + " 'maintenance': 0.24795005,\n", + " 'management': 
0.28454626,\n", + " 'map': 0.12368915,\n", + " 'master': 1.0599743,\n", + " 'math': 0.39245087,\n", + " 'maximum': 0.37043598,\n", + " 'mb': 0.65867126,\n", + " 'measure': 0.401138,\n", + " 'mechanism': 0.5363481,\n", + " 'memory': 1.0781962,\n", + " 'metric': 0.9361899,\n", + " 'mining': 0.4610803,\n", + " 'minute': 0.7122368,\n", + " 'minutes': 0.03330799,\n", + " 'multiple': 0.28440112,\n", + " 'network': 0.70334154,\n", + " 'new': 0.36585885,\n", + " 'node': 1.1508181,\n", + " 'nodes': 0.6786249,\n", + " 'number': 0.46848533,\n", + " 'online': 0.10060778,\n", + " 'operation': 0.013929884,\n", + " 'optimal': 0.052087568,\n", + " 'overhead': 0.12910955,\n", + " 'performance': 0.10508823,\n", + " 'po': 0.030801829,\n", + " 'poll': 0.032789562,\n", + " 'polling': 0.08606442,\n", + " 'polls': 0.31255096,\n", + " 'predict': 0.038815167,\n", + " 'process': 0.32648584,\n", + " 'processing': 0.13010792,\n", + " 'quan': 0.30870175,\n", + " 'rank': 0.23912333,\n", + " 'ratio': 1.1149174,\n", + " 'ratios': 0.17480499,\n", + " 'ready': 0.7220055,\n", + " 'reconciliation': 0.03476886,\n", + " 'reduce': 0.48650545,\n", + " 'regulation': 0.14490134,\n", + " 'requirements': 0.26383802,\n", + " 'resource': 0.48044914,\n", + " 'resources': 0.99925154,\n", + " 'sale': 0.23320372,\n", + " 'same': 0.04602473,\n", + " 'scala': 0.34763098,\n", + " 'scale': 1.3520039,\n", + " 'scaled': 0.373489,\n", + " 'scales': 0.23150739,\n", + " 'scaling': 1.3547646,\n", + " 'scope': 0.24351352,\n", + " 'sea': 0.012636473,\n", + " 'search': 0.5437506,\n", + " 'seconds': 0.21717648,\n", + " 'serial': 0.084758565,\n", + " 'server': 0.66100806,\n", + " 'si': 0.13631321,\n", + " 'sid': 0.4065147,\n", + " 'size': 1.4813008,\n", + " 'sizes': 1.1315687,\n", + " 'software': 0.053653706,\n", + " 'sort': 0.34857363,\n", + " 'specification': 0.47748893,\n", + " 'specifications': 0.54209507,\n", + " 'square': 0.0464906,\n", + " 'storage': 0.2826658,\n", + " 'strategy': 0.105019435,\n", + " 'swarm': 
0.08799058,\n", + " 'three': 0.0456386,\n", + " 'tier': 1.2590698,\n", + " 'torre': 0.033106416,\n", + " 'total': 0.15115097,\n", + " 'trainer': 0.28730983,\n", + " 'training': 0.91525143,\n", + " 'trial': 0.40092948,\n", + " 'unit': 0.12670164,\n", + " 'up': 0.48489103,\n", + " 'user': 0.5006898,\n", + " 'users': 0.35868,\n", + " 'vote': 0.16288216,\n", + " 'voting': 0.2478986,\n", + " 'web': 0.44947043},\n", + " 'text': 'of '\n", + " 'cluster '\n", + " 'sizes. '\n", + " 'Each '\n", + " 'cluster '\n", + " 'size '\n", + " 'determines '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'nodes '\n", + " 'and '\n", + " 'the '\n", + " 'CPU, '\n", + " 'memory '\n", + " 'and '\n", + " 'disk '\n", + " 'size '\n", + " 'of '\n", + " 'each '\n", + " 'node. '\n", + " 'All '\n", + " 'nodes '\n", + " 'within '\n", + " 'a '\n", + " 'certain '\n", + " 'cluster '\n", + " 'size '\n", + " 'have '\n", + " 'the '\n", + " 'same '\n", + " 'hardware '\n", + " 'specification. '\n", + " 'There '\n", + " 'is '\n", + " 'a '\n", + " 'fixed '\n", + " 'ratio '\n", + " 'between '\n", + " 'CPU, '\n", + " 'memory '\n", + " 'and '\n", + " 'disk, '\n", + " 'thus '\n", + " 'always '\n", + " 'scaling '\n", + " 'all '\n", + " '3 '\n", + " 'resources '\n", + " 'linearly. '\n", + " 'The '\n", + " 'existing '\n", + " 'cluster '\n", + " 'sizes '\n", + " 'for '\n", + " 'the '\n", + " 'indexing '\n", + " 'tier '\n", + " 'are '\n", + " 'based '\n", + " 'on '\n", + " 'node '\n", + " 'sizes '\n", + " 'starting '\n", + " 'from '\n", + " '4GB/2vCPU/100GB '\n", + " 'disk '\n", + " 'to '\n", + " '64GB/32vCPU/1600GB '\n", + " 'disk. 
'\n", + " 'Once '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'scales '\n", + " 'up '\n", + " 'to '\n", + " 'the '\n", + " 'largest '\n", + " 'node '\n", + " 'size '\n", + " '(64GB '\n", + " 'memory), '\n", + " 'any '\n", + " 'further '\n", + " 'scale-up '\n", + " 'adds '\n", + " 'new '\n", + " '64GB '\n", + " 'nodes, '\n", + " 'allowing '\n", + " 'a '\n", + " 'cluster '\n", + " 'to '\n", + " 'scale '\n", + " 'up '\n", + " 'to '\n", + " '32 '\n", + " 'nodes '\n", + " 'of '\n", + " '64GB. '\n", + " 'Note '\n", + " 'that '\n", + " 'this '\n", + " 'is '\n", + " 'not '\n", + " 'a '\n", + " 'hard '\n", + " 'upper '\n", + " 'bound '\n", + " 'on '\n", + " 'the '\n", + " 'number '\n", + " 'of '\n", + " 'Elasticsearch '\n", + " 'nodes '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'can '\n", + " 'be '\n", + " 'increased '\n", + " 'if '\n", + " 'necessary. '\n", + " 'Every '\n", + " '5 '\n", + " 'seconds '\n", + " 'the '\n", + " 'autoscaler '\n", + " 'polls '\n", + " 'metrics '\n", + " 'from '\n", + " 'the '\n", + " 'master '\n", + " 'node, '\n", + " 'calculates '\n", + " 'the '\n", + " 'desirable '\n", + " 'cluster '\n", + " 'size '\n", + " 'and '\n", + " 'if '\n", + " 'it '\n", + " 'is '\n", + " 'different '\n", + " 'from '\n", + " 'the '\n", + " 'current '\n", + " 'cluster '\n", + " 'size, '\n", + " 'it '\n", + " 'updates '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'Kubernetes '\n", + " 'Deployment '\n", + " 'accordingly. '\n", + " 'Note '\n", + " 'that '\n", + " 'the '\n", + " 'actual '\n", + " 'reconciliation '\n", + " 'of '\n", + " 'the '\n", + " 'deployment '\n", + " 'towards '\n", + " 'the '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'and '\n", + " 'adding '\n", + " 'and '\n", + " 'removing '\n", + " 'the '\n", + " 'Elasticsearch '\n", + " 'nodes '\n", + " 'to '\n", + " 'achieve '\n", + " 'this '\n", + " 'is '\n", + " 'done '\n", + " 'by '\n", + " 'Kubernetes. 
'\n", + " 'In '\n", + " 'order '\n", + " 'to '\n", + " 'avoid '\n", + " 'very '\n", + " 'short-lived '\n", + " 'changes '\n", + " 'to '\n", + " 'the '\n", + " 'cluster '\n", + " 'size, '\n", + " 'we '\n", + " 'account '\n", + " 'for '\n", + " 'a '\n", + " '10% '\n", + " 'headroom '\n", + " 'when '\n", + " 'calculating '\n", + " 'the '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'during '\n", + " 'a '\n", + " 'scale '\n", + " 'down '\n", + " 'and '\n", + " 'a '\n", + " 'scale '\n", + " 'down '\n", + " 'takes '\n", + " 'effect '\n", + " 'only '\n", + " 'if '\n", + " 'all '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'calculations '\n", + " 'within '\n", + " 'the '\n", + " 'past '\n", + " '15 '\n", + " 'minute '\n", + " 'have '\n", + " 'indicated '\n", + " 'a '\n", + " 'scale-down. '\n", + " 'Currently, '\n", + " 'the '\n", + " 'time '\n", + " 'that '\n", + " 'it '\n", + " 'takes '\n", + " 'for '\n", + " 'an '\n", + " 'increase '\n", + " 'in '\n", + " 'the '\n", + " 'metrics '\n", + " 'to '\n", + " 'lead '\n", + " 'to '\n", + " 'the '\n", + " 'first '\n", + " 'Elasticsearch '\n", + " 'node '\n", + " 'being '\n", + " 'added '\n", + " 'to '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'ready '\n", + " 'to '\n", + " 'process '\n", + " 'indexing '\n", + " 'load '\n", + " 'is '\n", + " 'under '\n", + " '1 '\n", + " 'minute. '\n", + " 'Conclusion '\n", + " 'In '\n", + " 'this '\n", + " 'blog '\n", + " 'post, '\n", + " 'we '\n", + " 'explained '\n", + " 'how '\n", + " 'ingest '\n", + " 'autoscaling '\n", + " 'works '\n", + " 'in '\n", + " 'Elasticsearch, '\n", + " 'the '\n", + " 'different '\n", + " 'components '\n", + " 'involved, '\n", + " 'and '\n", + " 'the '\n", + " 'metrics '\n", + " 'used '\n", + " 'to '\n", + " 'quantify '\n", + " 'the '\n", + " 'resources '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload. 
'\n", + " 'We '\n", + " 'believe '\n", + " 'that '\n", + " 'such '\n", + " 'an '\n", + " 'autoscaling '\n", + " 'mechanism '\n", + " 'is '\n", + " 'crucial '\n", + " 'to '\n", + " 'reduce '\n", + " 'the '\n", + " 'operational '\n", + " 'overhead '\n", + " 'of '\n", + " 'an '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'for '\n", + " 'the '\n", + " 'users '\n", + " 'by '\n", + " 'automatically '\n", + " 'increasing '\n", + " 'the '\n", + " 'available '\n", + " 'resources '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'when '\n", + " 'necessary. '\n", + " 'Furthermore, '\n", + " 'it '\n", + " 'leads '\n", + " 'to '\n", + " 'cost '\n", + " 'reduction '\n", + " 'by '\n", + " 'scaling '\n", + " 'down '\n", + " 'the '\n", + " 'cluster '\n", + " 'when '\n", + " 'the '\n", + " 'available '\n", + " 'resources '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'are '\n", + " 'not '\n", + " 'required '\n", + " 'anymore. '\n", + " 'Ready '\n", + " 'to '\n", + " 'try '\n", + " 'this '\n", + " 'out '\n", + " 'on '\n", + " 'your '\n", + " 'own? '\n", + " 'Start '\n", + " 'a '\n", + " 'free '\n", + " 'trial '\n", + " '. '\n", + " 'Want '\n", + " 'to '\n", + " 'get '\n", + " 'Elastic '\n", + " 'certified? '\n", + " 'Find '\n", + " 'out '\n", + " 'when '\n", + " 'the '\n", + " 'next '\n", + " 'Elasticsearch '\n", + " 'Engineer '\n", + " 'training '\n", + " 'is '\n", + " 'running! 
'\n", + " 'Pooya '\n", + " 'Salehi '\n", + " 'Henning '\n", + " 'Andersen '\n", + " 'Francisco '\n", + " 'Fernández '\n", + " 'Castaño '\n", + " '11 '\n", + " 'min '\n", + " 'read '\n", + " '29 '\n", + " 'July '\n", + " '2024 '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless '\n", + " 'Share '\n", + " 'Twitter '\n", + " 'Facebook '\n", + " 'LinkedIn '\n", + " 'Recommended '\n", + " 'Articles '\n", + " 'Elastic '\n", + " 'Cloud'},\n", + " {'embeddings': {'##4': 0.5609497,\n", + " '##down': 0.011559885,\n", + " '##est': 1.1421111,\n", + " '##hi': 0.0060656513,\n", + " '##ing': 0.48465544,\n", + " '##ler': 0.12595108,\n", + " '##less': 1.3963115,\n", + " '##lessly': 0.76121324,\n", + " '##ling': 1.03232,\n", + " '##load': 0.6918682,\n", + " '##oya': 0.56508857,\n", + " '##rch': 0.94580704,\n", + " '##room': 1.397477,\n", + " '##sca': 1.4164101,\n", + " '##sea': 1.4075159,\n", + " '10': 0.005647892,\n", + " '15': 1.0004816,\n", + " '16': 0.0726173,\n", + " '202': 0.79451597,\n", + " 'account': 0.054787852,\n", + " 'accounting': 0.2977837,\n", + " 'advantage': 0.13797385,\n", + " 'after': 0.04746113,\n", + " 'algorithm': 0.84724355,\n", + " 'amazon': 0.7599511,\n", + " 'analysis': 0.4048887,\n", + " 'analyze': 0.12881227,\n", + " 'andersen': 0.091110215,\n", + " 'anya': 0.031511437,\n", + " 'apache': 0.8387389,\n", + " 'architect': 0.57877886,\n", + " 'archive': 0.027499544,\n", + " 'august': 0.523268,\n", + " 'auto': 1.4506402,\n", + " 'automatic': 0.94025064,\n", + " 'availability': 0.348747,\n", + " 'available': 0.05306761,\n", + " 'blog': 0.8397168,\n", + " 'bot': 0.38508278,\n", + " 'bug': 0.1267487,\n", + " 'build': 0.776895,\n", + " 'building': 0.7504958,\n", + " 'built': 0.19563165,\n", + " 'calculate': 0.3598465,\n", + " 'calculating': 0.11605539,\n", + " 'calculation': 0.8540975,\n", + " 'calculations': 0.57275534,\n", + " 'capacity': 0.3109483,\n", + " 'cave': 0.29021654,\n", + " 'certification': 0.64684826,\n", + " 'certified': 0.26541537,\n", + " 
'checkpoint': 0.06267695,\n", + " 'chess': 0.22270066,\n", + " 'class': 0.044449553,\n", + " 'client': 0.05088419,\n", + " 'cloud': 0.9856347,\n", + " 'cluster': 1.8377897,\n", + " 'clustered': 0.18159664,\n", + " 'clusters': 0.79538465,\n", + " 'collapse': 0.29267746,\n", + " 'component': 0.012821147,\n", + " 'components': 0.50653857,\n", + " 'computer': 0.22416146,\n", + " 'cost': 0.06086615,\n", + " 'crawl': 0.27863678,\n", + " 'data': 0.23600358,\n", + " 'database': 0.386357,\n", + " 'decrease': 0.29198787,\n", + " 'deployment': 0.4085412,\n", + " 'desired': 0.04168813,\n", + " 'development': 0.0050133946,\n", + " 'dimensions': 0.10934332,\n", + " 'disadvantage': 0.33458805,\n", + " 'domain': 0.16470446,\n", + " 'down': 1.343148,\n", + " 'downs': 0.2709486,\n", + " 'drop': 0.19782026,\n", + " 'during': 0.4177895,\n", + " 'effect': 0.39730436,\n", + " 'elastic': 1.9854976,\n", + " 'engineer': 0.58167315,\n", + " 'engineering': 0.5884908,\n", + " 'ensemble': 0.007619722,\n", + " 'facebook': 0.3225428,\n", + " 'fernandez': 0.42895493,\n", + " 'fifteen': 0.10546452,\n", + " 'first': 0.50220585,\n", + " 'forest': 0.14911638,\n", + " 'framework': 0.047809396,\n", + " 'free': 0.3561092,\n", + " 'global': 0.09408311,\n", + " 'group': 0.14574468,\n", + " 'handling': 0.30345336,\n", + " 'head': 0.117694445,\n", + " 'hour': 0.3250166,\n", + " 'hours': 0.70438623,\n", + " 'implement': 0.13235687,\n", + " 'implementation': 0.13236406,\n", + " 'important': 0.055658367,\n", + " 'improve': 0.2550515,\n", + " 'increase': 0.74923754,\n", + " 'increasing': 0.3597461,\n", + " 'index': 1.4273754,\n", + " 'indexed': 0.2932871,\n", + " 'ing': 1.2874681,\n", + " 'introduced': 0.10785041,\n", + " 'inventory': 0.65916276,\n", + " 'java': 0.88944626,\n", + " 'july': 0.14186577,\n", + " 'large': 0.06278902,\n", + " 'latest': 0.068817586,\n", + " 'learning': 0.12424224,\n", + " 'length': 0.030345708,\n", + " 'limit': 0.14073928,\n", + " 'load': 1.0610044,\n", + " 'loading': 0.39865428,\n", 
+ " 'loss': 0.11432742,\n", + " 'machine': 0.029201662,\n", + " 'maintenance': 0.15768714,\n", + " 'management': 0.31734702,\n", + " 'math': 0.406777,\n", + " 'maximum': 0.13483465,\n", + " 'measure': 0.5081328,\n", + " 'mechanism': 0.8204686,\n", + " 'memory': 1.0461255,\n", + " 'metric': 0.9943368,\n", + " 'mining': 0.5402124,\n", + " 'minute': 0.92393905,\n", + " 'minutes': 0.3759728,\n", + " 'moment': 0.11160666,\n", + " 'morris': 0.060925715,\n", + " 'network': 0.51853234,\n", + " 'node': 0.99145895,\n", + " 'online': 0.36771652,\n", + " 'operation': 0.28533393,\n", + " 'overhead': 0.086819395,\n", + " 'patience': 0.11310515,\n", + " 'perfect': 0.12382903,\n", + " 'performance': 0.06312573,\n", + " 'process': 0.5356137,\n", + " 'processing': 0.55718875,\n", + " 'production': 0.05736718,\n", + " 'project': 0.14496073,\n", + " 'prototype': 0.31378728,\n", + " 'quan': 0.22408743,\n", + " 'ready': 0.25202373,\n", + " 'reduce': 0.5264253,\n", + " 'reduction': 0.037918843,\n", + " 'research': 0.0142833255,\n", + " 'resource': 0.09839988,\n", + " 'resources': 0.7532266,\n", + " 'rights': 0.08338795,\n", + " 'room': 0.84089494,\n", + " 'rs': 0.47752637,\n", + " 'scala': 0.17796026,\n", + " 'scale': 1.6349432,\n", + " 'scaled': 0.39957505,\n", + " 'scales': 0.24761787,\n", + " 'scaling': 1.3751862,\n", + " 'scope': 0.009172562,\n", + " 'search': 0.6669978,\n", + " 'seconds': 0.11594447,\n", + " 'serial': 0.21314114,\n", + " 'server': 1.1875997,\n", + " 'servers': 0.3761195,\n", + " 'share': 0.21588095,\n", + " 'shrink': 0.08177304,\n", + " 'si': 0.039096646,\n", + " 'sid': 0.26323187,\n", + " 'site': 0.27832702,\n", + " 'size': 1.2518198,\n", + " 'sizes': 0.68347317,\n", + " 'small': 0.021309003,\n", + " 'software': 0.21712899,\n", + " 'sort': 0.46309024,\n", + " 'step': 0.13614927,\n", + " 'storage': 0.33423752,\n", + " 'strategy': 0.2746019,\n", + " 'swarm': 0.18959516,\n", + " 'task': 0.12210263,\n", + " 'time': 0.3716685,\n", + " 'traffic': 0.0044686934,\n", + " 
'training': 0.56078845,\n", + " 'trial': 0.30781624,\n", + " 'tutor': 0.18126883,\n", + " 'twitter': 0.7352328,\n", + " 'useful': 0.07486964,\n", + " 'user': 0.61840165,\n", + " 'users': 0.5178945,\n", + " 'wait': 0.12994274,\n", + " 'weaving': 0.09568315,\n", + " 'web': 0.3402482,\n", + " 'website': 0.17116618,\n", + " 'work': 0.38590312,\n", + " 'working': 0.040917397,\n", + " 'works': 0.2640411,\n", + " 'years': 0.057129644},\n", + " 'text': 'cluster '\n", + " 'size, '\n", + " 'we '\n", + " 'account '\n", + " 'for '\n", + " 'a '\n", + " '10% '\n", + " 'headroom '\n", + " 'when '\n", + " 'calculating '\n", + " 'the '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'during '\n", + " 'a '\n", + " 'scale '\n", + " 'down '\n", + " 'and '\n", + " 'a '\n", + " 'scale '\n", + " 'down '\n", + " 'takes '\n", + " 'effect '\n", + " 'only '\n", + " 'if '\n", + " 'all '\n", + " 'desired '\n", + " 'cluster '\n", + " 'size '\n", + " 'calculations '\n", + " 'within '\n", + " 'the '\n", + " 'past '\n", + " '15 '\n", + " 'minute '\n", + " 'have '\n", + " 'indicated '\n", + " 'a '\n", + " 'scale-down. '\n", + " 'Currently, '\n", + " 'the '\n", + " 'time '\n", + " 'that '\n", + " 'it '\n", + " 'takes '\n", + " 'for '\n", + " 'an '\n", + " 'increase '\n", + " 'in '\n", + " 'the '\n", + " 'metrics '\n", + " 'to '\n", + " 'lead '\n", + " 'to '\n", + " 'the '\n", + " 'first '\n", + " 'Elasticsearch '\n", + " 'node '\n", + " 'being '\n", + " 'added '\n", + " 'to '\n", + " 'the '\n", + " 'cluster '\n", + " 'and '\n", + " 'ready '\n", + " 'to '\n", + " 'process '\n", + " 'indexing '\n", + " 'load '\n", + " 'is '\n", + " 'under '\n", + " '1 '\n", + " 'minute. 
'\n", + " 'Conclusion '\n", + " 'In '\n", + " 'this '\n", + " 'blog '\n", + " 'post, '\n", + " 'we '\n", + " 'explained '\n", + " 'how '\n", + " 'ingest '\n", + " 'autoscaling '\n", + " 'works '\n", + " 'in '\n", + " 'Elasticsearch, '\n", + " 'the '\n", + " 'different '\n", + " 'components '\n", + " 'involved, '\n", + " 'and '\n", + " 'the '\n", + " 'metrics '\n", + " 'used '\n", + " 'to '\n", + " 'quantify '\n", + " 'the '\n", + " 'resources '\n", + " 'needed '\n", + " 'to '\n", + " 'handle '\n", + " 'the '\n", + " 'indexing '\n", + " 'workload. '\n", + " 'We '\n", + " 'believe '\n", + " 'that '\n", + " 'such '\n", + " 'an '\n", + " 'autoscaling '\n", + " 'mechanism '\n", + " 'is '\n", + " 'crucial '\n", + " 'to '\n", + " 'reduce '\n", + " 'the '\n", + " 'operational '\n", + " 'overhead '\n", + " 'of '\n", + " 'an '\n", + " 'Elasticsearch '\n", + " 'cluster '\n", + " 'for '\n", + " 'the '\n", + " 'users '\n", + " 'by '\n", + " 'automatically '\n", + " 'increasing '\n", + " 'the '\n", + " 'available '\n", + " 'resources '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'when '\n", + " 'necessary. '\n", + " 'Furthermore, '\n", + " 'it '\n", + " 'leads '\n", + " 'to '\n", + " 'cost '\n", + " 'reduction '\n", + " 'by '\n", + " 'scaling '\n", + " 'down '\n", + " 'the '\n", + " 'cluster '\n", + " 'when '\n", + " 'the '\n", + " 'available '\n", + " 'resources '\n", + " 'in '\n", + " 'the '\n", + " 'cluster '\n", + " 'are '\n", + " 'not '\n", + " 'required '\n", + " 'anymore. '\n", + " 'Ready '\n", + " 'to '\n", + " 'try '\n", + " 'this '\n", + " 'out '\n", + " 'on '\n", + " 'your '\n", + " 'own? '\n", + " 'Start '\n", + " 'a '\n", + " 'free '\n", + " 'trial '\n", + " '. '\n", + " 'Want '\n", + " 'to '\n", + " 'get '\n", + " 'Elastic '\n", + " 'certified? '\n", + " 'Find '\n", + " 'out '\n", + " 'when '\n", + " 'the '\n", + " 'next '\n", + " 'Elasticsearch '\n", + " 'Engineer '\n", + " 'training '\n", + " 'is '\n", + " 'running! 
'\n", + " 'Pooya '\n", + " 'Salehi '\n", + " 'Henning '\n", + " 'Andersen '\n", + " 'Francisco '\n", + " 'Fernández '\n", + " 'Castaño '\n", + " '11 '\n", + " 'min '\n", + " 'read '\n", + " '29 '\n", + " 'July '\n", + " '2024 '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless '\n", + " 'Share '\n", + " 'Twitter '\n", + " 'Facebook '\n", + " 'LinkedIn '\n", + " 'Recommended '\n", + " 'Articles '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless '\n", + " '• '\n", + " '15 '\n", + " 'May '\n", + " '2024 '\n", + " 'Building '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless '\n", + " 'Explore '\n", + " 'the '\n", + " 'architectural '\n", + " 'decisions '\n", + " 'we '\n", + " 'made '\n", + " 'along '\n", + " 'the '\n", + " 'journey '\n", + " 'of '\n", + " 'building '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless. '\n", + " 'Jason '\n", + " 'Tedor '\n", + " 'Pooya '\n", + " 'Salehi '\n", + " 'Henning '\n", + " 'Andersen '\n", + " 'Francisco '\n", + " 'Fernández '\n", + " 'Castaño '\n", + " '11 '\n", + " 'min '\n", + " 'read '\n", + " '29 '\n", + " 'July '\n", + " '2024 '\n", + " 'Elastic '\n", + " 'Cloud '\n", + " 'Serverless '\n", + " 'Share '\n", + " 'Twitter '\n", + " 'Facebook '\n", + " 'LinkedIn '\n", + " 'Jump '\n", + " 'to '\n", + " 'Ingest '\n", + " 'autoscaling '\n", + " 'overview '\n", + " 'Metrics '\n", + " 'Ingestion '\n", + " 'load '\n", + " 'Memory '\n", + " 'Scaling '\n", + " 'the '\n", + " 'cluster '\n", + " 'Show '\n", + " 'more '\n", + " 'Sitemap '\n", + " 'RSS '\n", + " 'Feed '\n", + " 'Search '\n", + " 'Labs '\n", + " 'Repo '\n", + " 'Elastic.co '\n", + " '©2024. '\n", + " 'Elasticsearch '\n", + " 'B.V. 
'\n", + " 'All '\n", + " 'Rights '\n", + " 'Reserved.'}],\n", + " 'inference_id': 'my-elser-model',\n", + " 'model_settings': {'task_type': 'sparse_embedding'}}},\n", + " 'title': 'Elasticsearch ingest autoscaling — '\n", + " 'Search Labs',\n", + " 'url': 'https://www.elastic.co/search-labs/blog/elasticsearch-ingest-autoscaling',\n", + " 'url_host': 'www.elastic.co',\n", + " 'url_path': '/search-labs/blog/elasticsearch-ingest-autoscaling',\n", + " 'url_path_dir1': 'search-labs',\n", + " 'url_path_dir2': 'blog',\n", + " 'url_path_dir3': 'elasticsearch-ingest-autoscaling',\n", + " 'url_port': 443,\n", + " 'url_scheme': 'https'}}],\n", + " 'max_score': 1.2861483,\n", + " 'total': {'relation': 'eq', 'value': 228}},\n", + " 'timed_out': False,\n", + " 'took': 2}\n" + ] + } + ] + } + ] +} diff --git a/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb b/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb deleted file mode 100644 index 69eebff3..00000000 --- a/supporting-blog-content/rag-ties-the-app-together/ChatGPT_and_Elasticsearch__The_RAG_Really_Tied_the_App_Togetheripynb +++ /dev/null @@ -1,5624 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# ChatGPT and Elasticsearch: The RAG Really Tied the App Together\n", - "\n", - "\n", - "## This notebook will show you how to:\n", - " - Create an Elastics Serverless Project\n", - "- Setup an Inference API\n", - " - This will download and deploy ELSER for embedding inference\n", - "- Create an index template\n", - " - This will use `semantic_text` which will auto-chunk and embed the body of text\n", - "- Use the Elastic Open Crawler to 
crawl the Elastic Search/Observability/Security Labs\n", - "
\n", - "
\n", - "\n", - "## The [accompying blog](https://www.elastic.co/search-labs/blog/app/search-labs/blog/rag-ties-the-room-together/) takes it further by showing you how to:\n", - "- Use Playground to test chat prompts and configurations\n", - " - Then generate queries for our RAG app\n", - "- Use the queries from Playground to finish out a RAG Chatbot app\n", - " - Python FastAPI backend with React frontend" - ], - "metadata": { - "id": "_ebYbHHh_0hI" - } - }, - { - "cell_type": "code", - "source": [ - "!pip install elasticsearch" - ], - "metadata": { - "id": "_DmXlQWsGNeM" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import requests\n", - "import getpass\n", - "from pprint import pprint\n", - "from elasticsearch import Elasticsearch\n", - "from elasticsearch.exceptions import ConnectionTimeout\n", - "from time import sleep\n", - "from IPython.display import clear_output\n" - ], - "metadata": { - "id": "cuomUVE-zYjB" - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Project Setup" - ], - "metadata": { - "id": "HOOv0igTKjMS" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Enter your Cloud API Key\n", - "\n", - "Generate your secret API key at https://cloud.elastic.co/account/keys" - ], - "metadata": { - "id": "yWSg_D91x9mF" - } - }, - { - "cell_type": "code", - "source": [ - "# Prompt the user for input while masking it for security\n", - "api_key = getpass.getpass(\"Enter your API key: \")\n", - "\n", - "print(\"API key successfully entered!\")\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bidHlfsy2OPf", - "outputId": "ba8305a6-85d7-4173-fde4-166e77c4971e" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Enter your API key: ··········\n", - "API key successfully entered!\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Create 
Elasticsearch project\n", - "[Serverless API Docs](https://www.elastic.co/docs/api/doc/elastic-cloud-serverless/operation/operation-createelasticsearchproject#operation-createelasticsearchproject-body-application-json-optimized_for)" - ], - "metadata": { - "id": "mt4_kL0b0E75" - } - }, - { - "cell_type": "code", - "source": [ - "url = \"https://api.elastic-cloud.com/api/v1/serverless/projects/elasticsearch\"\n", - "\n", - "project_data = {\n", - " \"name\": \"The RAG Really Tied the App Together\",\n", - " \"region_id\": \"aws-us-east-1\",\n", - " \"optimized_for\": \"vector\"\n", - "}\n", - "\n", - "auth_header = f\"ApiKey {api_key}\"\n", - "headers = {\n", - " \"Content-Type\": \"application/json\",\n", - " \"Authorization\": auth_header\n", - "}\n", - "\n", - "es_project = requests.post(url, json=project_data, headers=headers)\n", - "\n", - "if 200 <= es_project.status_code < 300:\n", - " es_project_keys = es_project.json()\n", - " prg_name = es_project_keys['name']\n", - " print(f\"Project {prg_name} creation started\")\n", - "\n", - " # wait for the project to be initialized and ready\n", - " project_id = es_project.json()['id']\n", - " print('Checking if project is created and ready')\n", - " loop = 1\n", - " while True:\n", - " es_project_check = requests.get(url +f'/{project_id}/status', headers=headers)\n", - " if es_project_check.json()['phase'] == 'initialized':\n", - " break\n", - " else:\n", - " clear_output(wait=True)\n", - " print(f\"Waiting for project to be ready. 
Current status:{es_project_check.json()['phase']} - Loop {loop} Sleeping 10 seconds\")\n", - " sleep(10)\n", - " loop += 1\n", - "\n", - " print('Project is ready')\n", - "\n", - "else:\n", - " print(es_project.text)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "lVkyA7KUyDEO", - "outputId": "8dd818ba-9c41-4e90-d718-319dbfcf6062" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Waiting for project to be ready. Current status:initializing - Loop 7 Sleeping 10 seconds\n", - "Project is ready\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Create elasticsearch client" - ], - "metadata": { - "id": "Uh0JpsnONMhv" - } - }, - { - "cell_type": "code", - "source": [ - "es = Elasticsearch(es_project_keys['endpoints']['elasticsearch'],\n", - " basic_auth=(es_project_keys['credentials']['username'],\n", - " es_project_keys['credentials']['password']\n", - " )\n", - " )" - ], - "metadata": { - "id": "KG01YrIwMdHz" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Project API Key\n", - "Create a [Project level API key](https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html)" - ], - "metadata": { - "id": "Xhu2U-YszbDe" - } - }, - { - "cell_type": "code", - "source": [ - "project_key_response = es.security.create_api_key(\n", - " name= \"full_access_key\",\n", - " metadata = { \"description\": \"API key for full access\"},\n", - " expiration= \"14d\",\n", - ")\n", - "\n", - "project_api_key = project_key_response['encoded']\n", - "print (f\"{project_key_response['name']} has been created\")" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Puj1UWIKVtSv", - "outputId": "0f5d6937-1204-4f35-9f12-d9eb517cf675" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - 
"full_access_key has been created\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Inference API and Index Setup" - ], - "metadata": { - "id": "C-J2hDsVWIut" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Inference API\n", - "This will:\n", - "- Create an inference API endpoint\n", - "- Download ELSER model (if not already downloaded)\n", - "- Deploy ELSER model with `service_settings` configs\n", - "\n", - "Note - This will wait for ELSER to be downloaded and deployed" - ], - "metadata": { - "id": "AjGmk-jwXi_4" - } - }, - { - "cell_type": "code", - "source": [ - "model_config = {\n", - " \"service\": \"elser\",\n", - " \"service_settings\": {\n", - " \"num_allocations\": 8,\n", - " \"num_threads\": 1\n", - " }\n", - "}\n", - "\n", - "inference_id = \"my-elser-model\"\n", - "\n", - "try:\n", - " create_endpoint = es.inference.put_model(\n", - " inference_id=inference_id,\n", - " task_type=\"sparse_embedding\",\n", - " body=model_config\n", - " )\n", - "\n", - "except ConnectionTimeout:\n", - " print(\"Connection timed out. 
This can happen while waiting for the Inference model to fully deploy and start.\")\n", - "finally:\n", - " print(\"Waiting for inference model to be fully deployed\")\n", - " inf_info = es.inference.get_model(inference_id=inference_id)\n", - " model_id = inf_info.body['endpoints'][0]['service_settings']['model_id']\n", - "\n", - " while True:\n", - " try:\n", - " model_stats = es.ml.get_trained_models_stats(model_id=model_id)\n", - " routing_state = model_stats.body['trained_model_stats'][0]['deployment_stats']['nodes'][0]['routing_state']['routing_state']\n", - "\n", - " if routing_state == 'started':\n", - " print(\"Inference API created and Inference model is fully deployed.\")\n", - " break\n", - " else:\n", - " clear_output(wait=True)\n", - " print(\"Waiting for inference model to be fully deployed\")\n", - " sleep(5)\n", - " except (IndexError, KeyError): # Handle missing data in the response\n", - " clear_output(wait=True)\n", - " print(\"Still waiting for model deployment...\")\n", - " sleep(5)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yblutX5J1LT1", - "outputId": "9810ce99-1d6f-413e-d4ba-89ed6b4391e4" - }, - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Waiting for inference model to be fully deployed\n", - "Inference API created and Inference model is fully deployed.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## Create index template\n", - "The two key fields here are:\n", - "- body\n", - " - the field with the body of text and we use that as the source to copy to our semantic text field `semantic_body`\n", - "- semantic_body\n", - " - This field will automatically handle chunking and generating embeddings" - ], - "metadata": { - "id": "hixAZWcxkBkZ" - } - }, - { - "cell_type": "code", - "source": [ - "template_body = {\n", - " \"index_patterns\": [\"elastic-labs*\"],\n", - " \"template\": {\n", - " \"mappings\": {\n", - " 
\"properties\": {\n", - " \"body\": {\n", - " \"type\": \"text\",\n", - " \"copy_to\": \"semantic_body\"\n", - " },\n", - " \"semantic_body\": {\n", - " \"type\": \"semantic_text\",\n", - " \"inference_id\": \"my-elser-model\"\n", - " },\n", - " \"headings\": {\n", - " \"type\": \"text\"\n", - " },\n", - " \"id\": {\n", - " \"type\": \"keyword\"\n", - " },\n", - " \"meta_description\": {\n", - " \"type\": \"text\"\n", - " },\n", - " \"title\": {\n", - " \"type\": \"text\"\n", - " }\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n", - "template_resp = es.indices.put_index_template(\n", - " name=\"labs_template\",\n", - " body=template_body\n", - ")\n", - "\n", - "print(template_resp.body)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "EBEyEVg1kDh2", - "outputId": "3355ee8d-30c5-4675-f039-77675cae1ba3" - }, - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'acknowledged': True}\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Crawl the docs" - ], - "metadata": { - "id": "hm65dUPTBTpb" - } - }, - { - "cell_type": "markdown", - "source": [ - "# Open Crawler\n", - "This HAS TO BE RUN on a Linux/Mac/Windows host/vm NOT in colab\n", - "\n", - "The [blog details the steps](https://www.elastic.co/search-labs/blog/app/search-labs/blog/rag-ties-the-room-together#crawl-all-the-labs) below running on a Macbook\n", - "\n", - "You can also review the [Open Crawler setup](https://github.com/elastic/crawler?tab=readme-ov-file#setup)." - ], - "metadata": { - "id": "EJ5D8bh3BWX5" - } - }, - { - "cell_type": "markdown", - "source": [ - "## High level steps to configure and run crawler\n", - "*This HAS TO BE RUN on a Linux/Mac/Windows host/vm NOT in colab*\n", - "\n", - "- Clone the repo\n", - " - `git clone git@github.com:elastic/crawler.git`\n", - "- Build the Open Crawler Docker container\n", - " - `docker build -t crawler-image . 
&& docker run -i -d --name crawler crawler-image`\n", - "- Create a new config file\n", - " - `vi config/elastic-labs.yml`\n", - " - run the _generate config_ cell below then paste the output in the config file and save.\n", - "- Copy the new local config into the container\n", - " - `docker cp config/elastic-labs.yml crawler:/app/config/elastic-labs.yml`\n", - "- Run the crawler\n", - " - `docker exec -it crawler bin/crawler crawl config/elastic-labs.yml`" - ], - "metadata": { - "id": "WMjjJTXR_hhD" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Generate Config\n", - "Run the below cell to generate the yml config file" - ], - "metadata": { - "id": "2ZB6L76Y8thR" - } - }, - { - "cell_type": "code", - "source": [ - "config = f\"\"\"\n", - "domains:\n", - " - url: https://www.elastic.co\n", - " seed_urls:\n", - " - https://www.elastic.co/search-labs\n", - " - https://www.elastic.co/observability-labs\n", - " - https://www.elastic.co/security-labs\n", - " crawl_rules:\n", - " - policy: allow\n", - " type: begins\n", - " pattern: /search-labs\n", - " - policy: allow\n", - " type: begins\n", - " pattern: /observability-labs\n", - " - policy: allow\n", - " type: begins\n", - " pattern: /security-labs\n", - " - policy: deny\n", - " type: regex\n", - " pattern: .*/author/.*\n", - " - policy: deny\n", - " type: regex\n", - " pattern: .*\n", - "\n", - "output_sink: elasticsearch\n", - "output_index: elastic-labs\n", - "max_crawl_depth: 25\n", - "\n", - "elasticsearch:\n", - " host: \"{es_project.json()['endpoints']['elasticsearch']}\"\n", - " port: \"443\"\n", - " api_key: \"{project_api_key}\"\n", - " bulk_api.max_items: 10\n", - "\"\"\"\n", - "\n", - "print(config)" - ], - "metadata": { - "id": "2XDfDkdM85lN" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Confirm the docs have been crawled" - ], - "metadata": { - "id": "v7wxtXHABiC8" - } - }, - { - "cell_type": "markdown", - "source": [ - "First look at 
the count of docs for each Labs' site" - ], - "metadata": { - "id": "a6mItejyBovz" - } - }, - { - "cell_type": "code", - "source": [ - "query = {\n", - " \"size\": 0,\n", - " \"aggs\": {\n", - " \"url_path_dir1\": {\n", - " \"terms\": {\n", - " \"field\": \"url_path_dir1.keyword\"\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n", - "response = es.search(index=\"elastic-labs\", body=query)\n", - "pprint(response.body)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "_OZxWsx_BVy_", - "outputId": "e7333fa5-37b9-46c7-96f1-622d98e52521" - }, - "execution_count": 20, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{'_shards': {'failed': 0, 'skipped': 0, 'successful': 5, 'total': 5},\n", - " 'aggregations': {'url_path_dir1': {'buckets': [{'doc_count': 216,\n", - " 'key': 'search-labs'},\n", - " {'doc_count': 214,\n", - " 'key': 'security-labs'},\n", - " {'doc_count': 158,\n", - " 'key': 'observability-labs'}],\n", - " 'doc_count_error_upper_bound': 0,\n", - " 'sum_other_doc_count': 0}},\n", - " 'hits': {'hits': [],\n", - " 'max_score': None,\n", - " 'total': {'relation': 'eq', 'value': 588}},\n", - " 'timed_out': False,\n", - " 'took': 6}\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Next review a sample doc" - ], - "metadata": { - "id": "UnTStG_TCpp1" - } - }, - { - "cell_type": "code", - "source": [ - "query = {\n", - " \"size\": 1,\n", - " \"query\": {\n", - " \"match\": {\n", - " \"url_path_dir2\": \"blog\"\n", - " }\n", - " }\n", - "}\n", - "\n", - "response = es.search(index=\"elastic-labs\", body=query)\n", - "pprint(response.body)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "-1vWFv7cCuFu", - "outputId": "eaa4aa08-13a1-459a-dae0-9c8c1f0a69fc" - }, - "execution_count": 23, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[1;30;43mStreaming output truncated to the last 5000 
lines.\u001b[0m\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'exposes '\n", - " 'a '\n", - " 'list '\n", - " 'of '\n", - " 'ingestion '\n", - " 'load '\n", - " 'values, '\n", - " 'one '\n", - " 'for '\n", - " 'each '\n", - " 'indexing '\n", - " 'node. '\n", - " 'Note '\n", - " 'that '\n", - " 'as '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pools '\n", - " '(which '\n", - " 'handle '\n", - " 'indexing '\n", - " 'requests) '\n", - " 'are '\n", - " 'sized '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'CPU '\n", - " 'cores '\n", - " 'on '\n", - " 'the '\n", - " 'node, '\n", - " 'this '\n", - " 'essentially '\n", - " 'determines '\n", - " 'the '\n", - " 'total '\n", - " 'number '\n", - " 'of '\n", - " 'cores '\n", - " 'that '\n", - " 'is '\n", - " 'needed '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload. '\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'on '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'consists '\n", - " 'of '\n", - " 'two '\n", - " 'components: '\n", - " 'Thread '\n", - " 'pool '\n", - " 'utilization: '\n", - " 'the '\n", - " 'average '\n", - " 'number '\n", - " 'of '\n", - " 'threads '\n", - " 'in '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pool '\n", - " 'processing '\n", - " 'indexing '\n", - " 'requests '\n", - " 'during '\n", - " 'that '\n", - " 'sampling '\n", - " 'period. '\n", - " 'Queued '\n", - " 'ingestion '\n", - " 'load: '\n", - " 'the '\n", - " 'estimated '\n", - " 'number '\n", - " 'of '\n", - " 'threads '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'queued '\n", - " 'write '\n", - " 'requests. 
'\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'is '\n", - " 'calculated '\n", - " 'as '\n", - " 'the '\n", - " 'sum '\n", - " 'of '\n", - " 'these '\n", - " 'two '\n", - " 'values '\n", - " 'for '\n", - " 'all '\n", - " 'the '\n", - " 'three '\n", - " 'write '\n", - " 'thread '\n", - " 'pools '\n", - " '. '\n", - " 'The '\n", - " 'total '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'is '\n", - " 'the '\n", - " 'sum '\n", - " 'of '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'the '\n", - " 'individual '\n", - " 'nodes. '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 't '\n", - " 'h'},\n", - " {'embeddings': {'##est': 1.3433179,\n", - " '##estinal': 0.5916747,\n", - " '##ical': 0.21335103,\n", - " '##ing': 0.66160166,\n", - " '##ion': 1.223692,\n", - " '##l': 0.06755174,\n", - " '##ler': 0.34178317,\n", - " '##line': 0.6707441,\n", - " '##ling': 1.0343578,\n", - " '##load': 0.9880499,\n", - " '##mat': 0.01314945,\n", - " '##rch': 1.3459072,\n", - " '##s': 0.25005433,\n", - " '##sca': 1.6867673,\n", - " '##scu': 0.028700678,\n", - " '##sea': 1.6748068,\n", - " '_': 0.28835136,\n", - " 'access': 0.116686985,\n", - " 'accounting': 0.15865436,\n", - " 'algorithm': 1.0487378,\n", - " 'algorithms': 0.2763102,\n", - " 'allocation': 0.1481772,\n", - " 'amazon': 0.9099395,\n", - " 'among': 0.04313716,\n", - " 'anal': 0.025087006,\n", - " 'analysis': 0.64178395,\n", - " 'analyze': 0.18673302,\n", - " 'and': 0.19101046,\n", - " 'apache': 0.6617465,\n", - " 'api': 1.4468017,\n", - " 'approximate': 0.026616694,\n", - " 'are': 0.19081613,\n", - " 
'arithmetic': 0.12217364,\n", - " 'ass': 0.12156314,\n", - " 'auto': 1.4633765,\n", - " 'automatic': 0.73048806,\n", - " 'availability': 0.20461462,\n", - " 'average': 0.58710635,\n", - " 'bot': 0.12357169,\n", - " 'buffer': 0.14556783,\n", - " 'calculate': 0.02387442,\n", - " 'calculated': 0.2452304,\n", - " 'calculation': 0.81089926,\n", - " 'called': 0.2972479,\n", - " 'capacity': 0.60224617,\n", - " 'catalog': 0.078262925,\n", - " 'category': 0.21683785,\n", - " 'checkpoint': 0.012995078,\n", - " 'chess': 0.41694775,\n", - " 'chip': 0.10178017,\n", - " 'class': 0.5914888,\n", - " 'classification': 0.17686933,\n", - " 'cluster': 1.4369037,\n", - " 'clusters': 0.21254443,\n", - " 'comply': 0.131236,\n", - " 'component': 0.37191656,\n", - " 'components': 0.87235415,\n", - " 'computation': 0.47024545,\n", - " 'compute': 0.14372817,\n", - " 'computer': 0.397558,\n", - " 'constant': 0.09540719,\n", - " 'consumption': 0.123454005,\n", - " 'cope': 0.7024604,\n", - " 'core': 0.62535626,\n", - " 'cores': 1.0230916,\n", - " 'cpu': 0.874175,\n", - " 'crawl': 0.23010625,\n", - " 'current': 0.5516459,\n", - " 'data': 0.25792596,\n", - " 'database': 0.4601695,\n", - " 'determine': 0.3844099,\n", - " 'determined': 0.41348428,\n", - " 'diagram': 0.025166756,\n", - " 'dimensions': 0.07042265,\n", - " 'disk': 0.07931721,\n", - " 'each': 0.22229394,\n", - " 'elastic': 1.8257822,\n", - " 'enter': 0.058845505,\n", - " 'equation': 0.43812877,\n", - " 'es': 0.8055687,\n", - " 'estimate': 0.03608101,\n", - " 'estimated': 0.46266982,\n", - " 'execution': 0.05638616,\n", - " 'factors': 0.12973839,\n", - " 'forest': 0.3904727,\n", - " 'formula': 0.016075172,\n", - " 'framework': 0.34186286,\n", - " 'g': 0.08017753,\n", - " 'gage': 0.30852094,\n", - " 'gene': 0.27250904,\n", - " 'handle': 0.9037246,\n", - " 'handling': 0.69093794,\n", - " 'implement': 0.053764082,\n", - " 'index': 1.3896008,\n", - " 'indexed': 0.25086805,\n", - " 'ing': 1.5002296,\n", - " 'integration': 0.20222682,\n", - " 
'interface': 0.25386703,\n", - " 'inventory': 0.5645011,\n", - " 'is': 0.05772473,\n", - " 'java': 1.2391971,\n", - " 'l': 0.048691455,\n", - " 'lake': 0.24773102,\n", - " 'lane': 0.25919613,\n", - " 'lang': 0.039321195,\n", - " 'learning': 0.033810128,\n", - " 'library': 0.14143226,\n", - " 'list': 0.10985089,\n", - " 'lists': 0.12752165,\n", - " 'load': 1.7350225,\n", - " 'loaded': 0.057171866,\n", - " 'loading': 0.75305617,\n", - " 'loads': 0.12072936,\n", - " 'log': 0.06388949,\n", - " 'machine': 0.47294563,\n", - " 'mass': 0.092697844,\n", - " 'math': 0.7472431,\n", - " 'matrix': 0.045127213,\n", - " 'maximum': 0.094020285,\n", - " 'measure': 0.32414404,\n", - " 'memories': 0.03024405,\n", - " 'memory': 1.2586498,\n", - " 'method': 0.016832462,\n", - " 'metric': 1.1439759,\n", - " 'mining': 0.40203753,\n", - " 'mp': 0.09331862,\n", - " 'multi': 0.031247457,\n", - " 'multiple': 0.38688186,\n", - " 'n': 0.33228758,\n", - " 'need': 0.19645856,\n", - " 'network': 0.42359397,\n", - " 'new': 0.041632555,\n", - " 'node': 1.3807943,\n", - " 'nodes': 0.63807905,\n", - " 'number': 0.4450389,\n", - " 'o': 0.50335085,\n", - " 'operation': 0.008523868,\n", - " 'order': 0.08601924,\n", - " 'pattern': 0.11067777,\n", - " 'percent': 0.13746342,\n", - " 'performance': 0.41614294,\n", - " 'period': 0.49507552,\n", - " 'pool': 1.3188534,\n", - " 'poole': 0.3433027,\n", - " 'pools': 1.2800426,\n", - " 'predict': 0.23377013,\n", - " 'processing': 1.0733001,\n", - " 'processor': 0.10840816,\n", - " 'pure': 0.11351536,\n", - " 'quantity': 0.109573685,\n", - " 'queue': 1.1129105,\n", - " 'ram': 0.14691876,\n", - " 'rank': 0.36504152,\n", - " 'ratio': 0.011385939,\n", - " 'read': 0.13304754,\n", - " 'represent': 0.42444453,\n", - " 'representation': 0.058323957,\n", - " 'request': 0.755568,\n", - " 'requests': 0.7039498,\n", - " 'routing': 0.060857404,\n", - " 'sample': 0.62170815,\n", - " 'sampling': 0.8610632,\n", - " 'scala': 0.25192302,\n", - " 'scale': 0.5968038,\n", - " 'sea': 
0.20613533,\n", - " 'search': 0.4318061,\n", - " 'semi': 0.33687106,\n", - " 'sequence': 0.23863083,\n", - " 'serial': 0.15801017,\n", - " 'server': 0.16233677,\n", - " 'si': 0.2002626,\n", - " 'sid': 0.44975162,\n", - " 'size': 0.8577202,\n", - " 'sized': 0.21010487,\n", - " 'sizes': 0.4059122,\n", - " 'small': 0.09116832,\n", - " 'software': 0.09232291,\n", - " 'sort': 0.35720947,\n", - " 'sorting': 0.06234357,\n", - " 'spectrum': 0.07792632,\n", - " 'sql': 0.116530605,\n", - " 'statistical': 0.0852167,\n", - " 'statistics': 0.22820702,\n", - " 'stomach': 0.018201118,\n", - " 'sum': 0.89766365,\n", - " 'swarm': 0.20437151,\n", - " 'table': 0.007837142,\n", - " 'task': 0.37974054,\n", - " 'taste': 0.053832427,\n", - " 'taylor': 0.10206632,\n", - " 'thread': 1.5052487,\n", - " 'threads': 1.2515007,\n", - " 'three': 0.27322263,\n", - " 'total': 0.64918166,\n", - " 'tree': 0.098200426,\n", - " 'unit': 0.15584692,\n", - " 'used': 0.56170344,\n", - " 'useful': 0.34977943,\n", - " 'utilization': 1.0091052,\n", - " 'value': 0.7453479,\n", - " 'values': 0.63835937,\n", - " 'vector': 0.3917736,\n", - " 'weaving': 0.11804886,\n", - " 'web': 0.46383187,\n", - " 'work': 0.29207155,\n", - " 'write': 1.1660185,\n", - " 'writing': 0.25973478,\n", - " 'z': 0.3776876},\n", - " 'text': 'that '\n", - " 'are '\n", - " 'used '\n", - " 'for '\n", - " 'ingest '\n", - " 'autoscaling '\n", - " 'in '\n", - " 'Elasticsearch '\n", - " 'are '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'memory. '\n", - " 'Ingestion '\n", - " 'load '\n", - " 'Ingestion '\n", - " 'load '\n", - " 'represents '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'threads '\n", - " 'that '\n", - " 'is '\n", - " 'needed '\n", - " 'to '\n", - " 'cope '\n", - " 'with '\n", - " 'the '\n", - " 'current '\n", - " 'indexing '\n", - " 'load. 
'\n", - " 'The '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'exposes '\n", - " 'a '\n", - " 'list '\n", - " 'of '\n", - " 'ingestion '\n", - " 'load '\n", - " 'values, '\n", - " 'one '\n", - " 'for '\n", - " 'each '\n", - " 'indexing '\n", - " 'node. '\n", - " 'Note '\n", - " 'that '\n", - " 'as '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pools '\n", - " '(which '\n", - " 'handle '\n", - " 'indexing '\n", - " 'requests) '\n", - " 'are '\n", - " 'sized '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'CPU '\n", - " 'cores '\n", - " 'on '\n", - " 'the '\n", - " 'node, '\n", - " 'this '\n", - " 'essentially '\n", - " 'determines '\n", - " 'the '\n", - " 'total '\n", - " 'number '\n", - " 'of '\n", - " 'cores '\n", - " 'that '\n", - " 'is '\n", - " 'needed '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload. '\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'on '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'consists '\n", - " 'of '\n", - " 'two '\n", - " 'components: '\n", - " 'Thread '\n", - " 'pool '\n", - " 'utilization: '\n", - " 'the '\n", - " 'average '\n", - " 'number '\n", - " 'of '\n", - " 'threads '\n", - " 'in '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pool '\n", - " 'processing '\n", - " 'indexing '\n", - " 'requests '\n", - " 'during '\n", - " 'that '\n", - " 'sampling '\n", - " 'period. '\n", - " 'Queued '\n", - " 'ingestion '\n", - " 'load: '\n", - " 'the '\n", - " 'estimated '\n", - " 'number '\n", - " 'of '\n", - " 'threads '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'queued '\n", - " 'write '\n", - " 'requests. 
'\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'is '\n", - " 'calculated '\n", - " 'as '\n", - " 'the '\n", - " 'sum '\n", - " 'of '\n", - " 'these '\n", - " 'two '\n", - " 'values '\n", - " 'for '\n", - " 'all '\n", - " 'the '\n", - " 'three '\n", - " 'write '\n", - " 'thread '\n", - " 'pools '\n", - " '. '\n", - " 'The '\n", - " 'total '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'is '\n", - " 'the '\n", - " 'sum '\n", - " 'of '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'of '\n", - " 'the '\n", - " 'individual '\n", - " 'nodes. '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 't '\n", - " 'h '\n", - " 'r '\n", - " 'e '\n", - " 'a '\n", - " 'd '\n", - " '_ '\n", - " 'p '\n", - " 'o '\n", - " 'o '\n", - " 'l '\n", - " '_ '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'l '\n", - " 'i '\n", - " 'z '\n", - " 'a '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '+ '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 't '\n", - " 'o '\n", - " 't '\n", - " 'a '\n", - " 'l '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g 
'\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " '\\\\small '\n", - " 'node\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\sum(thread\\\\_pool\\\\_utilization '\n", - " '+ '\n", - " 'queued\\\\_ingestion\\\\_load) '\n", - " '\\\\newline '\n", - " 'total\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\sum(node\\\\_ingestion\\\\_load) '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 't '\n", - " 'h '\n", - " 're '\n", - " 'a '\n", - " 'd '\n", - " '_ '\n", - " 'p '\n", - " 'oo '\n", - " 'l '\n", - " '_ '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'l '\n", - " 'i '\n", - " 'z '\n", - " 'a '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '+ '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 't '\n", - " 'o '\n", - " 't '\n", - " 'a '\n", - " 'l '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 'Figure '\n", - " '2 '\n", - " ': '\n", - " 'ingestion'},\n", - " {'embeddings': {'##able': 0.5624876,\n", - " '##ba': 0.10684605,\n", - " '##d': 0.12233314,\n", - " '##est': 0.84587747,\n", - " '##ima': 
0.2508807,\n", - " '##ing': 0.57414246,\n", - " '##ion': 1.1121849,\n", - " '##line': 1.1430916,\n", - " '##ma': 1.1706055,\n", - " '##w': 1.3673741,\n", - " '##ws': 0.33763555,\n", - " '10': 0.51392806,\n", - " '200': 0.73087466,\n", - " '30': 0.45019,\n", - " '60': 1.3045075,\n", - " '[UNK]': 0.2956499,\n", - " '_': 0.33742356,\n", - " 'acceptable': 0.29635867,\n", - " 'access': 0.23300913,\n", - " 'accounting': 0.1906402,\n", - " 'achieve': 0.19722655,\n", - " 'algorithm': 1.1037958,\n", - " 'algorithms': 0.26360378,\n", - " 'allocation': 0.53156596,\n", - " 'analysis': 0.41347402,\n", - " 'apache': 0.54295164,\n", - " 'api': 0.21713388,\n", - " 'approximate': 0.51163644,\n", - " 'arithmetic': 0.005784557,\n", - " 'availability': 0.4917338,\n", - " 'average': 0.8478212,\n", - " 'batch': 0.08666975,\n", - " 'blocking': 0.02501016,\n", - " 'bot': 0.06050198,\n", - " 'buffer': 0.40386045,\n", - " 'bug': 0.055751722,\n", - " 'busy': 1.3026394,\n", - " 'calculate': 0.26999432,\n", - " 'calculation': 0.74316484,\n", - " 'capacity': 0.6725085,\n", - " 'chess': 0.25134456,\n", - " 'class': 0.328252,\n", - " 'client': 0.23896244,\n", - " 'clock': 1.125488,\n", - " 'cluster': 0.5103067,\n", - " 'component': 0.2536751,\n", - " 'components': 0.78435194,\n", - " 'computation': 0.62016183,\n", - " 'compute': 0.06482519,\n", - " 'computer': 0.32330835,\n", - " 'concurrency': 0.011380989,\n", - " 'configuration': 0.6887391,\n", - " 'configured': 0.26263618,\n", - " 'constant': 0.29082793,\n", - " 'consumption': 0.16989039,\n", - " 'cpu': 0.3717718,\n", - " 'database': 0.13461274,\n", - " 'e': 0.7789312,\n", - " 'effect': 0.09419204,\n", - " 'effort': 0.055172946,\n", - " 'employee': 0.3274528,\n", - " 'employees': 0.14320064,\n", - " 'ensemble': 0.19942468,\n", - " 'equation': 0.3787911,\n", - " 'equivalent': 0.050270963,\n", - " 'error': 0.12898737,\n", - " 'es': 0.043630168,\n", - " 'est': 0.20599021,\n", - " 'estimate': 1.0792123,\n", - " 'estimated': 0.39457676,\n", - " 
'estimates': 0.465428,\n", - " 'estimation': 0.080784135,\n", - " 'every': 0.16873945,\n", - " 'excess': 1.0022457,\n", - " 'excessive': 0.451759,\n", - " 'execute': 0.59175754,\n", - " 'executing': 0.091966435,\n", - " 'execution': 1.3065349,\n", - " 'existing': 0.6437884,\n", - " 'exponential': 1.1467187,\n", - " 'extra': 0.26056916,\n", - " 'figure': 0.019528389,\n", - " 'finish': 0.012790194,\n", - " 'finished': 0.21236378,\n", - " 'flow': 0.10995065,\n", - " 'g': 0.43504617,\n", - " 'gage': 0.4229588,\n", - " 'group': 0.43960038,\n", - " 'guild': 0.014967873,\n", - " 'handle': 0.80899215,\n", - " 'handling': 0.7681083,\n", - " 'heap': 0.3867438,\n", - " 'hours': 0.7462872,\n", - " 'http': 0.20072725,\n", - " 'implement': 0.16245411,\n", - " 'implementation': 0.2408709,\n", - " 'improve': 0.10136651,\n", - " 'index': 1.2976965,\n", - " 'indexed': 0.10614389,\n", - " 'ing': 1.2063053,\n", - " 'inventory': 0.25356865,\n", - " 'java': 1.2153534,\n", - " 'l': 0.48968774,\n", - " 'lake': 0.27167574,\n", - " 'lane': 0.54473066,\n", - " 'length': 0.64622724,\n", - " 'library': 0.08392323,\n", - " 'line': 0.5581907,\n", - " 'load': 1.5088638,\n", - " 'loading': 0.5335804,\n", - " 'machine': 0.3173762,\n", - " 'manage': 0.5220977,\n", - " 'managed': 0.45824686,\n", - " 'management': 0.3230387,\n", - " 'mass': 0.15742503,\n", - " 'math': 0.81244004,\n", - " 'maximum': 0.34374076,\n", - " 'measure': 0.25600985,\n", - " 'memory': 0.5085309,\n", - " 'mining': 0.4451848,\n", - " 'minute': 0.39483455,\n", - " 'minutes': 0.22895378,\n", - " 'moving': 0.76410496,\n", - " 'mp': 0.046217,\n", - " 'multiple': 0.10666605,\n", - " 'n': 0.5416694,\n", - " 'network': 0.3097243,\n", - " 'new': 0.49582836,\n", - " 'node': 1.1907045,\n", - " 'number': 0.47905272,\n", - " 'o': 0.47123736,\n", - " 'operation': 0.19577809,\n", - " 'optimal': 0.1733028,\n", - " 'par': 0.09612937,\n", - " 'percent': 0.1152151,\n", - " 'performance': 0.74001515,\n", - " 'pool': 1.7006081,\n", - " 'poole': 
0.36192703,\n", - " 'pools': 1.0764378,\n", - " 'predict': 0.38117534,\n", - " 'probe': 0.2430691,\n", - " 'process': 0.12230635,\n", - " 'processing': 0.47061718,\n", - " 'proportion': 0.2145018,\n", - " 'proportional': 1.1204233,\n", - " 'proposal': 0.1401456,\n", - " 'q': 0.3259466,\n", - " 'queue': 1.580318,\n", - " 'r': 0.14266703,\n", - " 'rank': 0.13613336,\n", - " 'rate': 0.39469108,\n", - " 'request': 1.1001134,\n", - " 'requests': 0.63539153,\n", - " 'resolution': 0.055606272,\n", - " 'resource': 0.21417612,\n", - " 'resources': 0.7937882,\n", - " 'routing': 0.14261606,\n", - " 'sample': 1.0720835,\n", - " 'sampled': 1.0306277,\n", - " 'samples': 1.2079935,\n", - " 'sampling': 0.6740413,\n", - " 'scala': 0.07395835,\n", - " 'script': 0.10171158,\n", - " 'second': 0.18827602,\n", - " 'seconds': 0.817573,\n", - " 'sequence': 0.49634397,\n", - " 'serial': 0.033651996,\n", - " 'server': 0.32002103,\n", - " 'share': 0.27626935,\n", - " 'sid': 0.27850676,\n", - " 'size': 0.11843514,\n", - " 'small': 0.75451213,\n", - " 'speed': 0.30091006,\n", - " 'sql': 0.31397846,\n", - " 'statistical': 0.0100006005,\n", - " 'strategy': 0.08963276,\n", - " 'stream': 0.028335843,\n", - " 'sum': 1.1407199,\n", - " 'surplus': 0.15598625,\n", - " 'swarm': 0.054142684,\n", - " 'task': 1.2177191,\n", - " 'tasks': 1.0780356,\n", - " 'taylor': 0.24217507,\n", - " 'technique': 0.0030198945,\n", - " 'thread': 1.7842301,\n", - " 'threads': 0.9916815,\n", - " 'time': 0.9839317,\n", - " 'timer': 0.19039534,\n", - " 'times': 0.5299459,\n", - " 'total': 0.40682667,\n", - " 'traffic': 0.28910428,\n", - " 'universe': 0.013594781,\n", - " 'usage': 0.5520448,\n", - " 'utilization': 1.6104044,\n", - " 'value': 0.6036144,\n", - " 'values': 0.33944046,\n", - " 'w': 0.4972394,\n", - " 'wait': 0.005872378,\n", - " 'wall': 1.1351137,\n", - " 'weaving': 0.13777943,\n", - " 'web': 0.2821159,\n", - " 'weighted': 1.1533256,\n", - " 'worker': 1.0417976,\n", - " 'workers': 1.2245823,\n", - " 'z': 
0.29032487},\n", - " 'text': 'r '\n", - " 'e '\n", - " 'a '\n", - " 'd '\n", - " '_ '\n", - " 'p '\n", - " 'o '\n", - " 'o '\n", - " 'l '\n", - " '_ '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'l '\n", - " 'i '\n", - " 'z '\n", - " 'a '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '+ '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 't '\n", - " 'o '\n", - " 't '\n", - " 'a '\n", - " 'l '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " '\\\\small '\n", - " 'node\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\sum(thread\\\\_pool\\\\_utilization '\n", - " '+ '\n", - " 'queued\\\\_ingestion\\\\_load) '\n", - " '\\\\newline '\n", - " 'total\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\sum(node\\\\_ingestion\\\\_load) '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 't '\n", - " 'h '\n", - " 're '\n", - " 'a '\n", - " 'd '\n", - " '_ '\n", - " 'p '\n", - " 'oo '\n", - " 'l '\n", - " '_ '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'l '\n", - " 'i '\n", - " 'z '\n", - " 'a '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " 
'+ '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 't '\n", - " 'o '\n", - " 't '\n", - " 'a '\n", - " 'l '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '∑ '\n", - " '( '\n", - " 'n '\n", - " 'o '\n", - " 'd '\n", - " 'e '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " ') '\n", - " 'Figure '\n", - " '2 '\n", - " ': '\n", - " 'ingestion '\n", - " 'load '\n", - " 'components '\n", - " 'The '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'is '\n", - " 'an '\n", - " 'exponentially '\n", - " 'weighted '\n", - " 'moving '\n", - " 'average '\n", - " '(EWMA) '\n", - " 'of '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'busy '\n", - " 'threads '\n", - " 'in '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'sampled '\n", - " 'every '\n", - " 'second. 
'\n", - " 'The '\n", - " 'EWMA '\n", - " 'of '\n", - " 'the '\n", - " 'sampled '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'values '\n", - " 'is '\n", - " 'configured '\n", - " 'such '\n", - " 'that '\n", - " 'the '\n", - " 'sampled '\n", - " 'values '\n", - " 'of '\n", - " 'the '\n", - " 'past '\n", - " '10 '\n", - " 'seconds '\n", - " 'have '\n", - " 'the '\n", - " 'most '\n", - " 'effect '\n", - " 'on '\n", - " 'the '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'component '\n", - " 'of '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'samples '\n", - " 'older '\n", - " 'than '\n", - " '60 '\n", - " 'seconds '\n", - " 'have '\n", - " 'very '\n", - " 'negligible '\n", - " 'impact. '\n", - " 'To '\n", - " 'estimate '\n", - " 'the '\n", - " 'resources '\n", - " 'required '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'queued '\n", - " 'indexing '\n", - " 'requests '\n", - " 'in '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'we '\n", - " 'need '\n", - " 'to '\n", - " 'have '\n", - " 'an '\n", - " 'estimate '\n", - " 'for '\n", - " 'how '\n", - " 'long '\n", - " 'each '\n", - " 'queued '\n", - " 'task '\n", - " 'can '\n", - " 'take '\n", - " 'to '\n", - " 'execute. '\n", - " 'To '\n", - " 'achieve '\n", - " 'this, '\n", - " 'each '\n", - " 'thread '\n", - " 'pool '\n", - " 'also '\n", - " 'provides '\n", - " 'an '\n", - " 'EWMA '\n", - " 'of '\n", - " 'the '\n", - " 'request '\n", - " 'execution '\n", - " 'time. 
'\n", - " 'The '\n", - " 'request '\n", - " 'execution '\n", - " 'time '\n", - " 'for '\n", - " 'an '\n", - " 'indexing '\n", - " 'request '\n", - " 'is '\n", - " 'the '\n", - " '(wall-clock) '\n", - " 'time '\n", - " 'taken '\n", - " 'for '\n", - " 'the '\n", - " 'request '\n", - " 'to '\n", - " 'finish '\n", - " 'once '\n", - " 'it '\n", - " 'is '\n", - " 'out '\n", - " 'of '\n", - " 'the '\n", - " 'queue '\n", - " 'and '\n", - " 'a '\n", - " 'worker '\n", - " 'thread '\n", - " 'starts '\n", - " 'executing '\n", - " 'it. '\n", - " 'As '\n", - " 'some '\n", - " 'queueing '\n", - " 'is '\n", - " 'acceptable '\n", - " 'and '\n", - " 'should '\n", - " 'be '\n", - " 'manageable '\n", - " 'by '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'we '\n", - " 'try '\n", - " 'to '\n", - " 'estimate '\n", - " 'the '\n", - " 'resources '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'excess '\n", - " 'queueing. '\n", - " 'We '\n", - " 'consider '\n", - " 'up '\n", - " 'to '\n", - " '30s '\n", - " 'worth '\n", - " 'of '\n", - " 'tasks '\n", - " 'in '\n", - " 'the '\n", - " 'queue '\n", - " 'manageable '\n", - " 'by '\n", - " 'the '\n", - " 'existing '\n", - " 'number '\n", - " 'of '\n", - " 'workers '\n", - " 'and '\n", - " 'account '\n", - " 'for '\n", - " 'an '\n", - " 'extra '\n", - " 'thread '\n", - " 'proportional '\n", - " 'to '\n", - " 'this '\n", - " 'value. 
'\n", - " 'For '\n", - " 'example, '\n", - " 'if '\n", - " 'the '\n", - " 'average '\n", - " 'task '\n", - " 'execution '\n", - " 'time '\n", - " 'is '\n", - " '200ms, '\n", - " 'we '\n", - " 'estimate '\n", - " 'that'},\n", - " {'embeddings': {'##d': 0.06352329,\n", - " '##est': 0.89852107,\n", - " '##estinal': 0.13183321,\n", - " '##ima': 0.40056115,\n", - " '##ing': 0.61320734,\n", - " '##ion': 0.72260284,\n", - " '##ling': 0.8949169,\n", - " '##load': 0.57369965,\n", - " '##m': 0.23721623,\n", - " '##ma': 1.4438714,\n", - " '##mas': 0.24820994,\n", - " '##mat': 0.24343531,\n", - " '##sca': 0.92204034,\n", - " '##w': 1.6598973,\n", - " '##ws': 0.6782139,\n", - " '10': 0.7749067,\n", - " '150': 1.2471286,\n", - " '200': 0.58304185,\n", - " '30': 1.076181,\n", - " '60': 1.1588365,\n", - " '_': 0.17651597,\n", - " 'acceptable': 0.0395143,\n", - " 'access': 0.05357292,\n", - " 'accounting': 0.22549874,\n", - " 'achieve': 0.040418815,\n", - " 'algorithm': 0.9928478,\n", - " 'algorithms': 0.08838318,\n", - " 'allocation': 0.7647576,\n", - " 'analysis': 0.428812,\n", - " 'apache': 0.5859765,\n", - " 'api': 0.016843364,\n", - " 'approximate': 0.21684457,\n", - " 'arithmetic': 0.053462975,\n", - " 'array': 0.066098064,\n", - " 'auto': 0.53497416,\n", - " 'automatic': 0.20355695,\n", - " 'availability': 0.6690054,\n", - " 'average': 1.0341543,\n", - " 'blocking': 0.1431715,\n", - " 'buffer': 0.46087772,\n", - " 'bug': 0.23163809,\n", - " 'busy': 1.3082193,\n", - " 'calculate': 0.2015065,\n", - " 'calculation': 0.71491575,\n", - " 'capacity': 0.8027149,\n", - " 'checkpoint': 0.10162155,\n", - " 'chess': 0.26765594,\n", - " 'class': 0.5377411,\n", - " 'client': 0.028412435,\n", - " 'clock': 0.81897706,\n", - " 'cluster': 0.6336233,\n", - " 'component': 1.2550238,\n", - " 'components': 1.4753778,\n", - " 'computation': 0.5360401,\n", - " 'compute': 0.09496682,\n", - " 'computer': 0.48583803,\n", - " 'computers': 0.082595915,\n", - " 'computing': 0.0053236387,\n", - " 
'concept': 0.09244595,\n", - " 'concurrency': 0.080570355,\n", - " 'configuration': 0.63552403,\n", - " 'configured': 0.49945095,\n", - " 'constant': 0.15874276,\n", - " 'consumption': 0.3705247,\n", - " 'count': 0.15291668,\n", - " 'cpu': 0.4727478,\n", - " 'data': 0.5534523,\n", - " 'database': 0.24513115,\n", - " 'definition': 0.25252765,\n", - " 'dew': 0.027248075,\n", - " 'disadvantage': 0.043538865,\n", - " 'disk': 1.0258542,\n", - " 'during': 0.024176076,\n", - " 'e': 1.3067937,\n", - " 'each': 0.01788934,\n", - " 'ec': 0.5695534,\n", - " 'ee': 0.08090695,\n", - " 'effect': 0.33151782,\n", - " 'employee': 0.14918438,\n", - " 'employees': 0.026578736,\n", - " 'equation': 0.42684066,\n", - " 'es': 0.18498634,\n", - " 'est': 0.098570675,\n", - " 'estimate': 0.83097947,\n", - " 'estimated': 0.19130428,\n", - " 'estimates': 0.04933924,\n", - " 'every': 0.384432,\n", - " 'excess': 0.44124436,\n", - " 'execute': 0.56965685,\n", - " 'execution': 1.092663,\n", - " 'exponential': 1.2772857,\n", - " 'extra': 0.3341091,\n", - " 'finish': 0.47172138,\n", - " 'finished': 0.5516902,\n", - " 'flow': 0.1065439,\n", - " 'fra': 0.5131407,\n", - " 'gage': 0.41627494,\n", - " 'group': 0.40121686,\n", - " 'handle': 0.76723486,\n", - " 'handling': 0.8265911,\n", - " 'hardware': 0.007931168,\n", - " 'heap': 0.055197764,\n", - " 'hours': 0.5783272,\n", - " 'http': 0.16334121,\n", - " 'implement': 0.20851848,\n", - " 'improve': 0.033503063,\n", - " 'index': 1.351592,\n", - " 'indexed': 1.2516088,\n", - " 'ing': 1.2539797,\n", - " 'inventory': 0.26884475,\n", - " 'io': 0.49151403,\n", - " 'is': 0.67021686,\n", - " 'items': 0.30828458,\n", - " 'java': 1.233984,\n", - " 'lake': 0.37700737,\n", - " 'lane': 0.35798323,\n", - " 'lang': 0.11334816,\n", - " 'length': 0.39039937,\n", - " 'library': 0.0020271246,\n", - " 'load': 1.839116,\n", - " 'loading': 0.52925104,\n", - " 'log': 0.026120221,\n", - " 'ma': 0.37466413,\n", - " 'machine': 0.41295668,\n", - " 'managed': 0.016499385,\n", - " 
'management': 0.24261811,\n", - " 'many': 0.0001822544,\n", - " 'map': 0.16712263,\n", - " 'mat': 0.08338378,\n", - " 'math': 0.69625205,\n", - " 'maximum': 0.34880605,\n", - " 'mb': 0.37918818,\n", - " 'measure': 0.14309268,\n", - " 'memory': 0.58699423,\n", - " 'metric': 0.113157846,\n", - " 'mill': 0.087879546,\n", - " 'minimum': 0.042228475,\n", - " 'mining': 0.31173173,\n", - " 'minute': 0.2855463,\n", - " 'minutes': 0.037687548,\n", - " 'mm': 0.04705554,\n", - " 'move': 0.24638273,\n", - " 'moving': 1.068798,\n", - " 'mp': 0.339956,\n", - " 'mt': 0.18115476,\n", - " 'multi': 0.045562405,\n", - " 'multiple': 0.2256053,\n", - " 'n': 0.20722932,\n", - " 'network': 0.2870649,\n", - " 'node': 0.74391615,\n", - " 'nodes': 0.40956134,\n", - " 'number': 0.5414315,\n", - " 'object': 0.36274558,\n", - " 'old': 0.026420968,\n", - " 'older': 0.14505674,\n", - " 'operation': 0.137978,\n", - " 'optimal': 0.03703803,\n", - " 'par': 0.0058114612,\n", - " 'parts': 0.011510156,\n", - " 'past': 0.25731233,\n", - " 'percent': 0.35817072,\n", - " 'performance': 0.801656,\n", - " 'pool': 1.8708751,\n", - " 'poole': 0.2727913,\n", - " 'pools': 1.2964886,\n", - " 'population': 0.11810607,\n", - " 'predict': 0.18177378,\n", - " 'probe': 0.21369988,\n", - " 'processing': 0.4105097,\n", - " 'proportional': 0.6098035,\n", - " 'q': 0.13568267,\n", - " 'queue': 1.2824515,\n", - " 'rank': 0.40675223,\n", - " 'rate': 0.46714726,\n", - " 'request': 0.949167,\n", - " 'requests': 0.6644938,\n", - " 'requirements': 0.3288823,\n", - " 'resource': 0.4609863,\n", - " 'resources': 0.9455237,\n", - " 'routing': 0.18650433,\n", - " 'sample': 1.0472832,\n", - " 'sampled': 0.8309003,\n", - " 'samples': 1.1415888,\n", - " 'sampling': 0.45636305,\n", - " 'scala': 0.12271185,\n", - " 'scale': 0.3144392,\n", - " 'second': 0.49777645,\n", - " 'seconds': 0.7695267,\n", - " 'sequence': 0.21608938,\n", - " 'serial': 0.049026124,\n", - " 'server': 0.37191278,\n", - " 'share': 0.19251333,\n", - " 'si': 
0.020900367,\n", - " 'sid': 0.41317028,\n", - " 'size': 0.7470095,\n", - " 'sizes': 0.060290556,\n", - " 'small': 0.015217632,\n", - " 'speed': 0.21846266,\n", - " 'sql': 0.39542097,\n", - " 'stack': 0.047259662,\n", - " 'start': 0.15702806,\n", - " 'statistical': 0.031916108,\n", - " 'statistics': 0.08593676,\n", - " 'storage': 0.034532573,\n", - " 'store': 0.053150244,\n", - " 'survey': 0.1747176,\n", - " 'system': 0.08567025,\n", - " 'table': 0.006464522,\n", - " 'task': 1.1504556,\n", - " 'tasks': 0.7951614,\n", - " 'taylor': 0.14394312,\n", - " 'term': 0.63525033,\n", - " 'thirty': 0.26077473,\n", - " 'thread': 2.0543768,\n", - " 'threads': 1.1089593,\n", - " 'tier': 1.207179,\n", - " 'time': 0.68932414,\n", - " 'timer': 0.14907645,\n", - " 'times': 0.32087305,\n", - " 'total': 0.22359692,\n", - " 'traffic': 0.26179498,\n", - " 'trial': 0.2198535,\n", - " 'u': 0.064360306,\n", - " 'unit': 0.13278264,\n", - " 'usage': 0.6241088,\n", - " 'utilization': 1.6971744,\n", - " 'value': 0.66488856,\n", - " 'values': 0.2064584,\n", - " 'w': 0.81893605,\n", - " 'wait': 0.103130125,\n", - " 'wall': 1.0635448,\n", - " 'weaving': 0.07162173,\n", - " 'web': 0.23646998,\n", - " 'weight': 0.030211551,\n", - " 'weighted': 1.2184887,\n", - " 'work': 0.23164386,\n", - " 'worker': 0.7420831,\n", - " 'workers': 1.0619413,\n", - " 'ze': 0.40276462},\n", - " 'text': 'load '\n", - " 'components '\n", - " 'The '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'is '\n", - " 'an '\n", - " 'exponentially '\n", - " 'weighted '\n", - " 'moving '\n", - " 'average '\n", - " '(EWMA) '\n", - " 'of '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'busy '\n", - " 'threads '\n", - " 'in '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'sampled '\n", - " 'every '\n", - " 'second. 
'\n", - " 'The '\n", - " 'EWMA '\n", - " 'of '\n", - " 'the '\n", - " 'sampled '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'values '\n", - " 'is '\n", - " 'configured '\n", - " 'such '\n", - " 'that '\n", - " 'the '\n", - " 'sampled '\n", - " 'values '\n", - " 'of '\n", - " 'the '\n", - " 'past '\n", - " '10 '\n", - " 'seconds '\n", - " 'have '\n", - " 'the '\n", - " 'most '\n", - " 'effect '\n", - " 'on '\n", - " 'the '\n", - " 'thread '\n", - " 'pool '\n", - " 'utilization '\n", - " 'component '\n", - " 'of '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'samples '\n", - " 'older '\n", - " 'than '\n", - " '60 '\n", - " 'seconds '\n", - " 'have '\n", - " 'very '\n", - " 'negligible '\n", - " 'impact. '\n", - " 'To '\n", - " 'estimate '\n", - " 'the '\n", - " 'resources '\n", - " 'required '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'queued '\n", - " 'indexing '\n", - " 'requests '\n", - " 'in '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'we '\n", - " 'need '\n", - " 'to '\n", - " 'have '\n", - " 'an '\n", - " 'estimate '\n", - " 'for '\n", - " 'how '\n", - " 'long '\n", - " 'each '\n", - " 'queued '\n", - " 'task '\n", - " 'can '\n", - " 'take '\n", - " 'to '\n", - " 'execute. '\n", - " 'To '\n", - " 'achieve '\n", - " 'this, '\n", - " 'each '\n", - " 'thread '\n", - " 'pool '\n", - " 'also '\n", - " 'provides '\n", - " 'an '\n", - " 'EWMA '\n", - " 'of '\n", - " 'the '\n", - " 'request '\n", - " 'execution '\n", - " 'time. 
'\n", - " 'The '\n", - " 'request '\n", - " 'execution '\n", - " 'time '\n", - " 'for '\n", - " 'an '\n", - " 'indexing '\n", - " 'request '\n", - " 'is '\n", - " 'the '\n", - " '(wall-clock) '\n", - " 'time '\n", - " 'taken '\n", - " 'for '\n", - " 'the '\n", - " 'request '\n", - " 'to '\n", - " 'finish '\n", - " 'once '\n", - " 'it '\n", - " 'is '\n", - " 'out '\n", - " 'of '\n", - " 'the '\n", - " 'queue '\n", - " 'and '\n", - " 'a '\n", - " 'worker '\n", - " 'thread '\n", - " 'starts '\n", - " 'executing '\n", - " 'it. '\n", - " 'As '\n", - " 'some '\n", - " 'queueing '\n", - " 'is '\n", - " 'acceptable '\n", - " 'and '\n", - " 'should '\n", - " 'be '\n", - " 'manageable '\n", - " 'by '\n", - " 'the '\n", - " 'thread '\n", - " 'pool, '\n", - " 'we '\n", - " 'try '\n", - " 'to '\n", - " 'estimate '\n", - " 'the '\n", - " 'resources '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'excess '\n", - " 'queueing. '\n", - " 'We '\n", - " 'consider '\n", - " 'up '\n", - " 'to '\n", - " '30s '\n", - " 'worth '\n", - " 'of '\n", - " 'tasks '\n", - " 'in '\n", - " 'the '\n", - " 'queue '\n", - " 'manageable '\n", - " 'by '\n", - " 'the '\n", - " 'existing '\n", - " 'number '\n", - " 'of '\n", - " 'workers '\n", - " 'and '\n", - " 'account '\n", - " 'for '\n", - " 'an '\n", - " 'extra '\n", - " 'thread '\n", - " 'proportional '\n", - " 'to '\n", - " 'this '\n", - " 'value. 
'\n", - " 'For '\n", - " 'example, '\n", - " 'if '\n", - " 'the '\n", - " 'average '\n", - " 'task '\n", - " 'execution '\n", - " 'time '\n", - " 'is '\n", - " '200ms, '\n", - " 'we '\n", - " 'estimate '\n", - " 'that '\n", - " 'each '\n", - " 'thread '\n", - " 'is '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " '150 '\n", - " 'indexing '\n", - " 'requests '\n", - " 'within '\n", - " '30s, '\n", - " 'and '\n", - " 'therefore '\n", - " 'account '\n", - " 'for '\n", - " 'one '\n", - " 'extra '\n", - " 'thread '\n", - " 'for '\n", - " 'each '\n", - " '150 '\n", - " 'queued '\n", - " 'items. '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " '_ '\n", - " 's '\n", - " 'i '\n", - " 'z '\n", - " 'e '\n", - " '× '\n", - " 'a '\n", - " 'v '\n", - " 'e '\n", - " 'r '\n", - " 'a '\n", - " 'g '\n", - " 'e '\n", - " '_ '\n", - " 'r '\n", - " 'e '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " '_ '\n", - " 'e '\n", - " 'x '\n", - " 'e '\n", - " 'c '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 't '\n", - " 'i '\n", - " 'm '\n", - " 'e '\n", - " '30 '\n", - " 's '\n", - " '\\\\small '\n", - " 'queued\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\frac{queue\\\\_size '\n", - " '\\\\times '\n", - " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '30 '\n", - " 's '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e 
'\n", - " '_ '\n", - " 's '\n", - " 'i '\n", - " 'ze '\n", - " '× '\n", - " 'a '\n", - " 'v '\n", - " 'er '\n", - " 'a '\n", - " 'g '\n", - " 'e '\n", - " '_ '\n", - " 're '\n", - " 'q '\n", - " 'u '\n", - " 'es '\n", - " 't '\n", - " '_ '\n", - " 'e '\n", - " 'x '\n", - " 'ec '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 't '\n", - " 'im '\n", - " 'e '\n", - " '\\u200b '\n", - " 'Note '\n", - " 'that '\n", - " 'since '\n", - " 'the '\n", - " 'indexing '\n", - " 'nodes '\n", - " 'rely '\n", - " 'on '\n", - " 'pushing '\n", - " 'indexed '\n", - " 'data '\n", - " 'into '\n", - " 'the '\n", - " 'object '\n", - " 'store '\n", - " 'periodically, '\n", - " 'we '\n", - " 'do '\n", - " 'not '\n", - " 'need '\n", - " 'to '\n", - " 'scale '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'total '\n", - " 'size '\n", - " 'of '\n", - " 'the '\n", - " 'indexed '\n", - " 'data. '\n", - " 'However, '\n", - " 'the '\n", - " 'disk '\n", - " 'IO '\n", - " 'requirements '\n", - " 'of '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload '\n", - " 'needs '\n", - " 'to '\n", - " 'be '\n", - " 'considered '\n", - " 'for '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'decisions. 
'\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'represents'},\n", - " {'embeddings': {'##d': 0.38506436,\n", - " '##est': 0.8363302,\n", - " '##frame': 0.039107077,\n", - " '##ing': 1.0441189,\n", - " '##ion': 1.1721121,\n", - " '##ler': 1.0595164,\n", - " '##ling': 0.99718106,\n", - " '##load': 0.8622203,\n", - " '##s': 0.26257822,\n", - " '##sca': 1.4883617,\n", - " '(': 0.04112861,\n", - " '120': 0.10787471,\n", - " '150': 1.5649581,\n", - " '200': 0.78864884,\n", - " '30': 1.3745978,\n", - " '300': 0.21148267,\n", - " '50': 0.031711366,\n", - " '500': 0.8493792,\n", - " '_': 0.24777141,\n", - " 'accounting': 0.64968836,\n", - " 'additional': 0.3232339,\n", - " 'algorithm': 1.0360106,\n", - " 'algorithms': 0.20798434,\n", - " 'analysis': 0.25909927,\n", - " 'analyze': 0.18533573,\n", - " 'apache': 0.8096589,\n", - " 'api': 1.3224775,\n", - " 'approximate': 0.0154337585,\n", - " 'array': 0.23401959,\n", - " 'auto': 1.4535567,\n", - " 'automatic': 0.7868701,\n", - " 'availability': 0.21982048,\n", - " 'available': 0.030020691,\n", - " 'average': 0.098859586,\n", - " 'basic': 0.2743477,\n", - " 'blocking': 0.10501332,\n", - " 'bot': 0.07765888,\n", - " 'buffer': 0.36042303,\n", - " 'calculate': 0.21506485,\n", - " 'calculation': 0.81758976,\n", - " 'capacity': 0.58354694,\n", - " 'cassandra': 0.22208737,\n", - " 'checkpoint': 0.031537656,\n", - " 'chess': 0.6237735,\n", - " 'class': 0.439471,\n", - " 'clock': 0.54654706,\n", - " 'cluster': 1.4933486,\n", - " 'cod': 0.12783043,\n", - " 'computation': 0.39954206,\n", - " 'compute': 0.042445127,\n", - " 'computer': 0.13797997,\n", - " 'constant': 0.2067099,\n", - " 'cpu': 0.5182024,\n", - " 'crawl': 0.22104222,\n", - " 'data': 0.51176333,\n", - " 'database': 0.440294,\n", - " 'determined': 0.23795621,\n", - " 'disk': 0.5893501,\n", - " 'e': 0.05990428,\n", - " 'each': 0.46478215,\n", - " 'equation': 0.008288982,\n", - " 'er': 0.43452957,\n", - " 'es': 0.14311427,\n", - " 'estimate': 0.25439763,\n", - " 
'every': 0.1305604,\n", - " 'execution': 0.7186893,\n", - " 'exposed': 0.23602542,\n", - " 'extra': 0.7385199,\n", - " 'fixed': 0.11877214,\n", - " 'forum': 0.3137529,\n", - " 'fra': 1.0726693,\n", - " 'fragment': 0.030604606,\n", - " 'g': 0.026902322,\n", - " 'gage': 0.12548852,\n", - " 'guild': 0.27722847,\n", - " 'handle': 0.8976072,\n", - " 'handling': 0.69513077,\n", - " 'heap': 0.26846212,\n", - " 'hours': 0.7121461,\n", - " 'http': 0.10318518,\n", - " 'index': 1.6740144,\n", - " 'indexed': 1.1180266,\n", - " 'indices': 0.88624585,\n", - " 'ing': 1.10228,\n", - " 'integer': 0.2208937,\n", - " 'inventory': 0.44952998,\n", - " 'io': 0.85926545,\n", - " 'item': 0.48019466,\n", - " 'items': 0.7935411,\n", - " 'java': 1.237859,\n", - " 'lane': 0.39564016,\n", - " 'length': 0.47680393,\n", - " 'limit': 0.4967848,\n", - " 'load': 1.2765044,\n", - " 'loading': 0.25379905,\n", - " 'm': 0.06343312,\n", - " 'machine': 0.19301167,\n", - " 'maintenance': 0.23043938,\n", - " 'map': 0.07359305,\n", - " 'mass': 0.08436136,\n", - " 'master': 1.1724675,\n", - " 'matching': 0.044185776,\n", - " 'math': 0.71257645,\n", - " 'max': 0.16343911,\n", - " 'maximum': 0.8216195,\n", - " 'mb': 0.74474645,\n", - " 'measure': 0.22327076,\n", - " 'memory': 1.4785702,\n", - " 'metadata': 0.8341058,\n", - " 'metric': 0.9043063,\n", - " 'minimal': 0.36312523,\n", - " 'minimum': 1.0762551,\n", - " 'mining': 0.6374103,\n", - " 'mp': 0.18194582,\n", - " 'multi': 0.19790418,\n", - " 'multiple': 0.08082614,\n", - " 'n': 0.2315838,\n", - " 'network': 0.5508067,\n", - " 'node': 1.3963627,\n", - " 'nodes': 0.73737425,\n", - " 'number': 0.082121976,\n", - " 'o': 0.11493757,\n", - " 'object': 0.5812754,\n", - " 'par': 0.023205614,\n", - " 'per': 0.23101303,\n", - " 'performance': 0.23446344,\n", - " 'pool': 0.8049336,\n", - " 'pools': 0.15594147,\n", - " 'predict': 0.024841096,\n", - " 'processing': 0.36487442,\n", - " 'pushing': 0.20726342,\n", - " 'q': 0.8291657,\n", - " 'quarterly': 0.13623458,\n", - 
" 'queue': 1.481917,\n", - " 'rail': 0.078313634,\n", - " 'ram': 0.28152135,\n", - " 'rank': 0.3435108,\n", - " 'ratio': 0.06241234,\n", - " 're': 0.2784615,\n", - " 'regional': 0.34884617,\n", - " 'request': 0.99899644,\n", - " 'requests': 0.99197084,\n", - " 'requirement': 0.62241584,\n", - " 'requirements': 0.674187,\n", - " 'resolution': 0.02591185,\n", - " 'routing': 0.19566713,\n", - " 'scala': 0.17918167,\n", - " 'scale': 0.15746343,\n", - " 'seconds': 0.13917202,\n", - " 'semi': 0.23686175,\n", - " 'sequence': 0.5461212,\n", - " 'ser': 0.08773902,\n", - " 'serial': 0.29184434,\n", - " 'server': 0.5091232,\n", - " 'shards': 1.1462573,\n", - " 'sid': 0.5460215,\n", - " 'size': 0.5671189,\n", - " 'small': 0.1666983,\n", - " 'sort': 0.20719269,\n", - " 'sql': 0.21473138,\n", - " 'stack': 0.042597417,\n", - " 'statistics': 0.019139726,\n", - " 'storage': 0.11576759,\n", - " 'strategy': 0.06358851,\n", - " 'swarm': 0.08892168,\n", - " 't': 0.15734711,\n", - " 'task': 0.2625412,\n", - " 'taylor': 0.059171513,\n", - " 'thirty': 0.59235644,\n", - " 'thread': 1.7254765,\n", - " 'threads': 1.1326298,\n", - " 'tier': 2.0103586,\n", - " 'time': 0.5197543,\n", - " 'times': 0.19328791,\n", - " 'total': 0.9341554,\n", - " 'trial': 1.0915743,\n", - " 'ur': 0.041876547,\n", - " 'value': 0.39162463,\n", - " 'values': 0.10083909,\n", - " 'wall': 0.93653333,\n", - " 'web': 0.1397472,\n", - " 'weeks': 0.027450949,\n", - " 'within': 0.38789856,\n", - " 'work': 0.1474287,\n", - " 'workers': 0.30503651,\n", - " 'write': 0.33134767,\n", - " 'x': 0.027046092,\n", - " 'z': 0.06591661,\n", - " 'ze': 0.69916034},\n", - " 'text': 'each '\n", - " 'thread '\n", - " 'is '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " '150 '\n", - " 'indexing '\n", - " 'requests '\n", - " 'within '\n", - " '30s, '\n", - " 'and '\n", - " 'therefore '\n", - " 'account '\n", - " 'for '\n", - " 'one '\n", - " 'extra '\n", - " 'thread '\n", - " 'for '\n", - " 'each '\n", - " '150 '\n", - " 'queued '\n", 
- " 'items. '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'i '\n", - " 'n '\n", - " 'g '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " '_ '\n", - " 's '\n", - " 'i '\n", - " 'z '\n", - " 'e '\n", - " '× '\n", - " 'a '\n", - " 'v '\n", - " 'e '\n", - " 'r '\n", - " 'a '\n", - " 'g '\n", - " 'e '\n", - " '_ '\n", - " 'r '\n", - " 'e '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 's '\n", - " 't '\n", - " '_ '\n", - " 'e '\n", - " 'x '\n", - " 'e '\n", - " 'c '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 't '\n", - " 'i '\n", - " 'm '\n", - " 'e '\n", - " '30 '\n", - " 's '\n", - " '\\\\small '\n", - " 'queued\\\\_ingestion\\\\_load '\n", - " '= '\n", - " '\\\\frac{queue\\\\_size '\n", - " '\\\\times '\n", - " 'average\\\\_request\\\\_execution\\\\_time}{30s} '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " 'd '\n", - " '_ '\n", - " 'in '\n", - " 'g '\n", - " 'es '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 'l '\n", - " 'o '\n", - " 'a '\n", - " 'd '\n", - " '= '\n", - " '30 '\n", - " 's '\n", - " 'q '\n", - " 'u '\n", - " 'e '\n", - " 'u '\n", - " 'e '\n", - " '_ '\n", - " 's '\n", - " 'i '\n", - " 'ze '\n", - " '× '\n", - " 'a '\n", - " 'v '\n", - " 'er '\n", - " 'a '\n", - " 'g '\n", - " 'e '\n", - " '_ '\n", - " 're '\n", - " 'q '\n", - " 'u '\n", - " 'es '\n", - " 't '\n", - " '_ '\n", - " 'e '\n", - " 'x '\n", - " 'ec '\n", - " 'u '\n", - " 't '\n", - " 'i '\n", - " 'o '\n", - " 'n '\n", - " '_ '\n", - " 't '\n", - " 'im '\n", - " 'e '\n", - " '\\u200b '\n", - " 'Note '\n", - " 'that '\n", - " 'since '\n", - " 'the '\n", - " 'indexing '\n", - " 'nodes '\n", - " 'rely '\n", - " 'on '\n", - " 'pushing '\n", - " 'indexed '\n", - " 
'data '\n", - " 'into '\n", - " 'the '\n", - " 'object '\n", - " 'store '\n", - " 'periodically, '\n", - " 'we '\n", - " 'do '\n", - " 'not '\n", - " 'need '\n", - " 'to '\n", - " 'scale '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'total '\n", - " 'size '\n", - " 'of '\n", - " 'the '\n", - " 'indexed '\n", - " 'data. '\n", - " 'However, '\n", - " 'the '\n", - " 'disk '\n", - " 'IO '\n", - " 'requirements '\n", - " 'of '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload '\n", - " 'needs '\n", - " 'to '\n", - " 'be '\n", - " 'considered '\n", - " 'for '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'decisions. '\n", - " 'The '\n", - " 'ingestion '\n", - " 'load '\n", - " 'represents '\n", - " 'both '\n", - " 'CPU '\n", - " 'requirements '\n", - " 'of '\n", - " 'the '\n", - " 'indexing '\n", - " 'nodes '\n", - " 'as '\n", - " 'well '\n", - " 'as '\n", - " 'disk '\n", - " 'IO '\n", - " 'since '\n", - " 'both '\n", - " 'CPU '\n", - " 'and '\n", - " 'IO '\n", - " 'work '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pool '\n", - " 'workers '\n", - " 'and '\n", - " 'we '\n", - " 'rely '\n", - " 'on '\n", - " 'the '\n", - " 'wall '\n", - " 'clock '\n", - " 'time '\n", - " 'to '\n", - " 'estimate '\n", - " 'the '\n", - " 'required '\n", - " 'time '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'queued '\n", - " 'requests. '\n", - " 'Each '\n", - " 'indexing '\n", - " 'node '\n", - " 'calculates '\n", - " 'its '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'publishes '\n", - " 'this '\n", - " 'value '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node '\n", - " 'periodically. 
'\n", - " 'The '\n", - " 'master '\n", - " 'node '\n", - " 'serves '\n", - " 'the '\n", - " 'per '\n", - " 'node '\n", - " 'ingestion '\n", - " 'load '\n", - " 'values '\n", - " 'via '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'to '\n", - " 'the '\n", - " 'autoscaler. '\n", - " 'Memory '\n", - " 'The '\n", - " 'memory '\n", - " 'metrics '\n", - " 'exposed '\n", - " 'by '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'are '\n", - " 'node '\n", - " 'memory '\n", - " 'and '\n", - " 'tier '\n", - " 'memory. '\n", - " 'The '\n", - " 'node '\n", - " 'memory '\n", - " 'represents '\n", - " 'the '\n", - " 'minimum '\n", - " 'memory '\n", - " 'requirement '\n", - " 'for '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'in '\n", - " 'the '\n", - " 'cluster. '\n", - " 'The '\n", - " 'tier '\n", - " 'memory '\n", - " 'metric '\n", - " 'represents '\n", - " 'the '\n", - " 'minimum '\n", - " 'total '\n", - " 'memory '\n", - " 'that '\n", - " 'should '\n", - " 'be '\n", - " 'available '\n", - " 'in '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier. '\n", - " 'Note '\n", - " 'that '\n", - " 'these '\n", - " 'values '\n", - " 'only '\n", - " 'indicate '\n", - " 'the '\n", - " 'minimum '\n", - " 'to '\n", - " 'ensure '\n", - " 'that '\n", - " 'each '\n", - " 'node '\n", - " 'is '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'basic '\n", - " 'indexing '\n", - " 'workload '\n", - " 'and '\n", - " 'hold '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'indices '\n", - " 'metadata, '\n", - " 'while '\n", - " 'ensuring '\n", - " 'that '\n", - " 'the '\n", - " 'tier '\n", - " 'includes '\n", - " 'enough '\n", - " 'nodes '\n", - " 'to '\n", - " 'accommodate '\n", - " 'all '\n", - " 'index '\n", - " 'shards. 
'\n", - " 'Node '\n", - " 'memory '\n", - " 'must '\n", - " 'have '\n", - " 'a '\n", - " 'minimum '\n", - " 'of '\n", - " '500MB '\n", - " 'to '\n", - " 'be '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " 'indexing '\n", - " 'workloads '\n", - " ', '\n", - " 'as '\n", - " 'well '\n", - " 'as '\n", - " 'a '\n", - " 'fixed '\n", - " 'amount '\n", - " 'of '\n", - " 'memory '\n", - " 'per '\n", - " 'each '\n", - " 'index '\n", - " '. '\n", - " 'This '\n", - " 'ensures '\n", - " 'all '\n", - " 'nodes '\n", - " 'can '\n", - " 'hold '\n", - " 'metadata '\n", - " 'for '\n", - " 'the '\n", - " 'cluster, '\n", - " 'which '\n", - " 'includes '\n", - " 'metadata '\n", - " 'for '\n", - " 'every '\n", - " 'index. '\n", - " 'Tier '\n", - " 'memory '\n", - " 'is '\n", - " 'determined '\n", - " 'by '\n", - " 'accounting '\n", - " 'for '\n", - " 'the '\n", - " 'memory'},\n", - " {'embeddings': {'##d': 0.055720266,\n", - " '##est': 0.87620574,\n", - " '##ging': 0.12167851,\n", - " '##id': 0.007303444,\n", - " '##ing': 1.0664626,\n", - " '##ion': 0.5800176,\n", - " '##ler': 1.1925261,\n", - " '##ling': 1.0163201,\n", - " '##load': 0.81047934,\n", - " '##mb': 0.41285288,\n", - " '##rch': 0.9021695,\n", - " '##rd': 1.5396098,\n", - " '##rds': 0.47700712,\n", - " '##s': 0.033316635,\n", - " '##sca': 1.5766962,\n", - " '##sea': 1.0991455,\n", - " '500': 0.8151243,\n", - " '6': 0.5519658,\n", - " 'accounting': 0.74103206,\n", - " 'algorithm': 1.0231093,\n", - " 'algorithms': 0.065428115,\n", - " 'allocated': 0.19617477,\n", - " 'amazon': 0.31502825,\n", - " 'analysis': 0.5597703,\n", - " 'analyze': 0.30770445,\n", - " 'apache': 0.8908353,\n", - " 'api': 1.1461797,\n", - " 'approximate': 0.21645284,\n", - " 'archive': 0.013153568,\n", - " 'array': 0.047213156,\n", - " 'auto': 1.3802772,\n", - " 'automatic': 0.7499421,\n", - " 'availability': 0.10610637,\n", - " 'basic': 0.5700848,\n", - " 'blocking': 0.03154505,\n", - " 'bot': 0.2956401,\n", - " 'brain': 0.13824557,\n", - " 
'brick': 0.34880513,\n", - " 'broken': 0.1587869,\n", - " 'buffer': 0.27810082,\n", - " 'bug': 0.019329984,\n", - " 'cad': 0.010832788,\n", - " 'calculate': 0.71264565,\n", - " 'calculated': 0.19991197,\n", - " 'calculation': 0.90854484,\n", - " 'capacity': 0.13310817,\n", - " 'cassandra': 0.269642,\n", - " 'checkpoint': 0.33004454,\n", - " 'chess': 0.6517597,\n", - " 'class': 0.40205157,\n", - " 'clock': 1.2123855,\n", - " 'cluster': 1.5899432,\n", - " 'clusters': 0.21755162,\n", - " 'computation': 0.3360238,\n", - " 'compute': 0.15521479,\n", - " 'computer': 0.4586727,\n", - " 'computers': 0.09730453,\n", - " 'core': 0.18051882,\n", - " 'cores': 0.54003507,\n", - " 'cpu': 1.4255431,\n", - " 'data': 0.7048903,\n", - " 'database': 0.5640705,\n", - " 'depend': 0.08640857,\n", - " 'deploy': 0.116062716,\n", - " 'deployed': 0.16281521,\n", - " 'deployment': 1.375697,\n", - " 'dev': 0.16744493,\n", - " 'disk': 1.2671278,\n", - " 'display': 0.10427013,\n", - " 'done': 0.057584852,\n", - " 'each': 0.44890955,\n", - " 'elastic': 1.3546548,\n", - " 'estimate': 1.1541563,\n", - " 'estimated': 0.4820726,\n", - " 'estimates': 0.68956727,\n", - " 'execution': 0.025004579,\n", - " 'expose': 0.3791655,\n", - " 'exposed': 1.4152902,\n", - " 'exposing': 0.2018034,\n", - " 'exposure': 0.22712028,\n", - " 'field': 0.43335024,\n", - " 'fixed': 0.3727484,\n", - " 'fragment': 0.3541149,\n", - " 'fragments': 0.19871251,\n", - " 'framework': 0.0067325183,\n", - " 'gage': 0.062432837,\n", - " 'gb': 0.23573099,\n", - " 'guild': 0.06864197,\n", - " 'handle': 0.6664566,\n", - " 'handling': 0.79544353,\n", - " 'hardware': 0.15463935,\n", - " 'hash': 0.056183893,\n", - " 'host': 0.49334934,\n", - " 'hours': 0.23847345,\n", - " 'hu': 0.12027907,\n", - " 'index': 1.84248,\n", - " 'indexed': 0.5543888,\n", - " 'indices': 0.8364849,\n", - " 'ing': 1.1731079,\n", - " 'integration': 0.43307945,\n", - " 'interface': 0.13424914,\n", - " 'inventory': 0.43660846,\n", - " 'io': 1.1710184,\n", - " 'java': 
1.1948129,\n", - " 'kb': 0.275635,\n", - " 'lane': 0.065143116,\n", - " 'lang': 0.07760714,\n", - " 'length': 0.19545008,\n", - " 'limit': 0.14939034,\n", - " 'load': 1.068046,\n", - " 'loading': 0.3452746,\n", - " 'machine': 0.28579098,\n", - " 'maintenance': 0.24792214,\n", - " 'management': 0.016834572,\n", - " 'mandatory': 0.09757359,\n", - " 'map': 0.33999705,\n", - " 'mapped': 0.4253768,\n", - " 'mapping': 0.7739739,\n", - " 'master': 1.514614,\n", - " 'math': 0.62235314,\n", - " 'maximum': 0.4592383,\n", - " 'mb': 0.8386821,\n", - " 'measure': 0.35868418,\n", - " 'memory': 1.4037786,\n", - " 'metadata': 0.57345796,\n", - " 'metric': 1.0478114,\n", - " 'minimal': 0.55310273,\n", - " 'minimum': 1.1779544,\n", - " 'mining': 0.60987383,\n", - " 'monitor': 0.41601682,\n", - " 'monitoring': 0.80379987,\n", - " 'multiple': 0.0046412363,\n", - " 'need': 0.13691676,\n", - " 'needs': 0.09020152,\n", - " 'network': 0.5226748,\n", - " 'node': 1.5207812,\n", - " 'nodes': 0.9873411,\n", - " 'number': 0.08917359,\n", - " 'o': 0.47437057,\n", - " 'open': 0.9998891,\n", - " 'operation': 0.059715636,\n", - " 'parameters': 0.06929999,\n", - " 'per': 1.2698478,\n", - " 'performance': 0.27903107,\n", - " 'pool': 1.1343037,\n", - " 'pools': 0.5005684,\n", - " 'predict': 0.15172759,\n", - " 'processing': 0.34928247,\n", - " 'processor': 0.06942589,\n", - " 'provided': 0.33421612,\n", - " 'published': 0.35502988,\n", - " 'queue': 1.4328028,\n", - " 'ram': 0.07832895,\n", - " 'rank': 0.09849679,\n", - " 'regional': 0.023943441,\n", - " 'request': 0.58130133,\n", - " 'requests': 0.4985438,\n", - " 'require': 0.054292977,\n", - " 'required': 0.20457663,\n", - " 'requirement': 0.9255918,\n", - " 'requirements': 1.1021699,\n", - " 'resolution': 0.2503146,\n", - " 'resource': 0.22062841,\n", - " 'resources': 0.7977981,\n", - " 'scala': 0.046379413,\n", - " 'scale': 0.34393448,\n", - " 'scaling': 0.5871495,\n", - " 'script': 0.07091305,\n", - " 'search': 0.2748066,\n", - " 'semi': 
0.19345926,\n", - " 'sequence': 0.2634719,\n", - " 'serial': 0.281783,\n", - " 'serve': 0.3122354,\n", - " 'server': 0.62030464,\n", - " 'sha': 1.412181,\n", - " 'shards': 1.2690446,\n", - " 'sid': 0.5395205,\n", - " 'size': 0.37528938,\n", - " 'software': 0.2301807,\n", - " 'sql': 0.28173122,\n", - " 'storage': 0.17134488,\n", - " 'sum': 0.48667532,\n", - " 'swarm': 0.09873215,\n", - " 'task': 0.15503421,\n", - " 'thread': 1.2720325,\n", - " 'threads': 0.5098314,\n", - " 'tier': 2.0405457,\n", - " 'time': 0.691699,\n", - " 'timer': 0.3272765,\n", - " 'total': 0.853305,\n", - " 'trial': 0.75489986,\n", - " 'value': 0.55824566,\n", - " 'values': 0.18979663,\n", - " 'wall': 1.5562296,\n", - " 'walls': 0.57668746,\n", - " 'web': 0.12833436,\n", - " 'workers': 0.30275372,\n", - " 'write': 0.8986184},\n", - " 'text': 'both '\n", - " 'CPU '\n", - " 'requirements '\n", - " 'of '\n", - " 'the '\n", - " 'indexing '\n", - " 'nodes '\n", - " 'as '\n", - " 'well '\n", - " 'as '\n", - " 'disk '\n", - " 'IO '\n", - " 'since '\n", - " 'both '\n", - " 'CPU '\n", - " 'and '\n", - " 'IO '\n", - " 'work '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'the '\n", - " 'write '\n", - " 'thread '\n", - " 'pool '\n", - " 'workers '\n", - " 'and '\n", - " 'we '\n", - " 'rely '\n", - " 'on '\n", - " 'the '\n", - " 'wall '\n", - " 'clock '\n", - " 'time '\n", - " 'to '\n", - " 'estimate '\n", - " 'the '\n", - " 'required '\n", - " 'time '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'queued '\n", - " 'requests. '\n", - " 'Each '\n", - " 'indexing '\n", - " 'node '\n", - " 'calculates '\n", - " 'its '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'publishes '\n", - " 'this '\n", - " 'value '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node '\n", - " 'periodically. 
'\n", - " 'The '\n", - " 'master '\n", - " 'node '\n", - " 'serves '\n", - " 'the '\n", - " 'per '\n", - " 'node '\n", - " 'ingestion '\n", - " 'load '\n", - " 'values '\n", - " 'via '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'to '\n", - " 'the '\n", - " 'autoscaler. '\n", - " 'Memory '\n", - " 'The '\n", - " 'memory '\n", - " 'metrics '\n", - " 'exposed '\n", - " 'by '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'are '\n", - " 'node '\n", - " 'memory '\n", - " 'and '\n", - " 'tier '\n", - " 'memory. '\n", - " 'The '\n", - " 'node '\n", - " 'memory '\n", - " 'represents '\n", - " 'the '\n", - " 'minimum '\n", - " 'memory '\n", - " 'requirement '\n", - " 'for '\n", - " 'each '\n", - " 'indexing '\n", - " 'node '\n", - " 'in '\n", - " 'the '\n", - " 'cluster. '\n", - " 'The '\n", - " 'tier '\n", - " 'memory '\n", - " 'metric '\n", - " 'represents '\n", - " 'the '\n", - " 'minimum '\n", - " 'total '\n", - " 'memory '\n", - " 'that '\n", - " 'should '\n", - " 'be '\n", - " 'available '\n", - " 'in '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier. '\n", - " 'Note '\n", - " 'that '\n", - " 'these '\n", - " 'values '\n", - " 'only '\n", - " 'indicate '\n", - " 'the '\n", - " 'minimum '\n", - " 'to '\n", - " 'ensure '\n", - " 'that '\n", - " 'each '\n", - " 'node '\n", - " 'is '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'basic '\n", - " 'indexing '\n", - " 'workload '\n", - " 'and '\n", - " 'hold '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'indices '\n", - " 'metadata, '\n", - " 'while '\n", - " 'ensuring '\n", - " 'that '\n", - " 'the '\n", - " 'tier '\n", - " 'includes '\n", - " 'enough '\n", - " 'nodes '\n", - " 'to '\n", - " 'accommodate '\n", - " 'all '\n", - " 'index '\n", - " 'shards. 
'\n", - " 'Node '\n", - " 'memory '\n", - " 'must '\n", - " 'have '\n", - " 'a '\n", - " 'minimum '\n", - " 'of '\n", - " '500MB '\n", - " 'to '\n", - " 'be '\n", - " 'able '\n", - " 'to '\n", - " 'handle '\n", - " 'indexing '\n", - " 'workloads '\n", - " ', '\n", - " 'as '\n", - " 'well '\n", - " 'as '\n", - " 'a '\n", - " 'fixed '\n", - " 'amount '\n", - " 'of '\n", - " 'memory '\n", - " 'per '\n", - " 'each '\n", - " 'index '\n", - " '. '\n", - " 'This '\n", - " 'ensures '\n", - " 'all '\n", - " 'nodes '\n", - " 'can '\n", - " 'hold '\n", - " 'metadata '\n", - " 'for '\n", - " 'the '\n", - " 'cluster, '\n", - " 'which '\n", - " 'includes '\n", - " 'metadata '\n", - " 'for '\n", - " 'every '\n", - " 'index. '\n", - " 'Tier '\n", - " 'memory '\n", - " 'is '\n", - " 'determined '\n", - " 'by '\n", - " 'accounting '\n", - " 'for '\n", - " 'the '\n", - " 'memory '\n", - " 'overhead '\n", - " 'of '\n", - " 'the '\n", - " 'field '\n", - " 'mappings '\n", - " 'of '\n", - " 'the '\n", - " 'indices '\n", - " 'and '\n", - " 'the '\n", - " 'amount '\n", - " 'of '\n", - " 'memory '\n", - " 'needed '\n", - " 'for '\n", - " 'each '\n", - " 'open '\n", - " 'shard '\n", - " 'allocated '\n", - " 'on '\n", - " 'a '\n", - " 'node '\n", - " 'in '\n", - " 'the '\n", - " 'cluster. '\n", - " 'Currently, '\n", - " 'the '\n", - " 'per-shard '\n", - " 'memory '\n", - " 'requirement '\n", - " 'uses '\n", - " 'a '\n", - " 'fixed '\n", - " 'estimate '\n", - " 'of '\n", - " '6MB. '\n", - " 'We '\n", - " 'plan '\n", - " 'to '\n", - " 'refine '\n", - " 'this '\n", - " 'value. 
'\n", - " 'The '\n", - " 'estimate '\n", - " 'for '\n", - " 'the '\n", - " 'memory '\n", - " 'requirements '\n", - " 'for '\n", - " 'the '\n", - " 'mappings '\n", - " 'of '\n", - " 'each '\n", - " 'index '\n", - " 'is '\n", - " 'calculated '\n", - " 'by '\n", - " 'one '\n", - " 'of '\n", - " 'the '\n", - " 'data '\n", - " 'nodes '\n", - " 'that '\n", - " 'hosts '\n", - " 'a '\n", - " 'shard '\n", - " 'of '\n", - " 'the '\n", - " 'index. '\n", - " 'The '\n", - " 'calculated '\n", - " 'estimates '\n", - " 'are '\n", - " 'sent '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node. '\n", - " 'Whenever '\n", - " 'there '\n", - " 'is '\n", - " 'a '\n", - " 'mapping '\n", - " 'change '\n", - " 'this '\n", - " 'estimate '\n", - " 'is '\n", - " 'updated '\n", - " 'and '\n", - " 'published '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node '\n", - " 'again. '\n", - " 'The '\n", - " 'master '\n", - " 'node '\n", - " 'serves '\n", - " 'the '\n", - " 'node '\n", - " 'and '\n", - " 'total '\n", - " 'memory '\n", - " 'metrics '\n", - " 'based '\n", - " 'on '\n", - " 'these '\n", - " 'information '\n", - " 'via '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'to '\n", - " 'the '\n", - " 'autoscaler. '\n", - " 'Scaling '\n", - " 'the '\n", - " 'cluster '\n", - " 'The '\n", - " 'autoscaler '\n", - " 'is '\n", - " 'responsible '\n", - " 'for '\n", - " 'monitoring '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'via '\n", - " 'the '\n", - " 'exposed '\n", - " 'metrics, '\n", - " 'calculating '\n", - " 'the '\n", - " 'desirable '\n", - " 'cluster '\n", - " 'size '\n", - " 'to '\n", - " 'adapt '\n", - " 'to '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload, '\n", - " 'and '\n", - " 'updating '\n", - " 'the '\n", - " 'deployment '\n", - " 'accordingly. 
'\n", - " 'This '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'calculating '\n", - " 'the '\n", - " 'total '\n", - " 'required '\n", - " 'CPU '\n", - " 'and '\n", - " 'memory '\n", - " 'resources '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'memory '\n", - " 'metrics. '\n", - " 'The '\n", - " 'sum '\n", - " 'of '\n", - " 'all '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'per '\n", - " 'node '\n", - " 'values '\n", - " 'determines '\n", - " 'the '\n", - " 'total '\n", - " 'number '\n", - " 'of '\n", - " 'CPU '\n", - " 'cores '\n", - " 'needed '\n", - " 'for '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier. '\n", - " 'The '\n", - " 'calculated '\n", - " 'CPU '\n", - " 'requirement '\n", - " 'and '\n", - " 'the '\n", - " 'provided '\n", - " 'minimum '\n", - " 'node '\n", - " 'and '\n", - " 'tier '\n", - " 'memory '\n", - " 'resources '\n", - " 'are '\n", - " 'mapped '\n", - " 'to '\n", - " 'a '\n", - " 'predetermined '\n", - " 'set'},\n", - " {'embeddings': {'##ber': 0.9460652,\n", - " '##d': 0.10023495,\n", - " '##es': 0.14341043,\n", - " '##gb': 0.6906553,\n", - " '##ine': 0.9458122,\n", - " '##ing': 0.42145026,\n", - " '##ler': 1.2356958,\n", - " '##ling': 0.63835293,\n", - " '##load': 0.2904571,\n", - " '##mb': 0.6970242,\n", - " '##net': 0.7010928,\n", - " '##pu': 1.0257086,\n", - " '##rch': 1.0700952,\n", - " '##rd': 1.6493205,\n", - " '##rds': 0.6754141,\n", - " '##rt': 0.12942569,\n", - " '##sca': 1.4853197,\n", - " '##sea': 1.4192088,\n", - " '##vc': 1.405061,\n", - " '100': 0.26849923,\n", - " '16': 0.19268984,\n", - " '160': 0.2302431,\n", - " '1600': 0.8732733,\n", - " '32': 1.2120824,\n", - " '6': 0.70548016,\n", - " '64': 1.202607,\n", - " 'algorithm': 0.937971,\n", - " 'allocated': 0.73692024,\n", - " 'allocation': 0.4625666,\n", - " 'amazon': 0.86137766,\n", - " 'analysis': 0.58160084,\n", - " 'analyze': 0.023657316,\n", - " 'apache': 0.85805637,\n", - " 'api': 
0.9369967,\n", - " 'approximate': 0.15172462,\n", - " 'auto': 1.225151,\n", - " 'automatic': 0.7224918,\n", - " 'availability': 0.3053787,\n", - " 'bot': 0.33649588,\n", - " 'brick': 0.28021842,\n", - " 'buffer': 0.27807808,\n", - " 'bug': 0.12689802,\n", - " 'calculate': 0.56475216,\n", - " 'calculated': 0.2805605,\n", - " 'calculating': 0.18157567,\n", - " 'calculation': 1.0562031,\n", - " 'capacity': 0.19689727,\n", - " 'certification': 0.030283952,\n", - " 'checkpoint': 0.1251825,\n", - " 'chess': 0.38721076,\n", - " 'class': 0.044428803,\n", - " 'closed': 0.20298174,\n", - " 'cluster': 1.8217679,\n", - " 'clusters': 0.40412048,\n", - " 'computation': 0.27228907,\n", - " 'compute': 0.157462,\n", - " 'computer': 0.07424284,\n", - " 'cores': 0.28018573,\n", - " 'cpu': 0.874331,\n", - " 'criteria': 0.20424062,\n", - " 'cube': 0.078070216,\n", - " 'currently': 0.26391146,\n", - " 'data': 0.57366157,\n", - " 'database': 0.5346718,\n", - " 'deploy': 0.31853938,\n", - " 'deployed': 0.23235346,\n", - " 'deployment': 1.38996,\n", - " 'desirable': 0.25084683,\n", - " 'desired': 0.05757945,\n", - " 'determine': 0.07967118,\n", - " 'determined': 0.38774973,\n", - " 'dimensions': 0.3834306,\n", - " 'disk': 0.7686433,\n", - " 'display': 0.044948753,\n", - " 'domain': 0.05484484,\n", - " 'each': 0.026949435,\n", - " 'elastic': 1.7217911,\n", - " 'equation': 0.07899539,\n", - " 'estimate': 1.0816743,\n", - " 'estimated': 0.2908085,\n", - " 'estimates': 0.7743369,\n", - " 'existing': 0.50358754,\n", - " 'exposed': 0.91814655,\n", - " 'field': 1.4176838,\n", - " 'fields': 0.56111515,\n", - " 'fixed': 0.653671,\n", - " 'forest': 0.088545434,\n", - " 'gage': 0.23066506,\n", - " 'gb': 0.7216355,\n", - " 'hardware': 0.5457616,\n", - " 'honey': 0.13710178,\n", - " 'host': 0.32896483,\n", - " 'hu': 0.022061992,\n", - " 'implement': 0.19801763,\n", - " 'index': 1.5813339,\n", - " 'indexed': 0.33440682,\n", - " 'indicator': 0.07646061,\n", - " 'indices': 1.0497515,\n", - " 'ing': 
0.44711637,\n", - " 'integration': 0.38794386,\n", - " 'inventory': 0.55072165,\n", - " 'java': 1.0091366,\n", - " 'kb': 0.31603098,\n", - " 'ku': 1.2214607,\n", - " 'largest': 0.55517995,\n", - " 'length': 0.1961873,\n", - " 'limit': 0.12602727,\n", - " 'linear': 0.13019355,\n", - " 'load': 0.7046929,\n", - " 'map': 0.6723943,\n", - " 'mapped': 0.6155787,\n", - " 'mapping': 0.95820665,\n", - " 'maps': 0.19839133,\n", - " 'master': 1.3583598,\n", - " 'math': 0.52316844,\n", - " 'maximum': 0.17016214,\n", - " 'mb': 0.8793483,\n", - " 'measure': 0.37326512,\n", - " 'memory': 1.3331418,\n", - " 'metric': 0.9261499,\n", - " 'minimum': 0.4176075,\n", - " 'mining': 0.42999497,\n", - " 'monitor': 0.34513482,\n", - " 'monitoring': 0.6307714,\n", - " 'multi': 0.3034215,\n", - " 'network': 0.67814016,\n", - " 'node': 1.2861586,\n", - " 'nodes': 0.6710798,\n", - " 'open': 1.3986069,\n", - " 'optimal': 0.0624708,\n", - " 'overhead': 0.69991654,\n", - " 'parameters': 0.11732358,\n", - " 'pattern': 0.005440311,\n", - " 'per': 1.2889819,\n", - " 'performance': 0.14103872,\n", - " 'poll': 0.52450436,\n", - " 'polling': 0.3777002,\n", - " 'polls': 0.60389787,\n", - " 'predict': 0.038165692,\n", - " 'published': 0.06970011,\n", - " 'radar': 0.004892402,\n", - " 'ram': 0.1705884,\n", - " 'rank': 0.1464829,\n", - " 'ratio': 0.6063533,\n", - " 'reconciliation': 0.4469912,\n", - " 'ref': 0.5476266,\n", - " 'requirement': 0.92776734,\n", - " 'requirements': 1.1151919,\n", - " 'resolution': 0.34558743,\n", - " 'resource': 0.21023308,\n", - " 'resources': 0.925664,\n", - " 'scale': 1.1254972,\n", - " 'scaled': 0.25958243,\n", - " 'scaling': 1.3571583,\n", - " 'scope': 0.007439173,\n", - " 'script': 0.108936414,\n", - " 'search': 0.4840181,\n", - " 'serial': 0.38776705,\n", - " 'server': 0.36229628,\n", - " 'sha': 1.6222633,\n", - " 'sid': 0.4845318,\n", - " 'since': 0.0958648,\n", - " 'size': 1.1212213,\n", - " 'sizes': 0.8831621,\n", - " 'software': 0.10655975,\n", - " 'sort': 
0.23242046,\n", - " 'specification': 0.36318856,\n", - " 'specifications': 0.36570984,\n", - " 'storage': 0.16639474,\n", - " 'swarm': 0.012647891,\n", - " 'target': 0.097013876,\n", - " 'tier': 1.3347368,\n", - " 'total': 0.2700686,\n", - " 'trial': 0.48382765,\n", - " 'up': 0.009041203,\n", - " 'value': 0.5148574,\n", - " 'version': 0.00331044,\n", - " 'vote': 0.19521642,\n", - " 'voting': 0.32694972,\n", - " 'web': 0.43445045,\n", - " 'which': 0.22146864},\n", - " 'text': 'overhead '\n", - " 'of '\n", - " 'the '\n", - " 'field '\n", - " 'mappings '\n", - " 'of '\n", - " 'the '\n", - " 'indices '\n", - " 'and '\n", - " 'the '\n", - " 'amount '\n", - " 'of '\n", - " 'memory '\n", - " 'needed '\n", - " 'for '\n", - " 'each '\n", - " 'open '\n", - " 'shard '\n", - " 'allocated '\n", - " 'on '\n", - " 'a '\n", - " 'node '\n", - " 'in '\n", - " 'the '\n", - " 'cluster. '\n", - " 'Currently, '\n", - " 'the '\n", - " 'per-shard '\n", - " 'memory '\n", - " 'requirement '\n", - " 'uses '\n", - " 'a '\n", - " 'fixed '\n", - " 'estimate '\n", - " 'of '\n", - " '6MB. '\n", - " 'We '\n", - " 'plan '\n", - " 'to '\n", - " 'refine '\n", - " 'this '\n", - " 'value. '\n", - " 'The '\n", - " 'estimate '\n", - " 'for '\n", - " 'the '\n", - " 'memory '\n", - " 'requirements '\n", - " 'for '\n", - " 'the '\n", - " 'mappings '\n", - " 'of '\n", - " 'each '\n", - " 'index '\n", - " 'is '\n", - " 'calculated '\n", - " 'by '\n", - " 'one '\n", - " 'of '\n", - " 'the '\n", - " 'data '\n", - " 'nodes '\n", - " 'that '\n", - " 'hosts '\n", - " 'a '\n", - " 'shard '\n", - " 'of '\n", - " 'the '\n", - " 'index. '\n", - " 'The '\n", - " 'calculated '\n", - " 'estimates '\n", - " 'are '\n", - " 'sent '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node. 
'\n", - " 'Whenever '\n", - " 'there '\n", - " 'is '\n", - " 'a '\n", - " 'mapping '\n", - " 'change '\n", - " 'this '\n", - " 'estimate '\n", - " 'is '\n", - " 'updated '\n", - " 'and '\n", - " 'published '\n", - " 'to '\n", - " 'the '\n", - " 'master '\n", - " 'node '\n", - " 'again. '\n", - " 'The '\n", - " 'master '\n", - " 'node '\n", - " 'serves '\n", - " 'the '\n", - " 'node '\n", - " 'and '\n", - " 'total '\n", - " 'memory '\n", - " 'metrics '\n", - " 'based '\n", - " 'on '\n", - " 'these '\n", - " 'information '\n", - " 'via '\n", - " 'the '\n", - " 'autoscaling '\n", - " 'metrics '\n", - " 'API '\n", - " 'to '\n", - " 'the '\n", - " 'autoscaler. '\n", - " 'Scaling '\n", - " 'the '\n", - " 'cluster '\n", - " 'The '\n", - " 'autoscaler '\n", - " 'is '\n", - " 'responsible '\n", - " 'for '\n", - " 'monitoring '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'via '\n", - " 'the '\n", - " 'exposed '\n", - " 'metrics, '\n", - " 'calculating '\n", - " 'the '\n", - " 'desirable '\n", - " 'cluster '\n", - " 'size '\n", - " 'to '\n", - " 'adapt '\n", - " 'to '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload, '\n", - " 'and '\n", - " 'updating '\n", - " 'the '\n", - " 'deployment '\n", - " 'accordingly. '\n", - " 'This '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'calculating '\n", - " 'the '\n", - " 'total '\n", - " 'required '\n", - " 'CPU '\n", - " 'and '\n", - " 'memory '\n", - " 'resources '\n", - " 'based '\n", - " 'on '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'and '\n", - " 'memory '\n", - " 'metrics. '\n", - " 'The '\n", - " 'sum '\n", - " 'of '\n", - " 'all '\n", - " 'the '\n", - " 'ingestion '\n", - " 'load '\n", - " 'per '\n", - " 'node '\n", - " 'values '\n", - " 'determines '\n", - " 'the '\n", - " 'total '\n", - " 'number '\n", - " 'of '\n", - " 'CPU '\n", - " 'cores '\n", - " 'needed '\n", - " 'for '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier. 
'\n", - " 'The '\n", - " 'calculated '\n", - " 'CPU '\n", - " 'requirement '\n", - " 'and '\n", - " 'the '\n", - " 'provided '\n", - " 'minimum '\n", - " 'node '\n", - " 'and '\n", - " 'tier '\n", - " 'memory '\n", - " 'resources '\n", - " 'are '\n", - " 'mapped '\n", - " 'to '\n", - " 'a '\n", - " 'predetermined '\n", - " 'set '\n", - " 'of '\n", - " 'cluster '\n", - " 'sizes. '\n", - " 'Each '\n", - " 'cluster '\n", - " 'size '\n", - " 'determines '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'nodes '\n", - " 'and '\n", - " 'the '\n", - " 'CPU, '\n", - " 'memory '\n", - " 'and '\n", - " 'disk '\n", - " 'size '\n", - " 'of '\n", - " 'each '\n", - " 'node. '\n", - " 'All '\n", - " 'nodes '\n", - " 'within '\n", - " 'a '\n", - " 'certain '\n", - " 'cluster '\n", - " 'size '\n", - " 'have '\n", - " 'the '\n", - " 'same '\n", - " 'hardware '\n", - " 'specification. '\n", - " 'There '\n", - " 'is '\n", - " 'a '\n", - " 'fixed '\n", - " 'ratio '\n", - " 'between '\n", - " 'CPU, '\n", - " 'memory '\n", - " 'and '\n", - " 'disk, '\n", - " 'thus '\n", - " 'always '\n", - " 'scaling '\n", - " 'all '\n", - " '3 '\n", - " 'resources '\n", - " 'linearly. '\n", - " 'The '\n", - " 'existing '\n", - " 'cluster '\n", - " 'sizes '\n", - " 'for '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier '\n", - " 'are '\n", - " 'based '\n", - " 'on '\n", - " 'node '\n", - " 'sizes '\n", - " 'starting '\n", - " 'from '\n", - " '4GB/2vCPU/100GB '\n", - " 'disk '\n", - " 'to '\n", - " '64GB/32vCPU/1600GB '\n", - " 'disk. 
'\n", - " 'Once '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'scales '\n", - " 'up '\n", - " 'to '\n", - " 'the '\n", - " 'largest '\n", - " 'node '\n", - " 'size '\n", - " '(64GB '\n", - " 'memory), '\n", - " 'any '\n", - " 'further '\n", - " 'scale-up '\n", - " 'adds '\n", - " 'new '\n", - " '64GB '\n", - " 'nodes, '\n", - " 'allowing '\n", - " 'a '\n", - " 'cluster '\n", - " 'to '\n", - " 'scale '\n", - " 'up '\n", - " 'to '\n", - " '32 '\n", - " 'nodes '\n", - " 'of '\n", - " '64GB. '\n", - " 'Note '\n", - " 'that '\n", - " 'this '\n", - " 'is '\n", - " 'not '\n", - " 'a '\n", - " 'hard '\n", - " 'upper '\n", - " 'bound '\n", - " 'on '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'Elasticsearch '\n", - " 'nodes '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'can '\n", - " 'be '\n", - " 'increased '\n", - " 'if '\n", - " 'necessary. '\n", - " 'Every '\n", - " '5 '\n", - " 'seconds '\n", - " 'the '\n", - " 'autoscaler '\n", - " 'polls '\n", - " 'metrics '\n", - " 'from '\n", - " 'the '\n", - " 'master '\n", - " 'node, '\n", - " 'calculates '\n", - " 'the '\n", - " 'desirable '\n", - " 'cluster '\n", - " 'size '\n", - " 'and '\n", - " 'if '\n", - " 'it '\n", - " 'is '\n", - " 'different '\n", - " 'from '\n", - " 'the '\n", - " 'current '\n", - " 'cluster '\n", - " 'size, '\n", - " 'it '\n", - " 'updates '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'Kubernetes '\n", - " 'Deployment '\n", - " 'accordingly. '\n", - " 'Note '\n", - " 'that '\n", - " 'the '\n", - " 'actual '\n", - " 'reconciliation '\n", - " 'of '\n", - " 'the '\n", - " 'deployment '\n", - " 'towards '\n", - " 'the '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'and '\n", - " 'adding '\n", - " 'and '\n", - " 'removing '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'nodes '\n", - " 'to '\n", - " 'achieve '\n", - " 'this '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'Kubernetes. 
'\n", - " 'In '\n", - " 'order '\n", - " 'to '\n", - " 'avoid '\n", - " 'very '\n", - " 'short-lived '\n", - " 'changes '\n", - " 'to '\n", - " 'the'},\n", - " {'embeddings': {'##ber': 0.03804658,\n", - " '##es': 0.1512185,\n", - " '##gb': 0.6443679,\n", - " '##hi': 0.36000288,\n", - " '##ika': 0.07467539,\n", - " '##ing': 0.6129379,\n", - " '##ler': 1.1574837,\n", - " '##less': 0.5735957,\n", - " '##ling': 1.1661593,\n", - " '##load': 0.62337583,\n", - " '##net': 0.58226395,\n", - " '##oya': 1.7074469,\n", - " '##pu': 1.1345644,\n", - " '##rch': 1.0119687,\n", - " '##sca': 1.5153302,\n", - " '##sea': 1.4253823,\n", - " '##vc': 1.4631956,\n", - " '100': 0.55265766,\n", - " '15': 0.052379817,\n", - " '16': 0.33394203,\n", - " '160': 0.118766,\n", - " '1600': 0.8028694,\n", - " '32': 1.1772103,\n", - " '4': 0.16181825,\n", - " '64': 1.4588842,\n", - " 'algorithm': 0.94727564,\n", - " 'always': 0.38941032,\n", - " 'amazon': 0.89331883,\n", - " 'analysis': 0.4050502,\n", - " 'analyze': 0.023668261,\n", - " 'andersen': 0.49676144,\n", - " 'apache': 0.80054885,\n", - " 'ariel': 0.4422102,\n", - " 'auto': 1.2729144,\n", - " 'automatic': 0.7698037,\n", - " 'automatically': 0.04643825,\n", - " 'availability': 0.49544457,\n", - " 'available': 0.19981025,\n", - " 'blog': 0.50581634,\n", - " 'boat': 0.4211383,\n", - " 'bot': 0.44343898,\n", - " 'bug': 0.16439897,\n", - " 'calculate': 0.44946215,\n", - " 'calculating': 0.21078831,\n", - " 'calculation': 0.91136605,\n", - " 'calculations': 0.35172287,\n", - " 'capacity': 0.32551798,\n", - " 'certification': 0.96537966,\n", - " 'certified': 0.86568826,\n", - " 'change': 0.091490604,\n", - " 'checkpoint': 0.13703609,\n", - " 'chess': 0.30361477,\n", - " 'class': 0.12189255,\n", - " 'cloud': 0.36273655,\n", - " 'cluster': 2.1554685,\n", - " 'clusters': 0.84253734,\n", - " 'competition': 0.0070358375,\n", - " 'component': 0.16093102,\n", - " 'components': 0.688979,\n", - " 'computation': 0.0109849,\n", - " 'computer': 
0.37449652,\n", - " 'computers': 0.29611063,\n", - " 'constant': 0.21192689,\n", - " 'cpu': 0.9483953,\n", - " 'crawl': 0.061979044,\n", - " 'data': 0.29847682,\n", - " 'database': 0.53361094,\n", - " 'define': 0.30592072,\n", - " 'deployment': 1.1050912,\n", - " 'desirable': 0.28776327,\n", - " 'determination': 0.25265238,\n", - " 'determine': 0.4538456,\n", - " 'determined': 0.5666302,\n", - " 'determines': 0.02666208,\n", - " 'dimensions': 0.43506965,\n", - " 'disadvantage': 0.40544793,\n", - " 'disk': 1.0043706,\n", - " 'domain': 0.08386699,\n", - " 'down': 1.1079221,\n", - " 'each': 0.20502539,\n", - " 'elastic': 2.0313072,\n", - " 'engineer': 0.41261968,\n", - " 'engineering': 0.43656224,\n", - " 'existing': 0.82118076,\n", - " 'expensive': 0.10213457,\n", - " 'factors': 0.04067958,\n", - " 'fernandez': 1.1611929,\n", - " 'fixed': 0.6458474,\n", - " 'forest': 0.07132318,\n", - " 'francisco': 1.0563725,\n", - " 'garcia': 0.13344267,\n", - " 'gb': 0.6862939,\n", - " 'global': 0.0054082987,\n", - " 'hardware': 0.7944886,\n", - " 'hen': 0.9853478,\n", - " 'honey': 0.081156164,\n", - " 'hour': 0.0074544367,\n", - " 'hours': 0.24539681,\n", - " 'hu': 0.06941744,\n", - " 'implement': 0.23772681,\n", - " 'implementation': 0.07986039,\n", - " 'improve': 0.2981144,\n", - " 'increase': 0.7570058,\n", - " 'increasing': 0.25063965,\n", - " 'index': 1.358504,\n", - " 'indexed': 0.29916498,\n", - " 'ing': 0.49232894,\n", - " 'integration': 0.20372295,\n", - " 'inventory': 0.49392712,\n", - " 'java': 0.96544707,\n", - " 'jose': 0.014233379,\n", - " 'ku': 1.0064884,\n", - " 'large': 0.009199611,\n", - " 'largest': 0.5853634,\n", - " 'latest': 0.075750045,\n", - " 'learning': 0.14278692,\n", - " 'length': 0.2575359,\n", - " 'limit': 0.27284575,\n", - " 'linear': 0.99686086,\n", - " 'load': 0.78078943,\n", - " 'loading': 0.09809506,\n", - " 'log': 0.053032227,\n", - " 'lopez': 0.37077188,\n", - " 'machine': 0.1154489,\n", - " 'maintenance': 0.24795005,\n", - " 'management': 
0.28454626,\n", - " 'map': 0.12368915,\n", - " 'master': 1.0599743,\n", - " 'math': 0.39245087,\n", - " 'maximum': 0.37043598,\n", - " 'mb': 0.65867126,\n", - " 'measure': 0.401138,\n", - " 'mechanism': 0.5363481,\n", - " 'memory': 1.0781962,\n", - " 'metric': 0.9361899,\n", - " 'mining': 0.4610803,\n", - " 'minute': 0.7122368,\n", - " 'minutes': 0.03330799,\n", - " 'multiple': 0.28440112,\n", - " 'network': 0.70334154,\n", - " 'new': 0.36585885,\n", - " 'node': 1.1508181,\n", - " 'nodes': 0.6786249,\n", - " 'number': 0.46848533,\n", - " 'online': 0.10060778,\n", - " 'operation': 0.013929884,\n", - " 'optimal': 0.052087568,\n", - " 'overhead': 0.12910955,\n", - " 'performance': 0.10508823,\n", - " 'po': 0.030801829,\n", - " 'poll': 0.032789562,\n", - " 'polling': 0.08606442,\n", - " 'polls': 0.31255096,\n", - " 'predict': 0.038815167,\n", - " 'process': 0.32648584,\n", - " 'processing': 0.13010792,\n", - " 'quan': 0.30870175,\n", - " 'rank': 0.23912333,\n", - " 'ratio': 1.1149174,\n", - " 'ratios': 0.17480499,\n", - " 'ready': 0.7220055,\n", - " 'reconciliation': 0.03476886,\n", - " 'reduce': 0.48650545,\n", - " 'regulation': 0.14490134,\n", - " 'requirements': 0.26383802,\n", - " 'resource': 0.48044914,\n", - " 'resources': 0.99925154,\n", - " 'sale': 0.23320372,\n", - " 'same': 0.04602473,\n", - " 'scala': 0.34763098,\n", - " 'scale': 1.3520039,\n", - " 'scaled': 0.373489,\n", - " 'scales': 0.23150739,\n", - " 'scaling': 1.3547646,\n", - " 'scope': 0.24351352,\n", - " 'sea': 0.012636473,\n", - " 'search': 0.5437506,\n", - " 'seconds': 0.21717648,\n", - " 'serial': 0.084758565,\n", - " 'server': 0.66100806,\n", - " 'si': 0.13631321,\n", - " 'sid': 0.4065147,\n", - " 'size': 1.4813008,\n", - " 'sizes': 1.1315687,\n", - " 'software': 0.053653706,\n", - " 'sort': 0.34857363,\n", - " 'specification': 0.47748893,\n", - " 'specifications': 0.54209507,\n", - " 'square': 0.0464906,\n", - " 'storage': 0.2826658,\n", - " 'strategy': 0.105019435,\n", - " 'swarm': 
0.08799058,\n", - " 'three': 0.0456386,\n", - " 'tier': 1.2590698,\n", - " 'torre': 0.033106416,\n", - " 'total': 0.15115097,\n", - " 'trainer': 0.28730983,\n", - " 'training': 0.91525143,\n", - " 'trial': 0.40092948,\n", - " 'unit': 0.12670164,\n", - " 'up': 0.48489103,\n", - " 'user': 0.5006898,\n", - " 'users': 0.35868,\n", - " 'vote': 0.16288216,\n", - " 'voting': 0.2478986,\n", - " 'web': 0.44947043},\n", - " 'text': 'of '\n", - " 'cluster '\n", - " 'sizes. '\n", - " 'Each '\n", - " 'cluster '\n", - " 'size '\n", - " 'determines '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'nodes '\n", - " 'and '\n", - " 'the '\n", - " 'CPU, '\n", - " 'memory '\n", - " 'and '\n", - " 'disk '\n", - " 'size '\n", - " 'of '\n", - " 'each '\n", - " 'node. '\n", - " 'All '\n", - " 'nodes '\n", - " 'within '\n", - " 'a '\n", - " 'certain '\n", - " 'cluster '\n", - " 'size '\n", - " 'have '\n", - " 'the '\n", - " 'same '\n", - " 'hardware '\n", - " 'specification. '\n", - " 'There '\n", - " 'is '\n", - " 'a '\n", - " 'fixed '\n", - " 'ratio '\n", - " 'between '\n", - " 'CPU, '\n", - " 'memory '\n", - " 'and '\n", - " 'disk, '\n", - " 'thus '\n", - " 'always '\n", - " 'scaling '\n", - " 'all '\n", - " '3 '\n", - " 'resources '\n", - " 'linearly. '\n", - " 'The '\n", - " 'existing '\n", - " 'cluster '\n", - " 'sizes '\n", - " 'for '\n", - " 'the '\n", - " 'indexing '\n", - " 'tier '\n", - " 'are '\n", - " 'based '\n", - " 'on '\n", - " 'node '\n", - " 'sizes '\n", - " 'starting '\n", - " 'from '\n", - " '4GB/2vCPU/100GB '\n", - " 'disk '\n", - " 'to '\n", - " '64GB/32vCPU/1600GB '\n", - " 'disk. 
'\n", - " 'Once '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'scales '\n", - " 'up '\n", - " 'to '\n", - " 'the '\n", - " 'largest '\n", - " 'node '\n", - " 'size '\n", - " '(64GB '\n", - " 'memory), '\n", - " 'any '\n", - " 'further '\n", - " 'scale-up '\n", - " 'adds '\n", - " 'new '\n", - " '64GB '\n", - " 'nodes, '\n", - " 'allowing '\n", - " 'a '\n", - " 'cluster '\n", - " 'to '\n", - " 'scale '\n", - " 'up '\n", - " 'to '\n", - " '32 '\n", - " 'nodes '\n", - " 'of '\n", - " '64GB. '\n", - " 'Note '\n", - " 'that '\n", - " 'this '\n", - " 'is '\n", - " 'not '\n", - " 'a '\n", - " 'hard '\n", - " 'upper '\n", - " 'bound '\n", - " 'on '\n", - " 'the '\n", - " 'number '\n", - " 'of '\n", - " 'Elasticsearch '\n", - " 'nodes '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'can '\n", - " 'be '\n", - " 'increased '\n", - " 'if '\n", - " 'necessary. '\n", - " 'Every '\n", - " '5 '\n", - " 'seconds '\n", - " 'the '\n", - " 'autoscaler '\n", - " 'polls '\n", - " 'metrics '\n", - " 'from '\n", - " 'the '\n", - " 'master '\n", - " 'node, '\n", - " 'calculates '\n", - " 'the '\n", - " 'desirable '\n", - " 'cluster '\n", - " 'size '\n", - " 'and '\n", - " 'if '\n", - " 'it '\n", - " 'is '\n", - " 'different '\n", - " 'from '\n", - " 'the '\n", - " 'current '\n", - " 'cluster '\n", - " 'size, '\n", - " 'it '\n", - " 'updates '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'Kubernetes '\n", - " 'Deployment '\n", - " 'accordingly. '\n", - " 'Note '\n", - " 'that '\n", - " 'the '\n", - " 'actual '\n", - " 'reconciliation '\n", - " 'of '\n", - " 'the '\n", - " 'deployment '\n", - " 'towards '\n", - " 'the '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'and '\n", - " 'adding '\n", - " 'and '\n", - " 'removing '\n", - " 'the '\n", - " 'Elasticsearch '\n", - " 'nodes '\n", - " 'to '\n", - " 'achieve '\n", - " 'this '\n", - " 'is '\n", - " 'done '\n", - " 'by '\n", - " 'Kubernetes. 
'\n", - " 'In '\n", - " 'order '\n", - " 'to '\n", - " 'avoid '\n", - " 'very '\n", - " 'short-lived '\n", - " 'changes '\n", - " 'to '\n", - " 'the '\n", - " 'cluster '\n", - " 'size, '\n", - " 'we '\n", - " 'account '\n", - " 'for '\n", - " 'a '\n", - " '10% '\n", - " 'headroom '\n", - " 'when '\n", - " 'calculating '\n", - " 'the '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'during '\n", - " 'a '\n", - " 'scale '\n", - " 'down '\n", - " 'and '\n", - " 'a '\n", - " 'scale '\n", - " 'down '\n", - " 'takes '\n", - " 'effect '\n", - " 'only '\n", - " 'if '\n", - " 'all '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'calculations '\n", - " 'within '\n", - " 'the '\n", - " 'past '\n", - " '15 '\n", - " 'minute '\n", - " 'have '\n", - " 'indicated '\n", - " 'a '\n", - " 'scale-down. '\n", - " 'Currently, '\n", - " 'the '\n", - " 'time '\n", - " 'that '\n", - " 'it '\n", - " 'takes '\n", - " 'for '\n", - " 'an '\n", - " 'increase '\n", - " 'in '\n", - " 'the '\n", - " 'metrics '\n", - " 'to '\n", - " 'lead '\n", - " 'to '\n", - " 'the '\n", - " 'first '\n", - " 'Elasticsearch '\n", - " 'node '\n", - " 'being '\n", - " 'added '\n", - " 'to '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'ready '\n", - " 'to '\n", - " 'process '\n", - " 'indexing '\n", - " 'load '\n", - " 'is '\n", - " 'under '\n", - " '1 '\n", - " 'minute. '\n", - " 'Conclusion '\n", - " 'In '\n", - " 'this '\n", - " 'blog '\n", - " 'post, '\n", - " 'we '\n", - " 'explained '\n", - " 'how '\n", - " 'ingest '\n", - " 'autoscaling '\n", - " 'works '\n", - " 'in '\n", - " 'Elasticsearch, '\n", - " 'the '\n", - " 'different '\n", - " 'components '\n", - " 'involved, '\n", - " 'and '\n", - " 'the '\n", - " 'metrics '\n", - " 'used '\n", - " 'to '\n", - " 'quantify '\n", - " 'the '\n", - " 'resources '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload. 
'\n", - " 'We '\n", - " 'believe '\n", - " 'that '\n", - " 'such '\n", - " 'an '\n", - " 'autoscaling '\n", - " 'mechanism '\n", - " 'is '\n", - " 'crucial '\n", - " 'to '\n", - " 'reduce '\n", - " 'the '\n", - " 'operational '\n", - " 'overhead '\n", - " 'of '\n", - " 'an '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'for '\n", - " 'the '\n", - " 'users '\n", - " 'by '\n", - " 'automatically '\n", - " 'increasing '\n", - " 'the '\n", - " 'available '\n", - " 'resources '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'when '\n", - " 'necessary. '\n", - " 'Furthermore, '\n", - " 'it '\n", - " 'leads '\n", - " 'to '\n", - " 'cost '\n", - " 'reduction '\n", - " 'by '\n", - " 'scaling '\n", - " 'down '\n", - " 'the '\n", - " 'cluster '\n", - " 'when '\n", - " 'the '\n", - " 'available '\n", - " 'resources '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'are '\n", - " 'not '\n", - " 'required '\n", - " 'anymore. '\n", - " 'Ready '\n", - " 'to '\n", - " 'try '\n", - " 'this '\n", - " 'out '\n", - " 'on '\n", - " 'your '\n", - " 'own? '\n", - " 'Start '\n", - " 'a '\n", - " 'free '\n", - " 'trial '\n", - " '. '\n", - " 'Want '\n", - " 'to '\n", - " 'get '\n", - " 'Elastic '\n", - " 'certified? '\n", - " 'Find '\n", - " 'out '\n", - " 'when '\n", - " 'the '\n", - " 'next '\n", - " 'Elasticsearch '\n", - " 'Engineer '\n", - " 'training '\n", - " 'is '\n", - " 'running! 
'\n", - " 'Pooya '\n", - " 'Salehi '\n", - " 'Henning '\n", - " 'Andersen '\n", - " 'Francisco '\n", - " 'Fernández '\n", - " 'Castaño '\n", - " '11 '\n", - " 'min '\n", - " 'read '\n", - " '29 '\n", - " 'July '\n", - " '2024 '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless '\n", - " 'Share '\n", - " 'Twitter '\n", - " 'Facebook '\n", - " 'LinkedIn '\n", - " 'Recommended '\n", - " 'Articles '\n", - " 'Elastic '\n", - " 'Cloud'},\n", - " {'embeddings': {'##4': 0.5609497,\n", - " '##down': 0.011559885,\n", - " '##est': 1.1421111,\n", - " '##hi': 0.0060656513,\n", - " '##ing': 0.48465544,\n", - " '##ler': 0.12595108,\n", - " '##less': 1.3963115,\n", - " '##lessly': 0.76121324,\n", - " '##ling': 1.03232,\n", - " '##load': 0.6918682,\n", - " '##oya': 0.56508857,\n", - " '##rch': 0.94580704,\n", - " '##room': 1.397477,\n", - " '##sca': 1.4164101,\n", - " '##sea': 1.4075159,\n", - " '10': 0.005647892,\n", - " '15': 1.0004816,\n", - " '16': 0.0726173,\n", - " '202': 0.79451597,\n", - " 'account': 0.054787852,\n", - " 'accounting': 0.2977837,\n", - " 'advantage': 0.13797385,\n", - " 'after': 0.04746113,\n", - " 'algorithm': 0.84724355,\n", - " 'amazon': 0.7599511,\n", - " 'analysis': 0.4048887,\n", - " 'analyze': 0.12881227,\n", - " 'andersen': 0.091110215,\n", - " 'anya': 0.031511437,\n", - " 'apache': 0.8387389,\n", - " 'architect': 0.57877886,\n", - " 'archive': 0.027499544,\n", - " 'august': 0.523268,\n", - " 'auto': 1.4506402,\n", - " 'automatic': 0.94025064,\n", - " 'availability': 0.348747,\n", - " 'available': 0.05306761,\n", - " 'blog': 0.8397168,\n", - " 'bot': 0.38508278,\n", - " 'bug': 0.1267487,\n", - " 'build': 0.776895,\n", - " 'building': 0.7504958,\n", - " 'built': 0.19563165,\n", - " 'calculate': 0.3598465,\n", - " 'calculating': 0.11605539,\n", - " 'calculation': 0.8540975,\n", - " 'calculations': 0.57275534,\n", - " 'capacity': 0.3109483,\n", - " 'cave': 0.29021654,\n", - " 'certification': 0.64684826,\n", - " 'certified': 0.26541537,\n", - " 
'checkpoint': 0.06267695,\n", - " 'chess': 0.22270066,\n", - " 'class': 0.044449553,\n", - " 'client': 0.05088419,\n", - " 'cloud': 0.9856347,\n", - " 'cluster': 1.8377897,\n", - " 'clustered': 0.18159664,\n", - " 'clusters': 0.79538465,\n", - " 'collapse': 0.29267746,\n", - " 'component': 0.012821147,\n", - " 'components': 0.50653857,\n", - " 'computer': 0.22416146,\n", - " 'cost': 0.06086615,\n", - " 'crawl': 0.27863678,\n", - " 'data': 0.23600358,\n", - " 'database': 0.386357,\n", - " 'decrease': 0.29198787,\n", - " 'deployment': 0.4085412,\n", - " 'desired': 0.04168813,\n", - " 'development': 0.0050133946,\n", - " 'dimensions': 0.10934332,\n", - " 'disadvantage': 0.33458805,\n", - " 'domain': 0.16470446,\n", - " 'down': 1.343148,\n", - " 'downs': 0.2709486,\n", - " 'drop': 0.19782026,\n", - " 'during': 0.4177895,\n", - " 'effect': 0.39730436,\n", - " 'elastic': 1.9854976,\n", - " 'engineer': 0.58167315,\n", - " 'engineering': 0.5884908,\n", - " 'ensemble': 0.007619722,\n", - " 'facebook': 0.3225428,\n", - " 'fernandez': 0.42895493,\n", - " 'fifteen': 0.10546452,\n", - " 'first': 0.50220585,\n", - " 'forest': 0.14911638,\n", - " 'framework': 0.047809396,\n", - " 'free': 0.3561092,\n", - " 'global': 0.09408311,\n", - " 'group': 0.14574468,\n", - " 'handling': 0.30345336,\n", - " 'head': 0.117694445,\n", - " 'hour': 0.3250166,\n", - " 'hours': 0.70438623,\n", - " 'implement': 0.13235687,\n", - " 'implementation': 0.13236406,\n", - " 'important': 0.055658367,\n", - " 'improve': 0.2550515,\n", - " 'increase': 0.74923754,\n", - " 'increasing': 0.3597461,\n", - " 'index': 1.4273754,\n", - " 'indexed': 0.2932871,\n", - " 'ing': 1.2874681,\n", - " 'introduced': 0.10785041,\n", - " 'inventory': 0.65916276,\n", - " 'java': 0.88944626,\n", - " 'july': 0.14186577,\n", - " 'large': 0.06278902,\n", - " 'latest': 0.068817586,\n", - " 'learning': 0.12424224,\n", - " 'length': 0.030345708,\n", - " 'limit': 0.14073928,\n", - " 'load': 1.0610044,\n", - " 'loading': 0.39865428,\n", 
- " 'loss': 0.11432742,\n", - " 'machine': 0.029201662,\n", - " 'maintenance': 0.15768714,\n", - " 'management': 0.31734702,\n", - " 'math': 0.406777,\n", - " 'maximum': 0.13483465,\n", - " 'measure': 0.5081328,\n", - " 'mechanism': 0.8204686,\n", - " 'memory': 1.0461255,\n", - " 'metric': 0.9943368,\n", - " 'mining': 0.5402124,\n", - " 'minute': 0.92393905,\n", - " 'minutes': 0.3759728,\n", - " 'moment': 0.11160666,\n", - " 'morris': 0.060925715,\n", - " 'network': 0.51853234,\n", - " 'node': 0.99145895,\n", - " 'online': 0.36771652,\n", - " 'operation': 0.28533393,\n", - " 'overhead': 0.086819395,\n", - " 'patience': 0.11310515,\n", - " 'perfect': 0.12382903,\n", - " 'performance': 0.06312573,\n", - " 'process': 0.5356137,\n", - " 'processing': 0.55718875,\n", - " 'production': 0.05736718,\n", - " 'project': 0.14496073,\n", - " 'prototype': 0.31378728,\n", - " 'quan': 0.22408743,\n", - " 'ready': 0.25202373,\n", - " 'reduce': 0.5264253,\n", - " 'reduction': 0.037918843,\n", - " 'research': 0.0142833255,\n", - " 'resource': 0.09839988,\n", - " 'resources': 0.7532266,\n", - " 'rights': 0.08338795,\n", - " 'room': 0.84089494,\n", - " 'rs': 0.47752637,\n", - " 'scala': 0.17796026,\n", - " 'scale': 1.6349432,\n", - " 'scaled': 0.39957505,\n", - " 'scales': 0.24761787,\n", - " 'scaling': 1.3751862,\n", - " 'scope': 0.009172562,\n", - " 'search': 0.6669978,\n", - " 'seconds': 0.11594447,\n", - " 'serial': 0.21314114,\n", - " 'server': 1.1875997,\n", - " 'servers': 0.3761195,\n", - " 'share': 0.21588095,\n", - " 'shrink': 0.08177304,\n", - " 'si': 0.039096646,\n", - " 'sid': 0.26323187,\n", - " 'site': 0.27832702,\n", - " 'size': 1.2518198,\n", - " 'sizes': 0.68347317,\n", - " 'small': 0.021309003,\n", - " 'software': 0.21712899,\n", - " 'sort': 0.46309024,\n", - " 'step': 0.13614927,\n", - " 'storage': 0.33423752,\n", - " 'strategy': 0.2746019,\n", - " 'swarm': 0.18959516,\n", - " 'task': 0.12210263,\n", - " 'time': 0.3716685,\n", - " 'traffic': 0.0044686934,\n", - " 
'training': 0.56078845,\n", - " 'trial': 0.30781624,\n", - " 'tutor': 0.18126883,\n", - " 'twitter': 0.7352328,\n", - " 'useful': 0.07486964,\n", - " 'user': 0.61840165,\n", - " 'users': 0.5178945,\n", - " 'wait': 0.12994274,\n", - " 'weaving': 0.09568315,\n", - " 'web': 0.3402482,\n", - " 'website': 0.17116618,\n", - " 'work': 0.38590312,\n", - " 'working': 0.040917397,\n", - " 'works': 0.2640411,\n", - " 'years': 0.057129644},\n", - " 'text': 'cluster '\n", - " 'size, '\n", - " 'we '\n", - " 'account '\n", - " 'for '\n", - " 'a '\n", - " '10% '\n", - " 'headroom '\n", - " 'when '\n", - " 'calculating '\n", - " 'the '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'during '\n", - " 'a '\n", - " 'scale '\n", - " 'down '\n", - " 'and '\n", - " 'a '\n", - " 'scale '\n", - " 'down '\n", - " 'takes '\n", - " 'effect '\n", - " 'only '\n", - " 'if '\n", - " 'all '\n", - " 'desired '\n", - " 'cluster '\n", - " 'size '\n", - " 'calculations '\n", - " 'within '\n", - " 'the '\n", - " 'past '\n", - " '15 '\n", - " 'minute '\n", - " 'have '\n", - " 'indicated '\n", - " 'a '\n", - " 'scale-down. '\n", - " 'Currently, '\n", - " 'the '\n", - " 'time '\n", - " 'that '\n", - " 'it '\n", - " 'takes '\n", - " 'for '\n", - " 'an '\n", - " 'increase '\n", - " 'in '\n", - " 'the '\n", - " 'metrics '\n", - " 'to '\n", - " 'lead '\n", - " 'to '\n", - " 'the '\n", - " 'first '\n", - " 'Elasticsearch '\n", - " 'node '\n", - " 'being '\n", - " 'added '\n", - " 'to '\n", - " 'the '\n", - " 'cluster '\n", - " 'and '\n", - " 'ready '\n", - " 'to '\n", - " 'process '\n", - " 'indexing '\n", - " 'load '\n", - " 'is '\n", - " 'under '\n", - " '1 '\n", - " 'minute. 
'\n", - " 'Conclusion '\n", - " 'In '\n", - " 'this '\n", - " 'blog '\n", - " 'post, '\n", - " 'we '\n", - " 'explained '\n", - " 'how '\n", - " 'ingest '\n", - " 'autoscaling '\n", - " 'works '\n", - " 'in '\n", - " 'Elasticsearch, '\n", - " 'the '\n", - " 'different '\n", - " 'components '\n", - " 'involved, '\n", - " 'and '\n", - " 'the '\n", - " 'metrics '\n", - " 'used '\n", - " 'to '\n", - " 'quantify '\n", - " 'the '\n", - " 'resources '\n", - " 'needed '\n", - " 'to '\n", - " 'handle '\n", - " 'the '\n", - " 'indexing '\n", - " 'workload. '\n", - " 'We '\n", - " 'believe '\n", - " 'that '\n", - " 'such '\n", - " 'an '\n", - " 'autoscaling '\n", - " 'mechanism '\n", - " 'is '\n", - " 'crucial '\n", - " 'to '\n", - " 'reduce '\n", - " 'the '\n", - " 'operational '\n", - " 'overhead '\n", - " 'of '\n", - " 'an '\n", - " 'Elasticsearch '\n", - " 'cluster '\n", - " 'for '\n", - " 'the '\n", - " 'users '\n", - " 'by '\n", - " 'automatically '\n", - " 'increasing '\n", - " 'the '\n", - " 'available '\n", - " 'resources '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'when '\n", - " 'necessary. '\n", - " 'Furthermore, '\n", - " 'it '\n", - " 'leads '\n", - " 'to '\n", - " 'cost '\n", - " 'reduction '\n", - " 'by '\n", - " 'scaling '\n", - " 'down '\n", - " 'the '\n", - " 'cluster '\n", - " 'when '\n", - " 'the '\n", - " 'available '\n", - " 'resources '\n", - " 'in '\n", - " 'the '\n", - " 'cluster '\n", - " 'are '\n", - " 'not '\n", - " 'required '\n", - " 'anymore. '\n", - " 'Ready '\n", - " 'to '\n", - " 'try '\n", - " 'this '\n", - " 'out '\n", - " 'on '\n", - " 'your '\n", - " 'own? '\n", - " 'Start '\n", - " 'a '\n", - " 'free '\n", - " 'trial '\n", - " '. '\n", - " 'Want '\n", - " 'to '\n", - " 'get '\n", - " 'Elastic '\n", - " 'certified? '\n", - " 'Find '\n", - " 'out '\n", - " 'when '\n", - " 'the '\n", - " 'next '\n", - " 'Elasticsearch '\n", - " 'Engineer '\n", - " 'training '\n", - " 'is '\n", - " 'running! 
'\n", - " 'Pooya '\n", - " 'Salehi '\n", - " 'Henning '\n", - " 'Andersen '\n", - " 'Francisco '\n", - " 'Fernández '\n", - " 'Castaño '\n", - " '11 '\n", - " 'min '\n", - " 'read '\n", - " '29 '\n", - " 'July '\n", - " '2024 '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless '\n", - " 'Share '\n", - " 'Twitter '\n", - " 'Facebook '\n", - " 'LinkedIn '\n", - " 'Recommended '\n", - " 'Articles '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless '\n", - " '• '\n", - " '15 '\n", - " 'May '\n", - " '2024 '\n", - " 'Building '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless '\n", - " 'Explore '\n", - " 'the '\n", - " 'architectural '\n", - " 'decisions '\n", - " 'we '\n", - " 'made '\n", - " 'along '\n", - " 'the '\n", - " 'journey '\n", - " 'of '\n", - " 'building '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless. '\n", - " 'Jason '\n", - " 'Tedor '\n", - " 'Pooya '\n", - " 'Salehi '\n", - " 'Henning '\n", - " 'Andersen '\n", - " 'Francisco '\n", - " 'Fernández '\n", - " 'Castaño '\n", - " '11 '\n", - " 'min '\n", - " 'read '\n", - " '29 '\n", - " 'July '\n", - " '2024 '\n", - " 'Elastic '\n", - " 'Cloud '\n", - " 'Serverless '\n", - " 'Share '\n", - " 'Twitter '\n", - " 'Facebook '\n", - " 'LinkedIn '\n", - " 'Jump '\n", - " 'to '\n", - " 'Ingest '\n", - " 'autoscaling '\n", - " 'overview '\n", - " 'Metrics '\n", - " 'Ingestion '\n", - " 'load '\n", - " 'Memory '\n", - " 'Scaling '\n", - " 'the '\n", - " 'cluster '\n", - " 'Show '\n", - " 'more '\n", - " 'Sitemap '\n", - " 'RSS '\n", - " 'Feed '\n", - " 'Search '\n", - " 'Labs '\n", - " 'Repo '\n", - " 'Elastic.co '\n", - " '©2024. '\n", - " 'Elasticsearch '\n", - " 'B.V. 
'\n", - " 'All '\n", - " 'Rights '\n", - " 'Reserved.'}],\n", - " 'inference_id': 'my-elser-model',\n", - " 'model_settings': {'task_type': 'sparse_embedding'}}},\n", - " 'title': 'Elasticsearch ingest autoscaling — '\n", - " 'Search Labs',\n", - " 'url': 'https://www.elastic.co/search-labs/blog/elasticsearch-ingest-autoscaling',\n", - " 'url_host': 'www.elastic.co',\n", - " 'url_path': '/search-labs/blog/elasticsearch-ingest-autoscaling',\n", - " 'url_path_dir1': 'search-labs',\n", - " 'url_path_dir2': 'blog',\n", - " 'url_path_dir3': 'elasticsearch-ingest-autoscaling',\n", - " 'url_port': 443,\n", - " 'url_scheme': 'https'}}],\n", - " 'max_score': 1.2861483,\n", - " 'total': {'relation': 'eq', 'value': 228}},\n", - " 'timed_out': False,\n", - " 'took': 2}\n" - ] - } - ] - } - ] -} \ No newline at end of file