oracle-devrel · ttscoff · Oct 29, 2024 · Oct 7, 2024 · Oct 7, 2024 · Oct 7, 2024
diff --git a/nvidia-nemo-oke/images/access-cluster.png b/nvidia-nemo-oke/images/access-cluster.png
diff --git a/nvidia-nemo-oke/images/create-oke-cluster.png b/nvidia-nemo-oke/images/create-oke-cluster.png
diff --git a/nvidia-nemo-oke/images/generate-ngc-api-key.png b/nvidia-nemo-oke/images/generate-ngc-api-key.png
diff --git a/nvidia-nemo-oke/images/jupyter-db-connection-setup.png b/nvidia-nemo-oke/images/jupyter-db-connection-setup.png
diff --git a/nvidia-nemo-oke/images/jupyter-db-connection-success.png b/nvidia-nemo-oke/images/jupyter-db-connection-success.png
diff --git a/nvidia-nemo-oke/images/jupyter-input-api-key.png b/nvidia-nemo-oke/images/jupyter-input-api-key.png
diff --git a/nvidia-nemo-oke/images/jupyter-install-oracledb.png b/nvidia-nemo-oke/images/jupyter-install-oracledb.png
diff --git a/nvidia-nemo-oke/images/jupyter-run-all-cells.png b/nvidia-nemo-oke/images/jupyter-run-all-cells.png
diff --git a/nvidia-nemo-oke/images/jupyter-upload-files.png b/nvidia-nemo-oke/images/jupyter-upload-files.png
diff --git a/nvidia-nemo-oke/readme.md b/nvidia-nemo-oke/readme.md
diff --git a/nvidia-nemo-oke/sample-notebooks/example_pdf.pdf b/nvidia-nemo-oke/sample-notebooks/example_pdf.pdf
diff --git a/nvidia-nemo-oke/sample-notebooks/reranking_23ai_clean.ipynb b/nvidia-nemo-oke/sample-notebooks/reranking_23ai_clean.ipynb
diff --git a/nvidia-nemo-oke/sample-notebooks/text_embedding_23ai_clean.ipynb b/nvidia-nemo-oke/sample-notebooks/text_embedding_23ai_clean.ipynb
@@ -0,0 +1,395 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "32c6cd3c-a339-4b67-b041-8bf8517a7dbb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#importing all the needed packages\n",
+    "import os\n",
+    "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "import oracledb\n",
+    "from langchain_community.document_loaders import PyPDFLoader\n",
+    "from langchain_community.vectorstores.oraclevs import OracleVS\n",
+    "from langchain_community.vectorstores.utils import DistanceStrategy\n",
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c1308beb-9a6a-40fb-bc29-0a7675532b44",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the NVIDIA API key as an environment variable\n",
+    "os.environ[\"NVIDIA_API_KEY\"] = \"<your nvidia key starting with nvapi**** here>\" \n",
+    "# Initialize the LLM (Large Language Model) with the specified model\n",
+    "llm = ChatNVIDIA(model=\"meta/llama3-8b-instruct\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "613c3945-2e60-4e63-b7c6-9448ec98c577",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a chat prompt template with a system message and a user message\n",
+    "prompt = ChatPromptTemplate.from_messages([\n",
+    "    (\"system\", (\n",
+    "        \"You are a helpful and friendly AI!\"\n",
+    "        \"Your responses should be concise and no longer than two sentences.\"\n",
+    "        \"Say you don't know if you don't have this information.\"\n",
+    "    )),\n",
+    "    (\"user\", \"{question}\")\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "1773cd2f-1e28-4002-bd3d-6db55fb03dfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Chain the prompt, LLM, and output parser together\n",
+    "chain = prompt | llm | StrOutputParser()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "12143a42-8e20-40d4-b8bd-2dc5f29103aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "A CPU (Central Processing Unit) is the brain of your computer, handling general computing tasks, executing instructions, and performing calculations. A GPU (Graphics Processing Unit) is designed specifically for handling graphics and computationally intensive tasks, like gaming, video editing, and scientific simulations, with many cores performing parallel processing.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example questions to invoke the LLM chain\n",
+    "print(chain.invoke({\"question\": \"What's the difference between a GPU and a CPU?\"}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "fe31329f-9f95-4d8f-8737-d61a7ea3e251",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I'm happy to help! The \"A\" in NVIDIA A100 likely stands for \"Accelerated\", which refers to the card's enhanced computing capabilities.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example questions to invoke the LLM chain\n",
+    "print(chain.invoke({\"question\": \"What does the A in the NVIDIA A100 stand for?\"}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "b77aa0fc-6d7e-4516-bf2c-2ec046263dc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "I'm not familiar with the NVIDIA H200, could you provide more context or information about it?\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example questions to invoke the LLM chain\n",
+    "print(chain.invoke({\"question\": \"How much memory does the NVIDIA H200 have?\"}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "d7a94eb5-4ef9-426f-bc36-4418c7cf2518",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The database user name is: vector\n",
+      "Database connection information is: localhost:1521/freepdb1\n",
+      "Connection successful!\n"
+     ]
+    }
+   ],
+   "source": [
+    "## # Database connection setup\n",
+    "username = \"<your username here>\"\n",
+    "password = \"<your password here>\"\n",
+    "host=\"<IP of your host here>\"\n",
+    "port=\"<the port that you are using here>\"\n",
+    "service_name=\"<service name here>\"\n",
+    "dsn=host+\":\"+port+\"/\"+service_name\n",
+    "\n",
+    "print(\"The database user name is:\", username)\n",
+    "print(\"Database connection information is:\", dsn)\n",
+    "\n",
+    "## Connect to the database\n",
+    "try:\n",
+    "    conn23c = oracledb.connect(user=username, password=password, dsn=dsn)\n",
+    "    print(\"Connection successful!\")\n",
+    "except oracledb.DatabaseError as e:\n",
+    "    error, = e.args\n",
+    "    print(f\"Connection failed. Error code: {error.code}\")\n",
+    "    print(f\"Error message: {error.message}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "cee3c7c3-15e7-40ce-9c91-b98dbb44e8a2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/myenv_nemo/lib/python3.12/site-packages/langchain_nvidia_ai_endpoints/_common.py:486: UserWarning: Found nvidia/nv-embedqa-e5-v5 in available_models, but type is unknown and inference may fail.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "## Initialize an embedding model for query embedding\n",
+    "embedding_model = NVIDIAEmbeddings(model=\"nvidia/nv-embedqa-e5-v5\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "b74d98e5-f993-42a7-841e-77271cef322b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.0251007080078125,\n",
+       " -0.038055419921875,\n",
+       " 0.035980224609375,\n",
+       " -0.061309814453125,\n",
+       " 0.056396484375,\n",
+       " -0.001224517822265625,\n",
+       " 0.01220703125,\n",
+       " -0.04010009765625,\n",
+       " -0.0258941650390625,\n",
+       " -0.029815673828125]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "## Create an embedding vector for a specific query\n",
+    "embedding_model.embed_query(\"How much memory does the NVIDIA H200 have?\")[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "128844b0-d715-4fb1-9664-0b31f949da82",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Document(metadata={'source': 'https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf', 'page': 0}, page_content='NVIDIA H200 Tensor Core GPU\\u2002|\\u2002Datasheet\\u2002|\\u2002 1NVIDIA H200 Tensor Core GPU\\nSupercharging AI and HPC workloads.\\nHigher Performance With Larger, Faster Memory\\nThe NVIDIA H200 Tensor Core GPU supercharges generative AI and high-\\nperformance computing (HPC) workloads with game-changing performance  \\nand memory capabilities. \\nBased on the NVIDIA Hopper™ architecture , the NVIDIA H200 is the first GPU to \\noffer 141 gigabytes (GB) of HBM3e memory at 4.8 terabytes per second (TB/s)—\\nthat’s nearly double the capacity of the NVIDIA H100 Tensor Core GPU  with \\n1.4X more memory bandwidth. The H200’s larger and faster memory accelerates \\ngenerative AI and large language models, while advancing scientific computing for \\nHPC workloads with better energy efficiency and lower total cost of ownership. \\nUnlock Insights With High-Performance LLM Inference\\nIn the ever-evolving landscape of AI, businesses rely on large language models to \\naddress a diverse range of inference needs. An AI inference  accelerator must deliver the \\nhighest throughput at the lowest TCO when deployed at scale for a massive user base. \\nThe H200 doubles inference performance compared to H100 GPUs when handling \\nlarge language models such as Llama2 70B.\\n.\\nPreliminary specifications. May be subject to change.\\nLlama2 13B: ISL 128, OSL 2K | Throughput | H100 SXM 1x GPU BS 64 | H200 SXM 1x GPU BS 128\\nGPT-3 175B: ISL 80, OSL 200 | x8 H100 SXM GPUs BS 64 | x8 H200 SXM GPUs BS 128\\nLlama2 70B: ISL 2K, OSL 128 | Throughput | H100 SXM 1x GPU BS 8 | H200 SXM 1x GPU BS 32.Key Features\\n >141GB of HBM3e GPU memory\\n >4.8TB/s of memory bandwidth\\n >4 petaFLOPS of FP8 performance\\n >2X LLM inference performance\\n >110X HPC performance\\nDatasheet')"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load a PDF document from a URL\n",
+    "loader = PyPDFLoader(\"https://nvdam.widen.net/content/udc6mzrk7a/original/hpc-datasheet-sc23-h200-datasheet-3002446.pdf\")\n",
+    "document = loader.load()\n",
+    "document[0]  # Print the first page of the document"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "505690c9-eba3-4aa8-89d3-c24048c2db50",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a text splitter to chunk the document into smaller pieces\n",
+    "text_splitter = RecursiveCharacterTextSplitter(\n",
+    "    chunk_size=500,\n",
+    "    chunk_overlap=100,\n",
+    "    separators=[\"\\n\\n\", \"\\n\", \".\", \";\", \",\", \" \", \"\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "dcd1e24e-e171-47cf-b291-c1df5d96f6fc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of chunks from the document: 16\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Split the document into chunks\n",
+    "document_chunks = text_splitter.split_documents(document)\n",
+    "print(\"Number of chunks from the document:\", len(document_chunks))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "a08329bc-7289-4557-8ffc-3e181d7b9dbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract text (page content) from the document chunks\n",
+    "page_contents = [doc.page_content for doc in document_chunks]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "d4cccdda-383b-480b-9cf8-abd8c8f314a6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[-0.0394287109375,\n",
+       " -0.03741455078125,\n",
+       " 0.06634521484375,\n",
+       " -0.0518798828125,\n",
+       " 0.08477783203125,\n",
+       " -0.0224456787109375,\n",
+       " 0.02484130859375,\n",
+       " -0.0247802734375,\n",
+       " -0.01496124267578125,\n",
+       " -0.005344390869140625]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Create vector embeddings from the document\n",
+    "embedding_model.embed_documents(page_contents)[0][:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "b47aa9ee-9056-4813-ab07-a96e6d355098",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize a OracleVS vector store to store the document embeddings in oracle 23ai\n",
+    "vector_store = OracleVS.from_documents(\n",
+    "    document_chunks,\n",
+    "    embedding_model,\n",
+    "    client=conn23c,\n",
+    "    table_name=\"MY_DEM04\",\n",
+    "    distance_strategy=DistanceStrategy.DOT_PRODUCT,\n",
+    "    #tablespace=\"my_tablespace\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "a6ef58af-ea62-4e28-b468-552798fb6584",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The NVIDIA H200 has 141 gigabytes (GB) of HBM3e memory.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a new chat prompt template for the AI with context awareness\n",
+    "prompt = ChatPromptTemplate.from_messages([\n",
+    "    (\"system\", \n",
+    "        \"You are a helpful and friendly AI!\"\n",
+    "        \"Your responses should be concise and no longer than two sentences.\"\n",
+    "        \"Do not hallucinate. Say you don't know if you don't have this information.\"\n",
+    "        # \"Answer the question using only the context\"\n",
+    "        \"\\n\\nQuestion:{question}\\n\\nContext:{context}\"\n",
+    "    ),\n",
+    "    (\"user\", \"{question}\")\n",
+    "])\n",
+    "# Create a chain that retrieves context from the vector store and answers questions\n",
+    "chain = (\n",
+    "    {\n",
+    "        \"context\": vector_store.as_retriever(),\n",
+    "        \"question\": RunnablePassthrough()\n",
+    "    }\n",
+    "    | prompt\n",
+    "    | llm\n",
+    "    | StrOutputParser()\n",
+    ")\n",
+    "# Invoke the chain with specific questions, using the retrieved context\n",
+    "print(chain.invoke(\"How much memory does the NVIDIA H200 have?\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}