Skip to content

Commit 85b585f

Browse files
add pinecone kb
1 parent df57725 commit 85b585f

File tree

4 files changed

+147
-10
lines changed

4 files changed

+147
-10
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Unlike traditional AI assistants that forget conversations after each session, S
6363
- Conversational fact search powered by Zep
6464
- X (Twitter) search using Grok
6565
- Conversational message history using MongoDB (on-prem or hosted)
66-
- Knowledge Base search via Pinecone, Cohere, and OpenAI
66+
- Knowledge Base search via Pinecone
6767
- Comprehensive reasoning combining multiple data sources
6868

6969
🛠️ **Data Processing Tools**

poetry.lock

Lines changed: 37 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ zep-cloud = "^2.3.1"
2525
requests = "^2.32.3"
2626
pandas = "^2.2.3"
2727
qdrant-client = "^1.13.2"
28+
pinecone = "^6.0.1"
2829

2930
[build-system]
3031
requires = ["poetry-core>=1.0.0"]

solana_agent/ai.py

Lines changed: 108 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import datetime
33
import json
44
from typing import AsyncGenerator, List, Literal, Optional, Dict, Any, Callable
5+
import uuid
56
from pydantic import BaseModel
67
from motor.motor_asyncio import AsyncIOMotorClient
78
from openai import OpenAI
@@ -14,6 +15,7 @@
1415
from zep_cloud.client import Zep
1516
from zep_cloud.types import Message, RoleType
1617
import pandas as pd
18+
from pinecone import Pinecone
1719

1820

1921
class EventHandler(AssistantEventHandler):
@@ -73,8 +75,13 @@ def __init__(
7375
perplexity_api_key: str = None,
7476
grok_api_key: str = None,
7577
gemini_api_key: str = None,
78+
pinecone_api_key: str = None,
79+
pinecone_index_name: str = None,
7680
code_interpreter: bool = True,
77-
model: Literal["gpt-4o-mini", "gpt-4o"] = "gpt-4o-mini",
81+
openai_assistant_model: Literal["gpt-4o-mini",
82+
"gpt-4o"] = "gpt-4o-mini",
83+
openai_embedding_model: Literal["text-embedding-3-small",
84+
"text-embedding-3-large"] = "text-embedding-3-small"
7885
):
7986
"""Initialize a new AI assistant with memory and tool integration capabilities.
8087
@@ -87,8 +94,11 @@ def __init__(
8794
perplexity_api_key (str, optional): API key for Perplexity search. Defaults to None
8895
grok_api_key (str, optional): API key for X/Twitter search via Grok. Defaults to None
8996
gemini_api_key (str, optional): API key for Google Gemini. Defaults to None
97+
pinecone_api_key (str, optional): API key for Pinecone. Defaults to None
98+
pinecone_index_name (str, optional): Pinecone index name. Defaults to None
9099
code_interpreter (bool, optional): Enable code interpretation. Defaults to True
91-
model (Literal["gpt-4o-mini", "gpt-4o"], optional): AI model to use. Defaults to "gpt-4o-mini"
100+
openai_assistant_model (Literal["gpt-4o-mini", "gpt-4o"], optional): OpenAI model for assistant. Defaults to "gpt-4o-mini"
101+
openai_embedding_model (Literal["text-embedding-3-small", "text-embedding-3-large"], optional): OpenAI model for text embedding. Defaults to "text-embedding-3-small"
92102
93103
Example:
94104
```python
@@ -99,11 +109,18 @@ def __init__(
99109
database=MongoDatabase("mongodb://localhost", "ai_db"),
100110
)
101111
```
112+
Notes:
113+
- Requires valid OpenAI API key for core functionality
114+
- Database instance for storing messages and threads
115+
- Optional integrations for Zep, Perplexity, Grok, Gemini, Pinecone, and Cohere
116+
- Supports code interpretation and custom tool functions
117+
- You must create the Pinecone index in the dashboard before using it
102118
"""
103119
self._client = OpenAI(api_key=openai_api_key)
104120
self._name = name
105121
self._instructions = instructions
106-
self._model = model
122+
self._openai_assistant_model = openai_assistant_model
123+
self._openai_embedding_model = openai_embedding_model
107124
self._tools = [{"type": "code_interpreter"}
108125
] if code_interpreter else []
109126
self._tool_handlers = {}
@@ -121,6 +138,11 @@ def __init__(
121138
self._perplexity_api_key = perplexity_api_key
122139
self._grok_api_key = grok_api_key
123140
self._gemini_api_key = gemini_api_key
141+
self._pinecone = Pinecone(
142+
api_key=pinecone_api_key) if pinecone_api_key else None
143+
self._pinecone_index_name = pinecone_index_name if pinecone_index_name else None
144+
self._pinecone_index = self._pinecone.Index(
145+
self._pinecone_index_name) if self._pinecone else None
124146

125147
async def __aenter__(self):
126148
assistants = self._client.beta.assistants.list()
@@ -134,7 +156,7 @@ async def __aenter__(self):
134156
name=self.name,
135157
instructions=self._instructions,
136158
tools=self._tools,
137-
model=self._model,
159+
model=self._openai_assistant_model,
138160
).id
139161
await self._database.delete_all_threads()
140162

@@ -200,6 +222,81 @@ def csv_to_json(self, file_path: str) -> str:
200222
records = df.to_dict(orient="records")
201223
return json.dumps(records)
202224

225+
# search kb tool - has to be sync
def search_kb(self, query: str, limit: int = 10) -> str:
    """Search the Pinecone knowledge base using OpenAI embeddings.

    Args:
        query (str): Search query to find relevant documents
        limit (int, optional): Maximum number of results to return. Defaults to 10.

    Returns:
        str: JSON string of matched documents or error message

    Example:
        ```python
        results = ai.search_kb("machine learning basics", limit=5)
        # Returns: '[{"title": "ML Intro", "content": "..."}]'
        ```

    Note:
        - Requires configured Pinecone index
        - Uses OpenAI embeddings for semantic search
        - Returns JSON-serialized Pinecone match metadata results
        - Returns error message string if search fails
    """
    try:
        # Embed the query text, then do a metadata-only vector search.
        embedding_response = self._client.embeddings.create(
            input=query,
            model=self._openai_embedding_model,
        )
        query_vector = embedding_response.data[0].embedding
        search_results = self._pinecone_index.query(
            vector=query_vector,
            top_k=limit,
            include_metadata=True,
            include_values=False,
        )
        return json.dumps([match.metadata for match in search_results.matches])
    except Exception as e:
        return f"Failed to search KB. Error: {e}"
261+
# add document to kb tool - has to be sync
def add_document_to_kb(self, document: Dict[str, str]) -> None:
    """Add a document to the Pinecone knowledge base with OpenAI embeddings.

    Args:
        document (Dict[str, str]): Document to add, with string fields as values

    Example:
        ```python
        ai.add_document_to_kb({
            "title": "AI Basics",
            "content": "Introduction to artificial intelligence...",
            "author": "John Doe"
        })
        ```

    Note:
        - Requires Pinecone index to be configured
        - Uses OpenAI embeddings API
        - Document values must be strings
        - Automatically generates UUID for document
    """
    # BUG FIX: the previous version passed the list of field values to the
    # embeddings API (which returns one embedding PER value) but stored only
    # response.data[0].embedding — i.e. only the first field was actually
    # represented in the vector. Join all fields into one text so the stored
    # embedding covers the whole document.
    text = "\n".join(document.values())
    response = self._client.embeddings.create(
        input=text,
        model=self._openai_embedding_model,
    )
    self._pinecone_index.upsert(
        vectors=[
            {
                # hex UUID as the Pinecone vector id
                "id": uuid.uuid4().hex,
                "values": response.data[0].embedding,
                # full document kept as metadata for retrieval by search_kb
                "metadata": document,
            }
        ]
    )
203300
# summarize tool - has to be sync
204301
def summarize(
205302
self, text: str, model: Literal["gemini-2.0-flash", "gemini-1.5-pro"] = "gemini-1.5-pro"
@@ -368,6 +465,7 @@ def reason(
368465
use_perplexity: bool = True,
369466
use_grok: bool = True,
370467
use_facts: bool = True,
468+
use_kb=True,
371469
perplexity_model: Literal[
372470
"sonar", "sonar-pro", "sonar-reasoning-pro", "sonar-reasoning"
373471
] = "sonar",
@@ -382,6 +480,7 @@ def reason(
382480
use_perplexity (bool, optional): Include Perplexity search results. Defaults to True
383481
use_grok (bool, optional): Include X/Twitter search results. Defaults to True
384482
use_facts (bool, optional): Include stored conversation facts. Defaults to True
483+
use_kb (bool, optional): Include Pinecone knowledge base search results. Defaults to True
385484
perplexity_model (Literal, optional): Perplexity model to use. Defaults to "sonar"
386485
openai_model (Literal, optional): OpenAI model for reasoning. Defaults to "o3-mini"
387486
grok_model (Literal, optional): Grok model for X search. Defaults to "grok-beta"
@@ -394,9 +493,6 @@ def reason(
394493
result = ai.reason(
395494
user_id="user123",
396495
query="What are the latest AI trends?",
397-
use_perplexity=True,
398-
use_grok=True,
399-
use_facts=True
400496
)
401497
# Returns: "Based on multiple sources: [comprehensive answer]"
402498
```
@@ -421,6 +517,10 @@ def reason(
421517
x_search_results = self.search_x(query, grok_model)
422518
else:
423519
x_search_results = ""
520+
if use_kb:
521+
kb_results = self.search_kb(query)
522+
else:
523+
kb_results = ""
424524

425525
response = self._client.chat.completions.create(
426526
model=openai_model,
@@ -431,7 +531,7 @@ def reason(
431531
},
432532
{
433533
"role": "user",
434-
"content": f"Query: {query}, Facts: {facts}, Internet Search Results: {search_results}, X Search Results: {x_search_results}",
534+
"content": f"Query: {query}, Facts: {facts}, KB Results: {kb_results}, Internet Search Results: {search_results}, X Search Results: {x_search_results}",
435535
},
436536
],
437537
)

0 commit comments

Comments
 (0)