Skip to content

Commit 12ae99a

Browse files
committed
Dev Env
1 parent 5a81946 commit 12ae99a

File tree

17 files changed

+718
-0
lines changed

17 files changed

+718
-0
lines changed

backend/app.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import boto3
2+
import logging
3+
from typing import Optional
4+
from fastapi import FastAPI, HTTPException, Request
5+
from fastapi.middleware.cors import CORSMiddleware
6+
from pydantic import BaseModel, Field, ValidationError
7+
from langchain_aws import BedrockEmbeddings
8+
from fastapi.responses import JSONResponse
9+
from contextlib import asynccontextmanager
10+
from src.chatbot.config import DATA_DIRECTORY, FAISS_INDEX_PATH, TITAN_MODEL_ID, LLAMA_MODEL_ID, LOG_LEVEL
11+
from src.chatbot.services import FAISSManager, PDFDocumentProcessor, LLMService
12+
13+
# Lifespan handler run at application startup (the FastAPI app itself is created below)
14+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook: attempt to build the FAISS index, then hand over to the app.

    Awaits ``create_index()`` before yielding. Any exception raised by that
    call is logged and suppressed, so the application still starts when
    index creation fails. No work is performed after the ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    try:
        logger.info("Lifespan event triggered. Automatically running the /create_index endpoint...")
        # Invoke the route handler directly so the index is built at startup.
        await create_index()
    except Exception as startup_error:
        # logger.exception records the active traceback, like error(exc_info=True).
        logger.exception(f"Error during lifespan event: {startup_error}")

    # Startup finished (successfully or not); let the application serve requests.
    yield
22+
23+
# Create the application and attach the startup lifespan handler.
app = FastAPI(lifespan=lifespan)

# CORS: every origin, method and header is currently accepted.
# NOTE(review): the FastAPI CORS documentation discourages combining wildcard
# origins with allow_credentials=True -- confirm whether the frontend needs
# credentialed requests, and list explicit origins before deploying publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],  # Adjust this to your specific needs
    allow_headers=["*"],
    allow_methods=["*"],
)
33+
34+
# Initialize logging
35+
# Configure the root logger; LOG_LEVEL is imported from src.chatbot.config.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=LOG_LEVEL,
)

# Module-level logger shared by the handlers and endpoints in this file.
logger = logging.getLogger(__name__)
40+
41+
# Pydantic model for the question input
42+
class QuestionRequest(BaseModel):
    """Request body for the ``/answer`` endpoint.

    ``question`` is required. The three ``aws_*`` fields are optional and
    default to ``None``; the endpoint uses them only when all three are set.
    """

    # The user's question (required).
    question: str = Field(
        ...,
        json_schema_extra={"example": "What is the new tax laws??"},
    )
    # Optional per-request AWS credentials and region.
    aws_access_key_id: Optional[str] = Field(
        default=None,
        json_schema_extra={"example": "your_access_key_id"},
    )
    aws_secret_access_key: Optional[str] = Field(
        default=None,
        json_schema_extra={"example": "your_secret_access_key"},
    )
    aws_default_region: Optional[str] = Field(
        default=None,
        json_schema_extra={"example": "your_region"},
    )
47+
48+
# Middleware to log requests and responses
49+
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log each HTTP request on arrival and again once its response is ready.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request and returns the response.

    Returns:
        The response produced by ``call_next``, unmodified.
    """
    logger.info(f"Incoming request: {request.method} {request.url}")

    outcome = await call_next(request)
    status = outcome.status_code

    logger.info(f"Completed request: {request.method} {request.url} - Status code: {status}")
    return outcome
55+
56+
# Custom error handler for validation errors
57+
@app.exception_handler(ValidationError)
async def validation_exception_handler(request: Request, exc: ValidationError):
    """Turn a pydantic ``ValidationError`` into a 422 JSON response.

    Args:
        request: The request being processed when validation failed.
        exc: The validation error that was raised.

    Returns:
        A ``JSONResponse`` with status 422 whose content holds the error list
        under ``"detail"`` and the offending body (when the exception carries
        one, otherwise ``None``) under ``"body"``.
    """
    errors = exc.errors()
    logger.error(f"Validation error for request: {request.url} - {errors}")

    # pydantic's ValidationError has no ``body`` attribute (FastAPI's
    # RequestValidationError does), so read it defensively instead of
    # raising AttributeError from inside the error handler.
    body = getattr(exc, "body", None)

    return JSONResponse(
        status_code=422,
        content={
            "detail": errors,
            "body": body,
        },
    )
67+
68+
# General exception handler
69+
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Log an otherwise unhandled exception and reply with a generic 500.

    Args:
        request: The request being processed when the exception occurred.
        exc: The unhandled exception.

    Returns:
        A ``JSONResponse`` with status 500 and a fixed, non-revealing detail
        message.
    """
    logger.error(f"An unexpected error occurred: {exc}", exc_info=True)

    payload = {"detail": "An unexpected error occurred"}
    return JSONResponse(content=payload, status_code=500)
73+
74+
# Endpoint to create FAISS index from PDF documents
75+
@app.post("/create_index")
async def create_index():
    """Build the FAISS index from the PDF documents and save it.

    Loads and chunks the documents under ``DATA_DIRECTORY``, then creates and
    saves a vector store at ``FAISS_INDEX_PATH`` using Bedrock embeddings with
    ``TITAN_MODEL_ID``.

    Returns:
        A dict with a ``"message"`` key confirming the index was created.

    Raises:
        HTTPException: Re-raised unchanged if raised by a step; any other
            exception is logged and converted to a 500.
    """
    try:
        logger.info("Creating FAISS index...")

        # Step 1: read the PDFs and split them into chunks.
        pdf_processor = PDFDocumentProcessor(data_directory=DATA_DIRECTORY)
        document_chunks = pdf_processor.load_and_chunk_documents()

        # Step 2: embed the chunks and save the resulting vector store.
        index_manager = FAISSManager(
            index_path=FAISS_INDEX_PATH,
            embeddings=BedrockEmbeddings(model_id=TITAN_MODEL_ID),
        )
        index_manager.create_and_save_vector_store(document_chunks)
    except HTTPException:
        # Already an HTTP error; propagate it untouched.
        raise
    except Exception as index_error:
        logger.exception(f"Error creating FAISS index: {index_error}")
        raise HTTPException(status_code=500, detail="Error creating FAISS index")

    return {"message": "FAISS index created successfully."}
95+
96+
# Question answering endpoint
97+
@app.post("/answer")
async def answer_question(request: QuestionRequest):
    """Answer a question using the saved FAISS index and the Bedrock LLM.

    A ``bedrock-runtime`` client is built from the credentials in the request
    when the access key, secret key and region are all supplied; otherwise
    boto3's default credential resolution is used. The same client is handed
    to both the embeddings and the LLM so that request-supplied credentials
    apply to every Bedrock call made for this request.

    Args:
        request: The question plus optional AWS credentials and region.

    Returns:
        A dict with the generated response under ``"answer"``.

    Raises:
        HTTPException: Re-raised unchanged if raised by a step; any other
            exception is logged and converted to a 500.
    """
    try:
        logger.info(f"Received question: {request.question}")

        supplied_credentials = (
            request.aws_access_key_id,
            request.aws_secret_access_key,
            request.aws_default_region,
        )

        if all(supplied_credentials):
            logger.info("AWS credentials provided in the request.")
            # Initialize Boto3 client using provided credentials
            client = boto3.Session(
                aws_access_key_id=request.aws_access_key_id,
                aws_secret_access_key=request.aws_secret_access_key,
                region_name=request.aws_default_region,
            ).client("bedrock-runtime")
        else:
            if any(supplied_credentials):
                # Make the fallback visible instead of silently dropping
                # a partially filled credential set.
                logger.warning(
                    "Incomplete AWS credentials provided in the request; ignoring them."
                )
            # Initialize Boto3 client using environment variables or credentials from AWS CLI
            client = boto3.client("bedrock-runtime")
            logger.info("Using AWS credentials from environment variables or AWS CLI configuration.")

        # Load FAISS index. Pass the client explicitly: without it the
        # embeddings would use default credentials even when the caller
        # supplied their own.
        embeddings = BedrockEmbeddings(model_id=TITAN_MODEL_ID, client=client)
        faiss_manager = FAISSManager(index_path=FAISS_INDEX_PATH, embeddings=embeddings)
        vectorstore_faiss = faiss_manager.load_vector_store()

        # Initialize LLM
        llm_service = LLMService(model_id=LLAMA_MODEL_ID, client=client)
        llm = llm_service.initialize_llm()

        # Generate response
        response = llm_service.generate_response(
            llm=llm,
            vectorstore_faiss=vectorstore_faiss,
            query=request.question,
        )
        return {"answer": response}

    except HTTPException:
        # Already an HTTP error; propagate it untouched.
        raise
    except Exception as e:
        logger.error(f"Error processing question: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Error processing question") from e

backend/data/tax-2024.pdf

1.06 MB
Binary file not shown.

backend/env.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
AWS_ACCESS_KEY_ID=
2+
AWS_SECRET_ACCESS_KEY=
3+
AWS_DEFAULT_REGION=us-east-1
4+
LANGCHAIN_API_KEY=

backend/images/aws-cli-1.png

38.7 KB
Loading

backend/pyproject.toml

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
[tool.poetry]
2+
name = "Indian-Tax-Advisor" # Replace with your project's name
3+
version = "0.1.0" # Specify your project's version
4+
description = "A FastAPI application with a React frontend" # Short description of your project
5+
authors = ["Mohit Kumar <mohitpanghal12345@gmail.com>"]
6+
license = "MIT" # Specify the license type
7+
8+
[tool.poetry.dependencies]
9+
# FastAPI and its dependencies
10+
fastapi = "^0.115.0"
11+
uvicorn = "^0.31.0"
12+
13+
# Other dependencies
14+
aiohttp = "^3.10.9"
15+
boto3 = "^1.35.34"
16+
sqlalchemy = "^2.0.35"
17+
requests = "^2.32.3"
18+
python-dotenv = "^1.0.1"
19+
pydantic = "^2.9.2"
20+
pytest = "^8.3.3" # Testing framework
21+
22+
# Additional dependencies based on your requirements
23+
aiohappyeyeballs = "2.4.3"
24+
aiosignal = "1.3.1"
25+
attrs = "24.2.0"
26+
botocore = "1.35.34"
27+
certifi = "2024.8.30"
28+
charset-normalizer = "3.3.2"
29+
click = "8.1.7"
30+
colorama = "0.4.6"
31+
dataclasses-json = "0.6.7"
32+
faiss-cpu = "1.8.0.post1"
33+
frozenlist = "1.4.1"
34+
greenlet = "3.1.1"
35+
h11 = "0.14.0"
36+
httpcore = "1.0.6"
37+
httpx = "0.27.2"
38+
idna = "3.10"
39+
iniconfig = "2.0.0"
40+
jmespath = "1.0.1"
41+
jsonpatch = "1.33"
42+
jsonpointer = "3.0.0"
43+
langchain = "0.3.2"
44+
langchain-aws = "0.2.2"
45+
langchain-community = "0.3.1"
46+
langchain-core = "0.3.9"
47+
langchain-text-splitters = "0.3.0"
48+
langsmith = "0.1.131"
49+
marshmallow = "3.22.0"
50+
multidict = "6.1.0"
51+
mypy-extensions = "1.0.0"
52+
numpy = "1.26.4"
53+
orjson = "3.10.7"
54+
packaging = "24.1"
55+
pluggy = "1.5.0"
56+
pyasn1 = "0.6.1"
57+
pydantic-settings = "2.5.2"
58+
pydantic-core = "2.23.4"
59+
pypdf = "5.0.1"
60+
six = "1.16.0"
61+
sniffio = "1.3.1"
62+
tenacity = "8.5.0"
63+
typing-inspect = "0.9.0"
64+
typing_extensions = "4.12.2"
65+
urllib3 = "2.2.3"
66+
yarl = "1.13.1"
67+
68+
[tool.poetry.dev-dependencies]
69+
# Development dependencies for testing
70+
pytest = "^8.3.3"
71+
# Add additional dev dependencies as needed...
72+
73+
[build-system]
74+
requires = ["poetry-core>=1.0.0"] # Specify the minimum version of poetry-core
75+
build-backend = "poetry.core.masonry.api" # Build backend to use

backend/requirements.txt

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
aiohappyeyeballs==2.4.3
2+
aiohttp==3.10.9
3+
aiosignal==1.3.1
4+
annotated-types==0.7.0
5+
anyio==4.6.0
6+
attrs==24.2.0
7+
awscli==1.35.0
8+
boto3==1.35.34
9+
botocore==1.35.34
10+
certifi==2024.8.30
11+
charset-normalizer==3.3.2
12+
click==8.1.7
13+
colorama==0.4.6
14+
dataclasses-json==0.6.7
15+
docutils==0.16
16+
faiss-cpu==1.8.0.post1
17+
fastapi==0.115.0
18+
frozenlist==1.4.1
19+
greenlet==3.1.1
20+
h11==0.14.0
21+
httpcore==1.0.6
22+
httpx==0.27.2
23+
idna==3.10
24+
iniconfig==2.0.0
25+
jmespath==1.0.1
26+
jsonpatch==1.33
27+
jsonpointer==3.0.0
28+
langchain==0.3.2
29+
langchain-aws==0.2.2
30+
langchain-community==0.3.1
31+
langchain-core==0.3.9
32+
langchain-text-splitters==0.3.0
33+
langsmith==0.1.131
34+
marshmallow==3.22.0
35+
multidict==6.1.0
36+
mypy-extensions==1.0.0
37+
numpy==1.26.4
38+
orjson==3.10.7
39+
packaging==24.1
40+
pluggy==1.5.0
41+
pyasn1==0.6.1
42+
pydantic==2.9.2
43+
pydantic-settings==2.5.2
44+
pydantic_core==2.23.4
45+
pypdf==5.0.1
46+
pytest==8.3.3
47+
python-dateutil==2.9.0.post0
48+
python-dotenv==1.0.1
49+
PyYAML==6.0.2
50+
requests==2.32.3
51+
requests-toolbelt==1.0.0
52+
rsa==4.7.2
53+
s3transfer==0.10.2
54+
six==1.16.0
55+
sniffio==1.3.1
56+
SQLAlchemy==2.0.35
57+
starlette==0.38.6
58+
tenacity==8.5.0
59+
typing-inspect==0.9.0
60+
typing_extensions==4.12.2
61+
urllib3==2.2.3
62+
uvicorn==0.31.0
63+
yarl==1.13.1

backend/setup.py

Whitespace-only changes.

backend/src/__init__.py

Whitespace-only changes.

backend/src/chatbot/__init__.py

Whitespace-only changes.

backend/src/chatbot/config.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import os
2+
import json
3+
from dotenv import load_dotenv
4+
5+
# Load environment variables from .env file
6+
load_dotenv()
7+
8+
# Load configuration from config.json
9+
try:
    # NOTE: the path is relative to the current working directory, so the
    # application must be started from the directory that contains ``src/``.
    with open("src/config.json", encoding="utf-8") as config_file:
        config = json.load(config_file)
except FileNotFoundError as exc:
    raise RuntimeError("Configuration file 'src/config.json' not found.") from exc

# Configuration variables from config.json
DATA_DIRECTORY = config.get("DATA_DIRECTORY")
FAISS_INDEX_PATH = config.get("FAISS_INDEX_PATH")
TITAN_MODEL_ID = config.get("TITAN_MODEL_ID")
LLAMA_MODEL_ID = config.get("LLAMA_MODEL_ID")
CHUNK_SIZE = config.get("CHUNK_SIZE")
CHUNK_OVERLAP = config.get("CHUNK_OVERLAP")
MAX_THREADS = config.get("MAX_THREADS")
# Fall back to INFO when the key is absent *or* explicitly null, so a null
# value does not crash on ``.upper()``.
LOG_LEVEL = (config.get("LOG_LEVEL") or "INFO").upper()

# Validate configuration variables (LOG_LEVEL is optional and has a default)
required_configs = [
    DATA_DIRECTORY, FAISS_INDEX_PATH, TITAN_MODEL_ID, LLAMA_MODEL_ID,
    CHUNK_SIZE, CHUNK_OVERLAP, MAX_THREADS,
]

# Use a distinct loop name so the ``config`` mapping is not visually shadowed.
if any(value is None for value in required_configs):
    raise ValueError("Missing required configuration in config.json")

# Enable LangChain tracing and set the project name (fixed values, not read
# from config.json).
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Indian-Tax-Advisor"

# Retrieve and validate the API key from the environment variables.
# An empty value (e.g. a blank ``LANGCHAIN_API_KEY=`` line in the env file)
# is treated the same as a missing one. The key is already in the process
# environment, so it does not need to be written back.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if not langchain_api_key:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment variables.")
44+

0 commit comments

Comments
 (0)