diff --git a/.env.sample b/.env.sample index ee58dcf2..44517fd1 100644 --- a/.env.sample +++ b/.env.sample @@ -37,8 +37,7 @@ OLLAMA_EMBED_MODEL=nomic-embed-text OLLAMA_EMBEDDING_COLUMN=embedding_nomic # Needed for GitHub Models: GITHUB_TOKEN=YOUR-GITHUB-TOKEN -GITHUB_BASE_URL=https://models.inference.ai.azure.com -GITHUB_MODEL=gpt-4o -GITHUB_EMBED_MODEL=text-embedding-3-large +GITHUB_MODEL=openai/gpt-4o +GITHUB_EMBED_MODEL=openai/text-embedding-3-large GITHUB_EMBED_DIMENSIONS=1024 GITHUB_EMBEDDING_COLUMN=embedding_3l diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index bd7bc4b4..2715819e 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -53,7 +53,7 @@ async def common_parameters(): embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" elif OPENAI_EMBED_HOST == "github": openai_embed_deployment = None - openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large" + openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large" openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: @@ -70,7 +70,7 @@ async def common_parameters(): openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" elif OPENAI_CHAT_HOST == "github": openai_chat_deployment = None - openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o" + openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index af76229d..b704dc9d 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -54,11 +54,10 @@ async def create_openai_chat_client( ) elif OPENAI_CHAT_HOST == "github": logger.info("Setting up OpenAI client for chat completions using GitHub Models") - github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") - github_model = os.getenv("GITHUB_MODEL", "gpt-4o") - logger.info(f"Using GitHub Models with base URL: {github_base_url}, model: {github_model}") + github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o") + logger.info(f"Using GitHub Models with model: {github_model}") openai_chat_client = openai.AsyncOpenAI( - base_url=github_base_url, + base_url="https://models.github.ai/inference", api_key=os.getenv("GITHUB_TOKEN"), ) else: @@ -114,11 +113,10 @@ async def create_openai_embed_client( ) elif OPENAI_EMBED_HOST == "github": logger.info("Setting up OpenAI client for embeddings using GitHub Models") - github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com") - github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-small") - logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}") + github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small") + logger.info(f"Using GitHub Models with embedding model: {github_embed_model}") openai_embed_client = openai.AsyncOpenAI( - base_url=github_base_url, + base_url="https://models.github.ai/inference", api_key=os.getenv("GITHUB_TOKEN"), ) else: diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py index ecac8759..47caba26 100644 --- a/tests/test_openai_clients.py +++ b/tests/test_openai_clients.py @@ -1,5 +1,6 @@ import pytest +from fastapi_app.dependencies import common_parameters from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client from tests.data import test_data @@ -22,3 +23,44 @@ async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chat model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}] ) assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]." + + +@pytest.mark.asyncio +async def test_github_models_configuration(monkeypatch): + """Test that GitHub Models uses the correct URLs and model names.""" + # Set up environment for GitHub Models + monkeypatch.setenv("OPENAI_CHAT_HOST", "github") + monkeypatch.setenv("OPENAI_EMBED_HOST", "github") + monkeypatch.setenv("GITHUB_TOKEN", "fake-token") + # Don't set GITHUB_MODEL to test defaults + + # Test chat client configuration + chat_client = await create_openai_chat_client(None) + assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference" + assert chat_client.api_key == "fake-token" + + # Test embed client configuration + embed_client = await create_openai_embed_client(None) + assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference" + assert embed_client.api_key == "fake-token" + + # Test that dependencies use correct defaults + context = await common_parameters() + assert context.openai_chat_model == "openai/gpt-4o" + assert context.openai_embed_model == "openai/text-embedding-3-large" + + +@pytest.mark.asyncio +async def test_github_models_with_custom_values(monkeypatch): + """Test that GitHub Models respects custom environment values.""" + # Set up environment for GitHub Models with custom values + monkeypatch.setenv("OPENAI_CHAT_HOST", "github") + monkeypatch.setenv("OPENAI_EMBED_HOST", "github") + monkeypatch.setenv("GITHUB_TOKEN", "fake-token") + monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4") + monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002") + + # Test that dependencies use custom values + context = await common_parameters() + assert context.openai_chat_model == "openai/gpt-4" + assert context.openai_embed_model == "openai/text-embedding-ada-002"