
Commit 36fd1b3

cleanup
1 parent 760bc6c commit 36fd1b3

3 files changed: +74 -269 lines changed


chatlas/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
 from ._tokens import token_usage
 from ._tools import Tool, ToolRejectError
 from ._turn import Turn
-from ._vllm import ChatVLLM
+from ._provider_vllm import ChatVLLM
 
 try:
     from ._version import version as __version__
chatlas/_provider_vllm.py

Lines changed: 73 additions & 122 deletions
@@ -1,178 +1,129 @@
-from __future__ import annotations
-
 import os
 from typing import TYPE_CHECKING, Optional
 
+import requests
+
 from ._chat import Chat
 from ._provider_openai import OpenAIProvider
-from ._utils import MISSING, MISSING_TYPE, is_testing
+from ._turn import Turn
 
 if TYPE_CHECKING:
-    from ._provider_openai import ChatCompletion
-    from .types.openai import ChatClientArgs, SubmitInputArgs
+    from openai.types.chat import ChatCompletionToolParam
+
+    from .types.openai import ChatClientArgs
 
 
-def ChatVllm(
+def ChatVLLM(
     *,
     base_url: str,
     system_prompt: Optional[str] = None,
+    turns: Optional[list[Turn]] = None,
     model: Optional[str] = None,
     api_key: Optional[str] = None,
-    seed: Optional[int] | MISSING_TYPE = MISSING,
+    seed: Optional[int] = None,
     kwargs: Optional["ChatClientArgs"] = None,
-) -> Chat["SubmitInputArgs", ChatCompletion]:
+) -> Chat:
     """
-    Chat with a model hosted by vLLM.
+    Chat with a model hosted by vLLM
 
     [vLLM](https://docs.vllm.ai/en/latest/) is an open source library that
     provides an efficient and convenient LLMs model server. You can use
-    `ChatVllm()` to connect to endpoints powered by vLLM.
+    `ChatVLLM()` to connect to endpoints powered by vLLM.
 
     Prerequisites
     -------------
 
     ::: {.callout-note}
-    ## vLLM Server
+    ## vLLM runtime
 
-    You need access to a running vLLM server instance. vLLM provides
-    OpenAI-compatible API endpoints, so this function works with any
-    vLLM deployment that exposes the `/v1/chat/completions` endpoint.
+    `ChatVLLM` requires a vLLM server to be running somewhere (either on your
+    machine or a remote server). If you want to run a vLLM server locally, see
+    the [vLLM documentation](https://docs.vllm.ai/en/v0.5.3/getting_started/quickstart.html).
     :::
 
-    Examples
-    --------
+    ::: {.callout-note}
+    ## Python requirements
 
-    ```python
-    import os
-    from chatlas import ChatVllm
+    `ChatVLLM` requires the `openai` package (e.g., `pip install openai`).
+    :::
 
-    # Connect to a vLLM server
-    chat = ChatVllm(
-        base_url="http://localhost:8000/v1",
-        model="meta-llama/Llama-2-7b-chat-hf",
-        api_key=os.getenv("VLLM_API_KEY"),  # Optional, depends on server config
-    )
-    chat.chat("What is the capital of France?")
-    ```
 
     Parameters
     ----------
     base_url
-        The base URL of the vLLM server endpoint. This should include the
-        `/v1` path if the server follows OpenAI API conventions.
-    system_prompt
         A system prompt to set the behavior of the assistant.
+    system_prompt
+        Optional system prompt to prepend to conversation.
+    turns
+        A list of turns to start the chat with (i.e., continuing a previous
+        conversation). If not provided, the conversation begins from scratch. Do
+        not provide non-`None` values for both `turns` and `system_prompt`. Each
+        message in the list should be a dictionary with at least `role` (usually
+        `system`, `user`, or `assistant`, but `tool` is also possible). Normally
+        there is also a `content` field, which is a string.
     model
-        The model to use for the chat. If None, you may need to specify
-        the model name that's loaded on your vLLM server.
-    api_key
-        The API key to use for authentication. Some vLLM deployments may
-        not require authentication. You can set the `VLLM_API_KEY`
-        environment variable instead of passing it directly.
+        Model identifier to use.
     seed
-        Optional integer seed that vLLM uses to try and make output more
-        reproducible.
+        Random seed for reproducibility.
+    api_key
+        API key for authentication. If not provided, the `VLLM_API_KEY` environment
+        variable will be used.
     kwargs
-        Additional arguments to pass to the `openai.OpenAI()` client constructor.
-
-    Returns
-    -------
-    Chat
-        A chat object that retains the state of the conversation.
-
-    Note
-    ----
-    This function is a lightweight wrapper around [](`~chatlas.ChatOpenAI`) with
-    the defaults tweaked for vLLM endpoints.
-
-    Note
-    ----
-    vLLM servers are OpenAI-compatible, so this provider uses the same underlying
-    client as OpenAI but configured for your vLLM endpoint. Some advanced OpenAI
-    features may not be available depending on your vLLM server configuration.
-
-    Note
-    ----
-    Pasting an API key into a chat constructor (e.g., `ChatVllm(api_key="...")`)
-    is the simplest way to get started, and is fine for interactive use, but is
-    problematic for code that may be shared with others.
-
-    Instead, consider using environment variables or a configuration file to manage
-    your credentials. One popular way to manage credentials is to use a `.env` file
-    to store your credentials, and then use the `python-dotenv` package to load them
-    into your environment.
-
-    ```shell
-    pip install python-dotenv
-    ```
-
-    ```shell
-    # .env
-    VLLM_API_KEY=...
-    ```
-
-    ```python
-    from chatlas import ChatVllm
-    from dotenv import load_dotenv
-
-    load_dotenv()
-    chat = ChatVllm(base_url="http://localhost:8000/v1")
-    chat.console()
-    ```
-
-    Another, more general, solution is to load your environment variables into the shell
-    before starting Python (maybe in a `.bashrc`, `.zshrc`, etc. file):
-
-    ```shell
-    export VLLM_API_KEY=...
-    ```
+        Additional arguments to pass to the LLM client.
+
+    Returns:
+        Chat instance configured for vLLM
     """
-    if api_key is None:
-        api_key = os.getenv("VLLM_API_KEY")
 
-    if isinstance(seed, MISSING_TYPE):
-        seed = 1014 if is_testing() else None
+    if api_key is None:
+        api_key = get_vllm_key()
 
     if model is None:
-        raise ValueError(
-            "Must specify model. vLLM servers can host different models, so you need to "
-            "specify which one to use. Check your vLLM server's /v1/models endpoint "
-            "to see available models."
-        )
+        models = get_vllm_models(base_url, api_key)
+        available_models = ", ".join(models)
+        raise ValueError(f"Must specify model. Available models: {available_models}")
 
     return Chat(
-        provider=VllmProvider(
-            api_key=api_key,
-            model=model,
+        provider=VLLMProvider(
             base_url=base_url,
+            model=model,
             seed=seed,
-            name="vLLM",
+            api_key=api_key,
             kwargs=kwargs,
         ),
         system_prompt=system_prompt,
     )
 
 
-class VllmProvider(OpenAIProvider):
-    """
-    Provider for vLLM endpoints.
+class VLLMProvider(OpenAIProvider):
+    # Just like OpenAI but no strict
+    @staticmethod
+    def _tool_schema_json(
+        schema: "ChatCompletionToolParam",
+    ) -> "ChatCompletionToolParam":
+        schema["function"]["strict"] = False
+        return schema
 
-    vLLM is OpenAI-compatible but may have some differences in tool handling
-    and other advanced features.
-    """
 
-    def _chat_perform_args(self, *args, **kwargs):
-        """
-        Customize request arguments for vLLM compatibility.
+def get_vllm_key() -> str:
+    key = os.getenv("VLLM_API_KEY", os.getenv("VLLM_KEY"))
+    if not key:
+        raise ValueError("VLLM_API_KEY environment variable not set")
+    return key
+
+
+def get_vllm_models(base_url: str, api_key: Optional[str] = None) -> list[str]:
+    if api_key is None:
+        api_key = get_vllm_key()
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    response = requests.get(f"{base_url}/v1/models", headers=headers)
+    response.raise_for_status()
+    data = response.json()
 
-        vLLM may not support all OpenAI features like stream_options,
-        so we remove potentially unsupported parameters.
-        """
-        # Get the base arguments from OpenAI provider
-        result = super()._chat_perform_args(*args, **kwargs)
+    return [model["id"] for model in data["data"]]
 
-        # Remove stream_options if present (some vLLM versions don't support it)
-        if "stream_options" in result:
-            del result["stream_options"]
 
-        return result
+# def chat_vllm_test(**kwargs) -> Chat:
+#     """Create a test chat instance with default parameters."""
+#     return ChatVLLM(base_url="https://llm.nrp-nautilus.io/", model="llama3", **kwargs)
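With this change, `ChatVLLM` reads its API key from the `VLLM_API_KEY` environment variable (via `get_vllm_key()`), and when `model` is omitted it queries the server's `/v1/models` endpoint and lists the available models in the error message. A minimal usage sketch; the server URL and model name below are placeholders, not part of the commit:

```python
from chatlas import ChatVLLM

# Assumes VLLM_API_KEY is set in the environment and a vLLM server is
# reachable at the URL below; both the URL and model name are placeholders.
chat = ChatVLLM(
    base_url="http://localhost:8000",
    model="meta-llama/Llama-3.1-8B-Instruct",
)
chat.chat("What is the capital of France?")
```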

0 commit comments
