Skip to content

Commit 6d4acd5

Browse files
committed
Allow an env var to override the embedding token limit.
1 parent 1350f50 commit 6d4acd5

File tree

1 file changed

+8
-3
lines changed

1 file changed

+8
-3
lines changed

src/embeddings.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,12 @@ pub async fn generate_embeddings(
5050
let bpe = Arc::new(cl100k_base().map_err(|e| ServerError::Tiktoken(e.to_string()))?);
5151

5252
const CONCURRENCY_LIMIT: usize = 8; // Number of concurrent requests
53-
const TOKEN_LIMIT: usize = 8000; // Keep a buffer below the 8192 limit
53+
54+
// Our default model accepts at most 8192 tokens, so default to 8000 to keep a buffer; set EMBEDDING_TOKEN_LIMIT to override
55+
let token_limit: usize = std::env::var("EMBEDDING_TOKEN_LIMIT")
56+
.ok()
57+
.and_then(|lim| lim.trim().parse().ok())
58+
.unwrap_or(8000);
5459

5560
let results = stream::iter(documents.iter().enumerate())
5661
.map(|(index, doc)| {
@@ -64,12 +69,12 @@ pub async fn generate_embeddings(
6469
// Calculate token count for this document
6570
let token_count = bpe.encode_with_special_tokens(&doc.content).len();
6671

67-
if token_count > TOKEN_LIMIT {
72+
if token_count > token_limit {
6873
// eprintln!(
6974
// " Skipping document {}: Actual tokens ({}) exceed limit ({}). Path: {}",
7075
// index + 1,
7176
// token_count,
72-
// TOKEN_LIMIT,
77+
// token_limit,
7378
// doc.path
7479
// );
7580
// Return Ok(None) to indicate skipping, with 0 tokens processed for this doc

0 commit comments

Comments
 (0)