Changes from all commits
21 commits
a3eef1a
feat(ui): add Token Estimator link to footer
HmbleCreator Jun 29, 2025
6247f92
feat: Add token estimator documentation page and simplify footer link…
HmbleCreator Jun 30, 2025
cef47a2
feat: Add token estimator documentation page and simplify footer link…
HmbleCreator Jun 30, 2025
48328ab
feat: use Jinja template for token estimator API docs
HmbleCreator Jun 30, 2025
2ed86f6
Merge branch 'main' into main
filipchristiansen Jun 30, 2025
3f0c1a5
Merge branch 'main' into main
HmbleCreator Jul 1, 2025
98e5c45
Refactor token count API and routing, enforce API-only for /api/token…
HmbleCreator Jul 1, 2025
5c271b5
Merge branch 'main' of https://github.com/HmbleCreator/gitingest
HmbleCreator Jul 1, 2025
41ecea9
Merge branch 'main' into main
filipchristiansen Jul 1, 2025
92b8660
Merge branch 'main' into main
filipchristiansen Jul 2, 2025
e1648c4
Merge branch 'main' into main
HmbleCreator Jul 3, 2025
75daa98
Merge branch 'main' into main
filipchristiansen Jul 3, 2025
5b282b5
Merge branch 'main' into main
HmbleCreator Jul 4, 2025
806d2d5
Merge branch 'main' into main
HmbleCreator Jul 5, 2025
79eb882
Merge branch 'main' into main
HmbleCreator Jul 6, 2025
7852fde
Merge branch 'cyclotruc:main' into main
HmbleCreator Jul 7, 2025
b802ccf
Merge branch 'main' into main
filipchristiansen Jul 10, 2025
e1c5859
Merge branch 'main' into main
HmbleCreator Jul 14, 2025
9e69ce8
Merge branch 'main' into main
HmbleCreator Jul 19, 2025
260cb9b
Merge branch 'main' into main
HmbleCreator Aug 1, 2025
99bd52a
Merge branch 'main' into main
HmbleCreator Sep 23, 2025
14 changes: 14 additions & 0 deletions CONTRIBUTING.md
@@ -95,3 +95,17 @@ If you ever get stuck, reach out on [Discord](https://discord.com/invite/zerRaGK
13. **Iterate** on any review feedback—update your branch and repeat **6 – 11** as needed.

*(Optional) Invite a maintainer to your branch for easier collaboration.*

---

## CSS & build artefacts

- **Do not commit `src/static/css/site.css`.** The CI pipeline runs `npm run build:css` during the container/image build, so the artefact is produced automatically.

- When developing locally you may run the build yourself (see step 9) so you can preview the styles.

## Dependency Management

When you add a new import from an external package, make sure to add it to both `requirements.txt` and `pyproject.toml` (if applicable). This ensures all environments and CI/CD pipelines have the correct dependencies installed.
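
For example, a new dependency would typically be declared in both places roughly like this (the package name and version pin below are placeholders for illustration, not something added by this PR):

```
# pyproject.toml — inside the [project] dependencies array
"example-package>=1.0",

# requirements.txt — matching line
example-package>=1.0
```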
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -16,6 +16,9 @@ dependencies = [
"strenum; python_version < '3.11'",
"tiktoken>=0.7.0", # Support for o200k_base encoding
"typing_extensions>= 4.0.0; python_version < '3.10'",
"uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150)
"autotiktokenizer=*",
"prometheus-client"
]

license = {file = "LICENSE"}
1 change: 1 addition & 0 deletions requirements.txt
@@ -12,3 +12,4 @@ slowapi
starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw
tiktoken>=0.7.0 # Support for o200k_base encoding
uvicorn>=0.11.7 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150
autotiktokenizer
7 changes: 6 additions & 1 deletion src/server/routers/dynamic.py
@@ -1,6 +1,6 @@
"""The dynamic router module defines handlers for dynamic path requests."""

from fastapi import APIRouter, Request
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse

from server.server_config import get_version_info, templates
@@ -29,6 +29,11 @@ async def catch_all(request: Request, full_path: str) -> HTMLResponse:
and other default parameters such as file size.

"""
# Block API routes
if full_path.startswith("api/"):
raise HTTPException(status_code=405, detail="Method Not Allowed")

# Build context with version info
context = {
"request": request,
"repo_url": full_path,
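
As a rough illustration of what the new guard does, here is a minimal self-contained sketch (a hypothetical FastAPI app, not the project's actual server; the page response is a placeholder):

```python
from fastapi import FastAPI, HTTPException
from fastapi.responses import PlainTextResponse
from fastapi.testclient import TestClient

app = FastAPI()


@app.get("/{full_path:path}", response_class=PlainTextResponse)
async def catch_all(full_path: str) -> str:
    # Mirror the guard above: anything under "api/" is not a page.
    if full_path.startswith("api/"):
        raise HTTPException(status_code=405, detail="Method Not Allowed")
    return f"page for {full_path}"


client = TestClient(app)
assert client.get("/api/tokencount").status_code == 405  # blocked by the guard
assert client.get("/owner/repo").status_code == 200      # normal dynamic paths still render
```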
162 changes: 143 additions & 19 deletions src/server/routers/index.py
@@ -1,37 +1,161 @@
"""Module defining the FastAPI router for the home page of the application."""

from fastapi import APIRouter, Request
from fastapi import APIRouter, Depends, Form, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from autotiktokenizer import AutoTikTokenizer
import tiktoken
from typing import Optional

from gitingest.utils.compat_typing import Annotated
from server.models import QueryForm
from server.query_processor import process_query
from server.server_config import EXAMPLE_REPOS, get_version_info, templates
from server.server_utils import limiter
from pydantic import BaseModel, Field

router = APIRouter()
templates = Jinja2Templates(directory="server/templates")

SUPPORTED_MODELS = {
'GPT-2 (OpenAI)': 'openai-community/gpt2',
'GPT-3 (OpenAI)': 'openai-community/gpt2',
'GPT-3.5 (OpenAI)': 'openai-community/gpt2',
'GPT-3.5-turbo (OpenAI)': 'openai-community/gpt2',
'GPT-4 (OpenAI)': 'openai-community/gpt2',
'Claude (approximate, uses GPT-2)': 'openai-community/gpt2',
'Gemini (approximate, uses T5)': 't5-base',
'Llama-2 (Meta)': 'meta-llama/Llama-2-7b-hf',
'Llama-3 (Meta)': 'meta-llama/Meta-Llama-3-8B',
'Mistral-7B (MistralAI)': 'mistralai/Mistral-7B-v0.1',
'Mixtral-8x7B (MistralAI)': 'mistralai/Mixtral-8x7B-v0.1',
'Phi-3-mini (Microsoft)': 'microsoft/phi-3-mini-4k-instruct',
'Gemma-2B (Google)': 'google/gemma-2b',
'Qwen2-7B (Alibaba)': 'Qwen/Qwen2-7B',
'Yi-34B (01.AI)': '01-ai/Yi-34B-Chat',
'Falcon-7B (TII)': 'tiiuae/falcon-7b',
'MPT-7B (MosaicML)': 'mosaicml/mpt-7b',
'Baichuan-7B (Baichuan)': 'baichuan-inc/Baichuan-7B',
'XLM-RoBERTa-base (Facebook)': 'xlm-roberta-base',
'RoBERTa-base (Facebook)': 'roberta-base',
'DistilBERT-base-uncased': 'distilbert-base-uncased',
'GPT-Neo-1.3B (EleutherAI)': 'EleutherAI/gpt-neo-1.3B',
'GPT-J-6B (EleutherAI)': 'EleutherAI/gpt-j-6B',
'BLOOM-560m (BigScience)': 'bigscience/bloom-560m',
'BERT-base-uncased': 'bert-base-uncased',
'T5-base': 't5-base',
}

@router.get("/", response_class=HTMLResponse, include_in_schema=False)
async def home(request: Request) -> HTMLResponse:
"""Render the home page with example repositories and default parameters.

This endpoint serves the home page of the application, rendering the ``index.jinja`` template
and providing it with a list of example repositories and default file size values.

Parameters
----------
request : Request
The incoming request object, which provides context for rendering the response.
def get_tokenizer(model_id):
return AutoTikTokenizer.from_pretrained(model_id)

Returns
-------
HTMLResponse
An HTML response containing the rendered home page template, with example repositories
and other default parameters such as file size.
def count_tokens(input_text, model_id):
if model_id == 'openai-community/gpt2':
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
return len(enc.encode(input_text))
else:
tokenizer = AutoTikTokenizer.from_pretrained(model_id)
return len(tokenizer.encode(input_text))

"""
@router.get("/", response_class=HTMLResponse, include_in_schema=False)
async def home(request: Request) -> HTMLResponse:
"""Render the home page with example repositories and default parameters."""
context = {
"request": request,
"examples": EXAMPLE_REPOS,
"default_max_file_size": 243,
}
context.update(get_version_info())

return templates.TemplateResponse("index.jinja", context)

@router.post("/", response_class=HTMLResponse)
@limiter.limit("10/minute")
async def index_post(request: Request, form: Annotated[QueryForm, Depends(QueryForm.as_form)]) -> HTMLResponse:
resolved_token = form.token if form.token else None
return await process_query(
request,
input_text=form.input_text,
slider_position=form.max_file_size,
pattern_type=form.pattern_type,
pattern=form.pattern,
is_index=True,
token=resolved_token,
)

class TokenCountRequest(BaseModel):
input_text: str = Field(..., description="The text to count tokens for")
model_id: str = Field(default="openai-community/gpt2", description="The model ID to use for tokenization")

class TokenCountResponse(BaseModel):
token_count: int = Field(..., description="Number of tokens in the input text")
model_id: str = Field(..., description="Model ID used for tokenization")
character_count: int = Field(..., description="Number of characters in the input text")

@router.post("/api/tokencount", response_model=TokenCountResponse)
async def api_token_count(
request: Optional[TokenCountRequest] = None,
input_text: str = Form(None),
model_id: str = Form(default="openai-community/gpt2"),
):
if request:
text = request.input_text
model = request.model_id
else:
text = input_text
model = model_id

if not text or not text.strip():
raise HTTPException(status_code=400, detail="Input text cannot be empty")

if model not in SUPPORTED_MODELS.values():
raise HTTPException(
status_code=400,
detail=f"Unsupported model ID. Must be one of: {', '.join(SUPPORTED_MODELS.values())}"
)

try:
token_count = count_tokens(text, model)
return TokenCountResponse(
token_count=token_count,
model_id=model,
character_count=len(text)
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) from e

@router.get("/tokencount", response_class=HTMLResponse)
async def tokencount_ui(request: Request):
return templates.TemplateResponse(
"tokencount.jinja",
{"request": request, "supported_models": SUPPORTED_MODELS, "input_text": "", "model_id": "openai-community/gpt2", "result": None, "error": None}
)

@router.post("/tokencount", response_class=HTMLResponse)
async def tokencount_post(request: Request, input_text: str = Form(...), model_id: str = Form("openai-community/gpt2")):
error = None
result = None
if not input_text or not input_text.strip():
error = "Input text cannot be empty."
elif model_id not in SUPPORTED_MODELS.values():
error = f"Unsupported model ID. Must be one of: {', '.join(SUPPORTED_MODELS.values())}"
else:
try:
token_count = count_tokens(input_text, model_id)
result = {
"token_count": token_count,
"model_id": model_id,
"character_count": len(input_text)
}
except Exception as e:
error = str(e)
return templates.TemplateResponse(
"tokencount.jinja",
{
"request": request,
"supported_models": SUPPORTED_MODELS,
"input_text": input_text,
"model_id": model_id,
"result": result,
"error": error
}
)
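
A hedged sketch of what `count_tokens` above resolves to for its two branches (assumes `tiktoken` and `autotiktokenizer` are installed; the Hugging Face branch downloads tokenizer files on first use, and the exact counts depend on the encoding):

```python
import tiktoken
from autotiktokenizer import AutoTikTokenizer

text = "Hello world!"

# OpenAI-style entries (mapped to 'openai-community/gpt2' above) are approximated
# with tiktoken's gpt-3.5-turbo encoding (cl100k_base).
enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
print(len(enc.encode(text)))  # e.g. 3

# Any other model ID goes through AutoTikTokenizer, e.g. the T5 mapping used for
# the "Gemini (approximate)" entry.
t5 = AutoTikTokenizer.from_pretrained("t5-base")
print(len(t5.encode(text)))
```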
26 changes: 26 additions & 0 deletions src/server/templates/components/footer.jinja
@@ -4,12 +4,38 @@
<div class="grid grid-cols-3 items-center text-gray-900 text-sm">
{# Left column — Chrome + PyPI #}
<div class="flex items-center space-x-4">
<a href="https://chromewebstore.google.com/detail/git-ingest-turn-any-git-r/adfjahbijlkjfoicpjkhjicpjpjfaood"
target="_blank"
rel="noopener noreferrer"
class="hover:underline flex items-center">
<img src="https://img.icons8.com/ios/50/chrome--v1.png"
alt="chrome"
class="w-4 h-4 mr-1">
Extension
</a>
<a href="https://pypi.org/project/gitingest/"
target="_blank"
rel="noopener noreferrer"
class="hover:underline flex items-center">
<img src="https://img.icons8.com/windows/32/python.png"
alt="python"
class="w-4 h-4 mr-1">
Python package
</a>
<a href="/tokencount" class="hover:underline flex items-center" title="Backend API endpoint for token counting">
<img src="https://img.icons8.com/ios-filled/50/000000/counter.png" alt="token estimator" class="w-4 h-4 mr-1">
Token Estimator
</a>
{{ footer_icon_link('https://chromewebstore.google.com/detail/adfjahbijlkjfoicpjkhjicpjpjfaood',
'icons/chrome.svg',
'Chrome Extension') }}
{{ footer_icon_link('https://pypi.org/project/gitingest',
'icons/python.svg',
'Python Package') }}
{{ footer_icon_link('/tokencount',
'icons/tokens.svg',
'Token Estimator') }}

</div>
{# Middle column - Version information #}
<div class="flex justify-center">
40 changes: 40 additions & 0 deletions src/server/templates/tokencount.jinja
@@ -0,0 +1,40 @@
{% extends "base.jinja" %}
{% block title %}Token Estimator{% endblock %}
{% block content %}
<div class="relative">
<div class="w-full h-full absolute inset-0 bg-gray-900 rounded-xl translate-y-2 translate-x-2"></div>
<div class="rounded-xl relative z-20 p-8 sm:p-10 border-[3px] border-gray-900 bg-[#fff4da]">
<h1 class="text-3xl font-bold text-gray-900 mb-4">Token Estimator</h1>
<form method="post" action="/tokencount" class="space-y-6">
<div>
<label for="input_text" class="block mb-2 font-medium">Text to analyze:</label>
<textarea name="input_text" id="input_text" rows="4" required class="w-full border-[3px] border-gray-900 rounded p-2 mb-2 bg-[#E8F0FE] focus:outline-none">{{ input_text if input_text else '' }}</textarea>
</div>
<div class="mb-10">
<label for="model_id" class="block mb-2 font-medium">Model:</label>
<select name="model_id" id="model_id" class="w-full border-[3px] border-gray-900 rounded p-2 bg-[#E8F0FE] focus:outline-none">
{% for name, model in supported_models.items() %}
<option value="{{ model }}" {% if model_id == model %}selected{% endif %}>{{ name }}</option>
{% endfor %}
</select>
</div>
<div>
<button type="submit" class="bg-yellow-500 hover:bg-yellow-600 text-white font-bold py-2 px-4 rounded border-[3px] border-gray-900">Count Tokens</button>
</div>
</form>
{% if result %}
<div class="mt-6 p-4 border-[3px] border-gray-900 rounded bg-white">
<h2 class="text-xl font-semibold mb-2">Result</h2>
<p><b>Token count:</b> {{ result.token_count }}</p>
<p><b>Character count:</b> {{ result.character_count }}</p>
<p><b>Model:</b> {{ result.model_id }}</p>
</div>
{% endif %}
{% if error %}
<div class="mt-6 p-4 border-[3px] border-red-600 rounded bg-red-100 text-red-800">
<b>Error:</b> {{ error }}
</div>
{% endif %}
</div>
</div>
{% endblock %}
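
A rough end-to-end sketch of the form flow this template renders (assumes a local dev server on port 8000 and the `requests` package, neither of which is part of this PR):

```python
import requests

resp = requests.post(
    "http://localhost:8000/tokencount",
    data={"input_text": "Hello world!", "model_id": "openai-community/gpt2"},
    timeout=30,
)
resp.raise_for_status()
# On success the result block above is rendered into the page.
assert "Token count" in resp.text
```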
23 changes: 23 additions & 0 deletions tests/test_server.py
@@ -0,0 +1,23 @@
from fastapi.testclient import TestClient
from src.server.main import app

client = TestClient(app, base_url="http://localhost")


def test_tokencount_valid():
response = client.post("/tokencount", json={"input_text": "Hello world!", "model_id": "openai-community/gpt2"}, headers={"host": "localhost"})
if response.status_code != 200:
print("Response content:", response.content)
assert response.status_code == 200
data = response.json()
assert "token_count" in data
assert isinstance(data["token_count"], int)
assert data["token_count"] > 0

def test_tokencount_missing_input():
response = client.post("/tokencount", json={"model_id": "openai-community/gpt2"}, headers={"host": "localhost"})
if response.status_code != 400:
print("Response content:", response.content)
assert response.status_code == 400
data = response.json()
assert "error" in data