Commit 7c5de5c

feat: add telemetry export support with multiple backends (#4)
* feat: add telemetry export support with multiple backends
  - Add support for OpenTelemetry (OTLP), Datadog, and Sentry exporters
  - Enable telemetry-only mode (project_id now optional)
  - Add TelemetryManager for coordinating multiple exporters
1 parent 6dda36d commit 7c5de5c
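With this change, track() can run in telemetry-only mode: a server is tracked without an MCPCat project ID as long as at least one exporter is configured. A minimal usage sketch, assuming MCPCatOptions is importable from the package root and that exporters takes a mapping of named exporter configurations (the "otlp" key and its config shape below are illustrative assumptions, not confirmed by this diff):

from mcpcat import track, MCPCatOptions

# Hypothetical exporter configuration; the concrete config keys for the
# OTLP, Datadog, and Sentry backends are not shown in this excerpt.
options = MCPCatOptions(exporters={"otlp": {"endpoint": "http://localhost:4318"}})

# "server" is an existing MCP/FastMCP server instance.
# Telemetry-only mode: no project_id, so events go only to the configured exporters.
server = track(server, options=options)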

File tree

10 files changed: +1332 -38 lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ dependencies = [
     "mcp>=1.2.0",
     "mcpcat-api==0.1.4",
     "pydantic>=2.0.0",
+    "requests>=2.31.0",
 ]
 
 [project.urls]

src/mcpcat/__init__.py

Lines changed: 41 additions & 8 deletions
@@ -21,11 +21,33 @@
 )
 
 
-def track(server: Any, project_id: str, options: MCPCatOptions | None = None) -> Any:
+def track(server: Any, project_id: str | None = None, options: MCPCatOptions | None = None) -> Any:
+    """
+    Initialize MCPCat tracking with optional telemetry export.
+
+    Args:
+        server: MCP server instance to track
+        project_id: MCPCat project ID (optional if using telemetry-only mode)
+        options: Configuration options including telemetry exporters
+
+    Returns:
+        The server instance with tracking enabled
+
+    Raises:
+        ValueError: If neither project_id nor exporters are provided
+        TypeError: If server is not a compatible MCP server instance
+    """
     # Use default options if not provided
     if options is None:
         options = MCPCatOptions()
 
+    # Validate configuration
+    if not project_id and not options.exporters:
+        raise ValueError(
+            "Either project_id or exporters must be provided. "
+            "Use project_id for MCPCat, exporters for telemetry-only mode, or both."
+        )
+
     # Validate server compatibility
     if not is_compatible_server(server):
         raise TypeError(
@@ -37,6 +59,15 @@ def track(server: Any, project_id: str, options: MCPCatOptions | None = None) ->
     if is_fastmcp:
         lowlevel_server = server._mcp_server
 
+    # Initialize telemetry if exporters configured
+    if options.exporters:
+        from mcpcat.modules.telemetry import TelemetryManager
+        from mcpcat.modules.event_queue import set_telemetry_manager
+
+        telemetry_manager = TelemetryManager(options.exporters)
+        set_telemetry_manager(telemetry_manager)
+        write_to_log(f"Telemetry initialized with {len(options.exporters)} exporter(s)")
+
     # Create and store tracking data
     session_id = new_session_id()
     session_info = get_session_info(lowlevel_server)
@@ -53,13 +84,12 @@ def track(server: Any, project_id: str, options: MCPCatOptions | None = None) ->
     try:
         # Always initialize dynamic tracking for complete tool coverage
        from mcpcat.modules.overrides.monkey_patch import apply_monkey_patches
-
+
         # Initialize the dynamic tracking system by setting the flag
         if not data.tracker_initialized:
             data.tracker_initialized = True
-            from mcpcat.modules.logging import write_to_log
             write_to_log(f"Dynamic tracking initialized for server {id(lowlevel_server)}")
-
+
         # Apply appropriate tracking method based on server type
         if is_fastmcp:
             # For FastMCP servers, use monkey-patching for tool tracking
@@ -70,12 +100,15 @@ def track(server: Any, project_id: str, options: MCPCatOptions | None = None) ->
         else:
             # For low-level servers, use the traditional overrides (no monkey patching needed)
             override_lowlevel_mcp_server(lowlevel_server, data)
-
-        write_to_log(f"MCPCat initialized with dynamic tracking for session {session_id} on project {project_id}")
-
+
+        if project_id:
+            write_to_log(f"MCPCat initialized with dynamic tracking for session {session_id} on project {project_id}")
+        else:
+            write_to_log(f"MCPCat initialized in telemetry-only mode for session {session_id}")
+
     except Exception as e:
         write_to_log(f"Error initializing MCPCat: {e}")
-
+
     return server
 
 __all__ = [

src/mcpcat/modules/event_queue.py

Lines changed: 79 additions & 24 deletions
@@ -8,7 +8,10 @@
 import time
 from datetime import datetime, timezone
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any
+from typing import Any, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .telemetry import TelemetryManager
 
 from mcpcat_api import ApiClient, Configuration, EventsApi
 from mcpcat.modules.constants import EVENT_ID_PREFIX, MCPCAT_API_URL
@@ -30,21 +33,21 @@ def __init__(self, api_client=None):
         self.max_retries = 3
         self.max_queue_size = 10000  # Prevent unbounded growth
         self.concurrency = 5  # Max parallel requests
-
+
         # Allow injection of api_client for testing
         if api_client is None:
             config = Configuration(host=MCPCAT_API_URL)
             api_client_instance = ApiClient(configuration=config)
             self.api_client = EventsApi(api_client=api_client_instance)
         else:
             self.api_client = api_client
-
+
         self._shutdown = False
         self._shutdown_event = threading.Event()
-
+
         # Thread pool for processing events
         self.executor = ThreadPoolExecutor(max_workers=self.concurrency)
-
+
         # Start worker thread
         self.worker_thread = threading.Thread(target=self._worker, daemon=True)
         self.worker_thread.start()
@@ -60,15 +63,17 @@ def add(self, event: UnredactedEvent) -> None:
             self.queue.put_nowait(event)
         except queue.Full:
             # Queue is full, drop the new event
-            write_to_log(f"Event queue full, dropping event {event.id or 'unknown'} of type {event.event_type}")
+            write_to_log(
+                f"Event queue full, dropping event {event.id or 'unknown'} of type {event.event_type}"
+            )
 
     def _worker(self) -> None:
         """Worker thread that processes events from the queue."""
         while not self._shutdown_event.is_set():
             try:
                 # Wait for an event with timeout
                 event = self.queue.get(timeout=0.1)
-
+
                 # Submit event processing to thread pool
                 # The executor will queue it if all workers are busy
                 try:
@@ -79,8 +84,10 @@ def _worker(self) -> None:
                     try:
                         self.queue.put_nowait(event)
                     except queue.Full:
-                        write_to_log(f"Could not requeue event {event.id or 'unknown'} - queue full")
-
+                        write_to_log(
+                            f"Could not requeue event {event.id or 'unknown'} - queue full"
+                        )
+
             except queue.Empty:
                 continue
             except Exception as e:
@@ -100,12 +107,30 @@ def _process_event(self, event: UnredactedEvent) -> None:
                 event = redacted_event
                 event.redaction_fn = None  # Clear the function to avoid reprocessing
             except Exception as error:
-                write_to_log(f"WARNING: Dropping event {event.id or 'unknown'} due to redaction failure: {error}")
+                write_to_log(
+                    f"WARNING: Dropping event {event.id or 'unknown'} due to redaction failure: {error}"
+                )
                 return  # Skip this event if redaction fails
 
         if event:
             event.id = event.id or generate_prefixed_ksuid("evt")
-            self._send_event(event)
+
+            # Send to MCPCat API only if project_id exists
+            if event.project_id:
+                self._send_event(event)
+
+            # Export to telemetry backends if configured
+            if _telemetry_manager:
+                try:
+                    _telemetry_manager.export(event)
+                except Exception as e:
+                    write_to_log(f"Telemetry export submission failed: {e}")
+
+            if not event.project_id and not _telemetry_manager:
+                # Warn if we have neither MCPCat nor telemetry configured
+                write_to_log(
+                    "Warning: Event has no project_id and no telemetry exporters configured"
+                )
 
     def _send_event(self, event: Event, retries: int = 0) -> None:
         """Send event to API."""
@@ -126,7 +151,9 @@ def _send_event(self, event: Event, retries: int = 0) -> None:
                 time.sleep(2**retries)
                 self._send_event(event, retries + 1)
             else:
-                write_to_log(f"Failed to send event {event.id} after {self.max_retries} retries")
+                write_to_log(
+                    f"Failed to send event {event.id} after {self.max_retries} retries"
+                )
 
     def get_stats(self) -> dict[str, Any]:
         """Get queue stats for monitoring."""
@@ -146,7 +173,9 @@ def destroy(self) -> None:
         if self.queue.qsize() > 0:
             # If there are events in queue, wait 5 seconds
             wait_time = 5.0
-            write_to_log(f"Shutting down with {self.queue.qsize()} events in queue, waiting up to {wait_time}s")
+            write_to_log(
+                f"Shutting down with {self.queue.qsize()} events in queue, waiting up to {wait_time}s"
+            )
         else:
             # If queue is empty, just wait 1 second for in-flight requests
             wait_time = 1.0
@@ -164,22 +193,41 @@ def destroy(self) -> None:
         write_to_log(f"Shutdown complete. {remaining} events were not processed.")
 
 
+# Global telemetry manager instance (optional)
+_telemetry_manager: Optional["TelemetryManager"] = None
+
+
+def set_telemetry_manager(manager: Optional["TelemetryManager"]) -> None:
+    """
+    Set the global telemetry manager instance.
+
+    Args:
+        manager: TelemetryManager instance or None to disable telemetry
+    """
+    global _telemetry_manager
+    _telemetry_manager = manager
+    if manager:
+        write_to_log(
+            f"Telemetry manager set with {manager.get_exporter_count()} exporter(s)"
+        )
+
+
 # Global event queue instance
 event_queue = EventQueue()
 
 
 def _shutdown_handler(signum, frame):
     """Handle shutdown signals."""
-
+
     write_to_log("Received shutdown signal, gracefully shutting down...")
-
+
     # Reset signal handlers to default behavior to avoid recursive calls
     signal.signal(signal.SIGINT, signal.SIG_DFL)
     signal.signal(signal.SIGTERM, signal.SIG_DFL)
-
+
     # Perform graceful shutdown
     event_queue.destroy()
-
+
     # Force exit after graceful shutdown
     os._exit(0)
 
@@ -202,28 +250,35 @@ def publish_event(server: Any, event: UnredactedEvent) -> None:
     """Publish an event to the queue."""
     if not event.duration:
         if event.timestamp:
-            event.duration = int((datetime.now(timezone.utc).timestamp() - event.timestamp.timestamp()) * 1000)
+            event.duration = int(
+                (datetime.now(timezone.utc).timestamp() - event.timestamp.timestamp())
+                * 1000
+            )
         else:
             event.duration = None
 
-
     data = get_server_tracking_data(server)
     if not data:
-        write_to_log("Warning: Server tracking data not found. Event will not be published.")
+        write_to_log(
+            "Warning: Server tracking data not found. Event will not be published."
+        )
         return
 
     session_info = get_session_info(server, data)
 
     # Create full event with all required fields
-    # Merge event data with session info
+    # Merge event data with session info
     event_data = event.model_dump(exclude_none=True)
     session_data = session_info.model_dump(exclude_none=True)
-
+
+    # Merge data, ensuring project_id from data takes precedence
     merged_data = {**event_data, **session_data}
-
+    merged_data["project_id"] = (
+        data.project_id
+    )  # Override with tracking data's project_id
+
     full_event = UnredactedEvent(
         **merged_data,
-        project_id=data.project_id,
         redaction_fn=data.options.redact_sensitive_information,
     )
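The TelemetryManager class itself is not included in this excerpt; from the queue's side it only needs an export(event) method (called per event, with failures caught and logged as "Telemetry export submission failed") and a get_exporter_count() method (used by the log line in set_telemetry_manager). A minimal duck-typed stand-in illustrating that contract, for instance in tests, might look like this (hypothetical class, not part of the PR):

from mcpcat.modules.event_queue import set_telemetry_manager


class RecordingTelemetryManager:
    """Hypothetical stand-in satisfying the interface the event queue expects."""

    def __init__(self) -> None:
        self.events = []

    def export(self, event) -> None:
        # The queue wraps this call in try/except, so an exception here only
        # produces a log entry and never disturbs event delivery to MCPCat.
        self.events.append(event)

    def get_exporter_count(self) -> int:
        return 1


set_telemetry_manager(RecordingTelemetryManager())
# ... publish events through the queue ...
set_telemetry_manager(None)  # disable telemetry again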

Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
+"""Base exporter interface for telemetry exporters."""
+
+from abc import ABC, abstractmethod
+from ...types import Event
+
+
+class Exporter(ABC):
+    """Abstract base class for telemetry exporters."""
+
+    @abstractmethod
+    def export(self, event: Event) -> None:
+        """
+        Export an event to the telemetry backend.
+
+        Args:
+            event: The MCPCat event to export
+
+        Note:
+            This method should handle all errors internally and never
+            raise exceptions that could affect the main MCP server.
+        """
+        pass
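A concrete backend only needs to subclass Exporter and implement export(), keeping error handling internal as the docstring requires. A minimal hypothetical example (not one of the OTLP/Datadog/Sentry exporters added by this PR; the import paths below are assumptions, since the new file's location is not shown in this excerpt):

import json
import sys

# Assumed module paths for the base class above and the Event type.
from mcpcat.modules.telemetry.exporters.base import Exporter
from mcpcat.types import Event


class StderrExporter(Exporter):
    """Hypothetical exporter that writes each event to stderr as a JSON line."""

    def export(self, event: Event) -> None:
        try:
            # Serialize via pydantic's model_dump, as the event queue does
            # elsewhere in this commit (assumption: Event is a pydantic model).
            payload = event.model_dump(exclude_none=True)
            print(json.dumps(payload, default=str), file=sys.stderr)
        except Exception:
            # Per the base class contract: never let exporter errors propagate
            # back into the MCP server.
            pass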
