Skip to content

Commit 0595ad0

Browse files
Add integration test for Firecracker with clippy-tracing instrumentation
1 parent e36e774 commit 0595ad0

File tree

1 file changed

+305
-0
lines changed

1 file changed

+305
-0
lines changed
Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Test that Firecracker works correctly when instrumented with tracing and trace level logs are enabled.
5+
6+
This test addresses GitHub issue about adding a test that checks if simple integration tests work
7+
when Firecracker is instrumented with src/clippy-tracing and trace level logs are enabled.
8+
9+
The existing test_log_instrument.py only tests the log-instrument crate with example programs.
10+
This test verifies that the log-instrument crate can successfully instrument the actual Firecracker
11+
binary and that the instrumented Firecracker works correctly with trace-level logging.
12+
13+
Test Coverage:
14+
1. Uses clippy-tracing tool to add instrumentation to key Firecracker source files
15+
2. Builds Firecracker with --features tracing to enable the log-instrument functionality
16+
3. Spawns instrumented Firecracker with trace-level logging enabled
17+
4. Performs basic API operations (describe_instance, machine_config, logger config)
18+
5. Verifies that:
19+
- Firecracker starts and responds to API calls correctly
20+
- Trace-level logs are generated
21+
- log-instrument traces (function entry/exit) appear in logs with expected format
22+
- Meaningful functions from main, API server, and VMM are being traced
23+
- Runtime log level filtering works correctly
24+
- Performance impact is reasonable (basic smoke test)
25+
26+
The test ensures that instrumentation doesn't break Firecracker's core functionality while
27+
providing the tracing capabilities needed for debugging deadlocks and performance issues.
28+
"""
29+
30+
import os
31+
import re
32+
import tempfile
33+
from pathlib import Path
34+
35+
import pytest
36+
37+
from framework import utils
38+
from framework.microvm import MicroVMFactory
39+
from framework.properties import global_props
40+
from host_tools.cargo_build import cargo, get_binary
41+
42+
43+
def build_instrumented_firecracker():
44+
"""Build Firecracker with tracing instrumentation enabled."""
45+
# First, add instrumentation using clippy-tracing
46+
clippy_tracing = get_binary("clippy-tracing")
47+
48+
# Add instrumentation to a subset of files to avoid performance issues
49+
# We'll instrument just the API server and main entry points for meaningful traces
50+
cargo_args = [
51+
"--action", "fix",
52+
"--path", "./src/firecracker/src/main.rs",
53+
"--path", "./src/firecracker/src/api_server",
54+
"--path", "./src/vmm/src/lib.rs",
55+
"--path", "./src/vmm/src/builder.rs"
56+
]
57+
58+
utils.check_output(f"{clippy_tracing} {' '.join(cargo_args)}")
59+
60+
# Build Firecracker with tracing feature enabled
61+
cargo("build", "--features tracing --bin firecracker")
62+
63+
return get_binary("firecracker")
64+
65+
66+
def cleanup_instrumentation():
67+
"""Remove instrumentation from source files."""
68+
clippy_tracing = get_binary("clippy-tracing")
69+
70+
# Strip instrumentation from the files we modified
71+
strip_args = [
72+
"--action", "strip",
73+
"--path", "./src/firecracker/src/main.rs",
74+
"--path", "./src/firecracker/src/api_server",
75+
"--path", "./src/vmm/src/lib.rs",
76+
"--path", "./src/vmm/src/builder.rs"
77+
]
78+
79+
utils.check_output(f"{clippy_tracing} {' '.join(strip_args)}")
80+
81+
82+
@pytest.fixture(scope="module")
83+
def instrumented_firecracker_binary():
84+
"""Fixture that builds an instrumented Firecracker binary and cleans up after tests."""
85+
try:
86+
binary_path = build_instrumented_firecracker()
87+
yield binary_path
88+
finally:
89+
cleanup_instrumentation()
90+
91+
92+
def test_firecracker_tracing_basic_functionality(instrumented_firecracker_binary):
93+
"""Test that instrumented Firecracker can start and handle basic API calls with trace logging."""
94+
# Create a temporary directory for this test
95+
with tempfile.TemporaryDirectory() as temp_dir:
96+
temp_path = Path(temp_dir)
97+
98+
# Create a MicroVM factory with the instrumented binary
99+
factory = MicroVMFactory(instrumented_firecracker_binary)
100+
101+
# Build a microVM
102+
vm = factory.build()
103+
104+
# Configure basic VM settings
105+
vm.basic_config(vcpu_count=1, mem_size_mib=128)
106+
107+
# Spawn the VM with trace level logging
108+
vm.spawn(log_level="Trace", log_show_level=True, log_show_origin=True)
109+
110+
try:
111+
# Wait for the API socket to be available
112+
vm._wait_for_api_socket()
113+
114+
# Make some basic API calls to generate trace logs
115+
# Get instance info
116+
response = vm.api.describe_instance.get()
117+
assert response.status_code == 200
118+
119+
# Get machine config
120+
response = vm.api.machine_config.get()
121+
assert response.status_code == 200
122+
123+
# Set logger to trace level to ensure we capture instrumentation logs
124+
logger_config = {
125+
"level": "Trace"
126+
}
127+
response = vm.api.logger.put(**logger_config)
128+
assert response.status_code == 204
129+
130+
# Make another API call after setting trace level
131+
response = vm.api.describe_instance.get()
132+
assert response.status_code == 200
133+
134+
# Verify that the VM is working correctly
135+
assert vm.state == "Not started"
136+
137+
finally:
138+
vm.kill()
139+
140+
# Check the logs for instrumentation traces
141+
log_data = vm.log_data
142+
143+
# Verify that trace level logs are present
144+
assert "TRACE" in log_data, "Expected TRACE level logs in output"
145+
146+
# Look for log-instrument traces (function entry/exit)
147+
# These should have the format: ThreadId(X)>>function_name or ThreadId(X)<<function_name
148+
trace_pattern = r"ThreadId\(\d+\)(?:::[^>]*)?(?:>>|<<)\w+"
149+
trace_matches = re.findall(trace_pattern, log_data)
150+
151+
assert len(trace_matches) > 0, f"Expected to find log-instrument traces in logs, but found none. Log data: {log_data[:1000]}..."
152+
153+
# Verify we see function entry and exit traces
154+
entry_traces = [match for match in trace_matches if ">>" in match]
155+
exit_traces = [match for match in trace_matches if "<<" in match]
156+
157+
assert len(entry_traces) > 0, "Expected to find function entry traces (>>)"
158+
assert len(exit_traces) > 0, "Expected to find function exit traces (<<)"
159+
160+
# Verify that meaningful functions are being traced
161+
# Look for traces from main, API handling, or VM management functions
162+
meaningful_functions = ["main", "api", "vmm", "request", "response"]
163+
found_meaningful = False
164+
165+
for trace in trace_matches:
166+
for func in meaningful_functions:
167+
if func.lower() in trace.lower():
168+
found_meaningful = True
169+
break
170+
if found_meaningful:
171+
break
172+
173+
assert found_meaningful, f"Expected to find traces from meaningful functions, but traces were: {trace_matches[:10]}"
174+
175+
176+
def test_firecracker_tracing_with_vm_lifecycle(instrumented_firecracker_binary):
177+
"""Test that instrumented Firecracker works through a complete VM lifecycle with tracing."""
178+
# Skip this test if we don't have the necessary resources
179+
if not global_props.host_linux_version_tup >= (4, 14):
180+
pytest.skip("Requires Linux kernel >= 4.14")
181+
182+
# Create a MicroVM factory with the instrumented binary
183+
factory = MicroVMFactory(instrumented_firecracker_binary)
184+
185+
# Build a microVM
186+
vm = factory.build()
187+
188+
# Configure the VM with minimal resources
189+
vm.basic_config(vcpu_count=1, mem_size_mib=128, add_root_device=False)
190+
191+
# Spawn with trace logging
192+
vm.spawn(log_level="Trace")
193+
194+
try:
195+
# Set logger to trace level
196+
logger_config = {"level": "Trace"}
197+
response = vm.api.logger.put(**logger_config)
198+
assert response.status_code == 204
199+
200+
# Test pause/resume functionality with tracing
201+
# Note: We can't actually start the VM without a kernel/rootfs,
202+
# but we can test the API endpoints
203+
204+
# Get metrics (this exercises the metrics system)
205+
metrics = vm.get_all_metrics()
206+
assert metrics is not None
207+
208+
# Test machine config updates
209+
machine_config = {
210+
"vcpu_count": 1,
211+
"mem_size_mib": 128,
212+
"smt": False
213+
}
214+
response = vm.api.machine_config.put(**machine_config)
215+
assert response.status_code == 204
216+
217+
finally:
218+
vm.kill()
219+
220+
# Verify tracing worked throughout the lifecycle
221+
log_data = vm.log_data
222+
223+
# Should have trace logs
224+
assert "TRACE" in log_data
225+
226+
# Should have instrumentation traces
227+
trace_pattern = r"ThreadId\(\d+\)(?:::[^>]*)?(?:>>|<<)\w+"
228+
trace_matches = re.findall(trace_pattern, log_data)
229+
assert len(trace_matches) > 0, "Expected instrumentation traces during VM lifecycle"
230+
231+
232+
def test_firecracker_tracing_performance_impact():
233+
"""Test that instrumented Firecracker still performs reasonably (basic smoke test)."""
234+
import time
235+
236+
# This is a basic performance smoke test to ensure tracing doesn't break functionality
237+
# We're not doing detailed performance analysis, just ensuring it doesn't hang or crash
238+
239+
# Build instrumented binary
240+
instrumented_binary = build_instrumented_firecracker()
241+
242+
try:
243+
factory = MicroVMFactory(instrumented_binary)
244+
vm = factory.build()
245+
246+
# Time the basic configuration and startup
247+
start_time = time.time()
248+
249+
vm.basic_config(vcpu_count=1, mem_size_mib=128, add_root_device=False)
250+
vm.spawn(log_level="Trace")
251+
252+
# Make several API calls
253+
for _ in range(5):
254+
response = vm.api.describe_instance.get()
255+
assert response.status_code == 200
256+
257+
elapsed = time.time() - start_time
258+
259+
# Should complete within reasonable time (30 seconds is very generous)
260+
# This is just to catch major performance regressions or hangs
261+
assert elapsed < 30, f"Instrumented Firecracker took too long to start and handle API calls: {elapsed}s"
262+
263+
vm.kill()
264+
265+
finally:
266+
cleanup_instrumentation()
267+
268+
269+
def test_trace_log_filtering():
270+
"""Test that trace log filtering works correctly with instrumented Firecracker."""
271+
instrumented_binary = build_instrumented_firecracker()
272+
273+
try:
274+
factory = MicroVMFactory(instrumented_binary)
275+
vm = factory.build()
276+
277+
vm.basic_config(vcpu_count=1, mem_size_mib=128, add_root_device=False)
278+
vm.spawn(log_level="Info") # Start with Info level
279+
280+
try:
281+
# Initially should not have trace logs
282+
initial_log_data = vm.log_data
283+
284+
# Set logger to trace level
285+
logger_config = {"level": "Trace"}
286+
response = vm.api.logger.put(**logger_config)
287+
assert response.status_code == 204
288+
289+
# Make API calls to generate traces
290+
for _ in range(3):
291+
response = vm.api.describe_instance.get()
292+
assert response.status_code == 200
293+
294+
# Now should have trace logs
295+
final_log_data = vm.log_data
296+
297+
# The new log data should contain trace information
298+
new_log_data = final_log_data[len(initial_log_data):]
299+
assert "TRACE" in new_log_data, "Expected TRACE logs after setting log level to Trace"
300+
301+
finally:
302+
vm.kill()
303+
304+
finally:
305+
cleanup_instrumentation()

0 commit comments

Comments
 (0)