1
+ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Test that Firecracker works correctly when instrumented with tracing and trace level logs are enabled.
5
+
6
+ This test addresses GitHub issue about adding a test that checks if simple integration tests work
7
+ when Firecracker is instrumented with src/clippy-tracing and trace level logs are enabled.
8
+
9
+ The existing test_log_instrument.py only tests the log-instrument crate with example programs.
10
+ This test verifies that the log-instrument crate can successfully instrument the actual Firecracker
11
+ binary and that the instrumented Firecracker works correctly with trace-level logging.
12
+
13
+ Test Coverage:
14
+ 1. Uses clippy-tracing tool to add instrumentation to key Firecracker source files
15
+ 2. Builds Firecracker with --features tracing to enable the log-instrument functionality
16
+ 3. Spawns instrumented Firecracker with trace-level logging enabled
17
+ 4. Performs basic API operations (describe_instance, machine_config, logger config)
18
+ 5. Verifies that:
19
+ - Firecracker starts and responds to API calls correctly
20
+ - Trace-level logs are generated
21
+ - log-instrument traces (function entry/exit) appear in logs with expected format
22
+ - Meaningful functions from main, API server, and VMM are being traced
23
+ - Runtime log level filtering works correctly
24
+ - Performance impact is reasonable (basic smoke test)
25
+
26
+ The test ensures that instrumentation doesn't break Firecracker's core functionality while
27
+ providing the tracing capabilities needed for debugging deadlocks and performance issues.
28
+ """
29
+
30
+ import os
31
+ import re
32
+ import tempfile
33
+ from pathlib import Path
34
+
35
+ import pytest
36
+
37
+ from framework import utils
38
+ from framework .microvm import MicroVMFactory
39
+ from framework .properties import global_props
40
+ from host_tools .cargo_build import cargo , get_binary
41
+
42
+
43
+ def build_instrumented_firecracker ():
44
+ """Build Firecracker with tracing instrumentation enabled."""
45
+ # First, add instrumentation using clippy-tracing
46
+ clippy_tracing = get_binary ("clippy-tracing" )
47
+
48
+ # Add instrumentation to a subset of files to avoid performance issues
49
+ # We'll instrument just the API server and main entry points for meaningful traces
50
+ cargo_args = [
51
+ "--action" , "fix" ,
52
+ "--path" , "./src/firecracker/src/main.rs" ,
53
+ "--path" , "./src/firecracker/src/api_server" ,
54
+ "--path" , "./src/vmm/src/lib.rs" ,
55
+ "--path" , "./src/vmm/src/builder.rs"
56
+ ]
57
+
58
+ utils .check_output (f"{ clippy_tracing } { ' ' .join (cargo_args )} " )
59
+
60
+ # Build Firecracker with tracing feature enabled
61
+ cargo ("build" , "--features tracing --bin firecracker" )
62
+
63
+ return get_binary ("firecracker" )
64
+
65
+
66
+ def cleanup_instrumentation ():
67
+ """Remove instrumentation from source files."""
68
+ clippy_tracing = get_binary ("clippy-tracing" )
69
+
70
+ # Strip instrumentation from the files we modified
71
+ strip_args = [
72
+ "--action" , "strip" ,
73
+ "--path" , "./src/firecracker/src/main.rs" ,
74
+ "--path" , "./src/firecracker/src/api_server" ,
75
+ "--path" , "./src/vmm/src/lib.rs" ,
76
+ "--path" , "./src/vmm/src/builder.rs"
77
+ ]
78
+
79
+ utils .check_output (f"{ clippy_tracing } { ' ' .join (strip_args )} " )
80
+
81
+
82
+ @pytest .fixture (scope = "module" )
83
+ def instrumented_firecracker_binary ():
84
+ """Fixture that builds an instrumented Firecracker binary and cleans up after tests."""
85
+ try :
86
+ binary_path = build_instrumented_firecracker ()
87
+ yield binary_path
88
+ finally :
89
+ cleanup_instrumentation ()
90
+
91
+
92
+ def test_firecracker_tracing_basic_functionality (instrumented_firecracker_binary ):
93
+ """Test that instrumented Firecracker can start and handle basic API calls with trace logging."""
94
+ # Create a temporary directory for this test
95
+ with tempfile .TemporaryDirectory () as temp_dir :
96
+ temp_path = Path (temp_dir )
97
+
98
+ # Create a MicroVM factory with the instrumented binary
99
+ factory = MicroVMFactory (instrumented_firecracker_binary )
100
+
101
+ # Build a microVM
102
+ vm = factory .build ()
103
+
104
+ # Configure basic VM settings
105
+ vm .basic_config (vcpu_count = 1 , mem_size_mib = 128 )
106
+
107
+ # Spawn the VM with trace level logging
108
+ vm .spawn (log_level = "Trace" , log_show_level = True , log_show_origin = True )
109
+
110
+ try :
111
+ # Wait for the API socket to be available
112
+ vm ._wait_for_api_socket ()
113
+
114
+ # Make some basic API calls to generate trace logs
115
+ # Get instance info
116
+ response = vm .api .describe_instance .get ()
117
+ assert response .status_code == 200
118
+
119
+ # Get machine config
120
+ response = vm .api .machine_config .get ()
121
+ assert response .status_code == 200
122
+
123
+ # Set logger to trace level to ensure we capture instrumentation logs
124
+ logger_config = {
125
+ "level" : "Trace"
126
+ }
127
+ response = vm .api .logger .put (** logger_config )
128
+ assert response .status_code == 204
129
+
130
+ # Make another API call after setting trace level
131
+ response = vm .api .describe_instance .get ()
132
+ assert response .status_code == 200
133
+
134
+ # Verify that the VM is working correctly
135
+ assert vm .state == "Not started"
136
+
137
+ finally :
138
+ vm .kill ()
139
+
140
+ # Check the logs for instrumentation traces
141
+ log_data = vm .log_data
142
+
143
+ # Verify that trace level logs are present
144
+ assert "TRACE" in log_data , "Expected TRACE level logs in output"
145
+
146
+ # Look for log-instrument traces (function entry/exit)
147
+ # These should have the format: ThreadId(X)>>function_name or ThreadId(X)<<function_name
148
+ trace_pattern = r"ThreadId\(\d+\)(?:::[^>]*)?(?:>>|<<)\w+"
149
+ trace_matches = re .findall (trace_pattern , log_data )
150
+
151
+ assert len (trace_matches ) > 0 , f"Expected to find log-instrument traces in logs, but found none. Log data: { log_data [:1000 ]} ..."
152
+
153
+ # Verify we see function entry and exit traces
154
+ entry_traces = [match for match in trace_matches if ">>" in match ]
155
+ exit_traces = [match for match in trace_matches if "<<" in match ]
156
+
157
+ assert len (entry_traces ) > 0 , "Expected to find function entry traces (>>)"
158
+ assert len (exit_traces ) > 0 , "Expected to find function exit traces (<<)"
159
+
160
+ # Verify that meaningful functions are being traced
161
+ # Look for traces from main, API handling, or VM management functions
162
+ meaningful_functions = ["main" , "api" , "vmm" , "request" , "response" ]
163
+ found_meaningful = False
164
+
165
+ for trace in trace_matches :
166
+ for func in meaningful_functions :
167
+ if func .lower () in trace .lower ():
168
+ found_meaningful = True
169
+ break
170
+ if found_meaningful :
171
+ break
172
+
173
+ assert found_meaningful , f"Expected to find traces from meaningful functions, but traces were: { trace_matches [:10 ]} "
174
+
175
+
176
+ def test_firecracker_tracing_with_vm_lifecycle (instrumented_firecracker_binary ):
177
+ """Test that instrumented Firecracker works through a complete VM lifecycle with tracing."""
178
+ # Skip this test if we don't have the necessary resources
179
+ if not global_props .host_linux_version_tup >= (4 , 14 ):
180
+ pytest .skip ("Requires Linux kernel >= 4.14" )
181
+
182
+ # Create a MicroVM factory with the instrumented binary
183
+ factory = MicroVMFactory (instrumented_firecracker_binary )
184
+
185
+ # Build a microVM
186
+ vm = factory .build ()
187
+
188
+ # Configure the VM with minimal resources
189
+ vm .basic_config (vcpu_count = 1 , mem_size_mib = 128 , add_root_device = False )
190
+
191
+ # Spawn with trace logging
192
+ vm .spawn (log_level = "Trace" )
193
+
194
+ try :
195
+ # Set logger to trace level
196
+ logger_config = {"level" : "Trace" }
197
+ response = vm .api .logger .put (** logger_config )
198
+ assert response .status_code == 204
199
+
200
+ # Test pause/resume functionality with tracing
201
+ # Note: We can't actually start the VM without a kernel/rootfs,
202
+ # but we can test the API endpoints
203
+
204
+ # Get metrics (this exercises the metrics system)
205
+ metrics = vm .get_all_metrics ()
206
+ assert metrics is not None
207
+
208
+ # Test machine config updates
209
+ machine_config = {
210
+ "vcpu_count" : 1 ,
211
+ "mem_size_mib" : 128 ,
212
+ "smt" : False
213
+ }
214
+ response = vm .api .machine_config .put (** machine_config )
215
+ assert response .status_code == 204
216
+
217
+ finally :
218
+ vm .kill ()
219
+
220
+ # Verify tracing worked throughout the lifecycle
221
+ log_data = vm .log_data
222
+
223
+ # Should have trace logs
224
+ assert "TRACE" in log_data
225
+
226
+ # Should have instrumentation traces
227
+ trace_pattern = r"ThreadId\(\d+\)(?:::[^>]*)?(?:>>|<<)\w+"
228
+ trace_matches = re .findall (trace_pattern , log_data )
229
+ assert len (trace_matches ) > 0 , "Expected instrumentation traces during VM lifecycle"
230
+
231
+
232
+ def test_firecracker_tracing_performance_impact ():
233
+ """Test that instrumented Firecracker still performs reasonably (basic smoke test)."""
234
+ import time
235
+
236
+ # This is a basic performance smoke test to ensure tracing doesn't break functionality
237
+ # We're not doing detailed performance analysis, just ensuring it doesn't hang or crash
238
+
239
+ # Build instrumented binary
240
+ instrumented_binary = build_instrumented_firecracker ()
241
+
242
+ try :
243
+ factory = MicroVMFactory (instrumented_binary )
244
+ vm = factory .build ()
245
+
246
+ # Time the basic configuration and startup
247
+ start_time = time .time ()
248
+
249
+ vm .basic_config (vcpu_count = 1 , mem_size_mib = 128 , add_root_device = False )
250
+ vm .spawn (log_level = "Trace" )
251
+
252
+ # Make several API calls
253
+ for _ in range (5 ):
254
+ response = vm .api .describe_instance .get ()
255
+ assert response .status_code == 200
256
+
257
+ elapsed = time .time () - start_time
258
+
259
+ # Should complete within reasonable time (30 seconds is very generous)
260
+ # This is just to catch major performance regressions or hangs
261
+ assert elapsed < 30 , f"Instrumented Firecracker took too long to start and handle API calls: { elapsed } s"
262
+
263
+ vm .kill ()
264
+
265
+ finally :
266
+ cleanup_instrumentation ()
267
+
268
+
269
+ def test_trace_log_filtering ():
270
+ """Test that trace log filtering works correctly with instrumented Firecracker."""
271
+ instrumented_binary = build_instrumented_firecracker ()
272
+
273
+ try :
274
+ factory = MicroVMFactory (instrumented_binary )
275
+ vm = factory .build ()
276
+
277
+ vm .basic_config (vcpu_count = 1 , mem_size_mib = 128 , add_root_device = False )
278
+ vm .spawn (log_level = "Info" ) # Start with Info level
279
+
280
+ try :
281
+ # Initially should not have trace logs
282
+ initial_log_data = vm .log_data
283
+
284
+ # Set logger to trace level
285
+ logger_config = {"level" : "Trace" }
286
+ response = vm .api .logger .put (** logger_config )
287
+ assert response .status_code == 204
288
+
289
+ # Make API calls to generate traces
290
+ for _ in range (3 ):
291
+ response = vm .api .describe_instance .get ()
292
+ assert response .status_code == 200
293
+
294
+ # Now should have trace logs
295
+ final_log_data = vm .log_data
296
+
297
+ # The new log data should contain trace information
298
+ new_log_data = final_log_data [len (initial_log_data ):]
299
+ assert "TRACE" in new_log_data , "Expected TRACE logs after setting log level to Trace"
300
+
301
+ finally :
302
+ vm .kill ()
303
+
304
+ finally :
305
+ cleanup_instrumentation ()
0 commit comments