Skip to content

Commit af5acd6

Browse files
tonykao8080 and Tony Kao authored
torchx - add exception_type, exception_message, and exception_source_location to torchx event (#966) (#966)
Summary: Add the exception type, exception message, and exception source location to the torchx event. This allows for better logging of exception details for further analysis.

Differential Revision: D64406552

Co-authored-by: Tony Kao <tonykao@meta.com>
1 parent 3855ae4 commit af5acd6

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

torchx/runner/events/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
2121
"""
2222

23+
import json
2324
import logging
25+
import sys
2426
import time
2527
import traceback
2628
from types import TracebackType
@@ -123,6 +125,20 @@ def __exit__(
123125
) // 1000
124126
if traceback_type:
125127
self._torchx_event.raw_exception = traceback.format_exc()
128+
typ, value, tb = sys.exc_info()
129+
if tb:
130+
last_frame = traceback.extract_tb(tb)[-1]
131+
self._torchx_event.exception_source_location = json.dumps(
132+
{
133+
"filename": last_frame.filename,
134+
"lineno": last_frame.lineno,
135+
"name": last_frame.name,
136+
}
137+
)
138+
if exec_type:
139+
self._torchx_event.exception_type = exec_type.__name__
140+
if exec_value:
141+
self._torchx_event.exception_message = str(exec_value)
126142
record(self._torchx_event)
127143

128144
def _generate_torchx_event(

torchx/runner/events/api.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ class TorchxEvent:
5252
wall_time_usec: Optional[int] = None
5353
start_epoch_time_usec: Optional[int] = None
5454
workspace: Optional[str] = None
55+
exception_type: Optional[str] = None
56+
exception_message: Optional[str] = None
57+
exception_source_location: Optional[str] = None
5558

5659
def __str__(self) -> str:
5760
return self.serialize()

0 commit comments

Comments
 (0)