Skip to content

Commit 101e85d

Browse files
authored
feat: Improve issues classification (abandonment/confusion/exception instead of failure) (#32100)
1 parent b485b08 commit 101e85d

File tree

9 files changed

+489
-726
lines changed

9 files changed

+489
-726
lines changed

ee/session_recordings/session_summary/llm/consume.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import datetime
22
import json
3+
import os
34
from typing import Any
45
import openai
56
import structlog
@@ -99,16 +100,14 @@ def _convert_llm_content_to_session_summary_json(
99100
session_metadata=session_metadata,
100101
session_id=session_id,
101102
)
102-
# TODO: Uncomment for local testing
103103
# Track generation for history of experiments
104-
# if final_validation:
105-
# _track_session_summary_generation(
106-
# summary_prompt=summary_prompt,
107-
# raw_session_summary=json.dumps(raw_session_summary.data, indent=4),
108-
# session_summary=json.dumps(session_summary.data, indent=4),
109-
# # TODO: Store path in local env file? Production won't have it set, so no saving will happen
110-
# results_base_dir_path=""",
111-
# )
104+
if final_validation and os.environ.get("LOCAL_SESSION_SUMMARY_RESULTS_DIR"):
105+
_track_session_summary_generation(
106+
summary_prompt=summary_prompt,
107+
raw_session_summary=json.dumps(raw_session_summary.data, indent=4),
108+
session_summary=json.dumps(session_summary.data, indent=4),
109+
results_base_dir_path=os.environ["LOCAL_SESSION_SUMMARY_RESULTS_DIR"],
110+
)
112111
return json.dumps(session_summary.data)
113112

114113

ee/session_recordings/session_summary/output_data.py

+24-6
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212

1313
class RawKeyActionSerializer(serializers.Serializer):
1414
description = serializers.CharField(min_length=1, max_length=1024, required=False, allow_null=True)
15-
failure = serializers.BooleanField(required=False, default=None, allow_null=True)
15+
abandonment = serializers.BooleanField(required=False, default=False, allow_null=True)
16+
confusion = serializers.BooleanField(required=False, default=False, allow_null=True)
17+
exception = serializers.ChoiceField(choices=["blocking", "non-blocking"], required=False, allow_null=True)
1618
event_id = serializers.CharField(min_length=1, max_length=128, required=False, allow_null=True)
1719

1820

@@ -72,6 +74,9 @@ class SegmentMetaSerializer(serializers.Serializer):
7274
events_percentage = serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True)
7375
key_action_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
7476
failure_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
77+
abandonment_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
78+
confusion_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
79+
exception_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
7580

7681

7782
class EnrichedSegmentSerializer(RawSegmentSerializer):
@@ -356,14 +361,27 @@ def _calculate_segment_meta(
356361
segment_meta_data["key_action_count"] = len(key_group_events)
357362
# Calculate the total failure count and the per-issue-type counts (abandonment, confusion, exception)
358363
failure_count = 0
364+
abandonment_count = 0
365+
confusion_count = 0
366+
exception_count = 0
359367
for key_action_event in key_group_events:
360-
if_failure = key_action_event.get("failure")
361-
if if_failure is None:
362-
# If failure isn't generated yet - skip this event
363-
continue
364-
if if_failure:
368+
abandonment = key_action_event.get("abandonment")
369+
confusion = key_action_event.get("confusion")
370+
exception = key_action_event.get("exception")
371+
# Count each type of issue
372+
if abandonment:
373+
abandonment_count += 1
374+
if confusion:
375+
confusion_count += 1
376+
if exception:
377+
exception_count += 1
378+
# If any of the fields indicate a failure, increment the total count
379+
if abandonment or confusion or exception:
365380
failure_count += 1
366381
segment_meta_data["failure_count"] = failure_count
382+
segment_meta_data["abandonment_count"] = abandonment_count
383+
segment_meta_data["confusion_count"] = confusion_count
384+
segment_meta_data["exception_count"] = exception_count
367385
# Fallback - if enough events processed and the data drastically changes - calculate the meta from the key actions
368386
if len(key_group_events) < 2:
369387
# If not enough events yet

ee/session_recordings/session_summary/templates/identify-objectives/example.yml

+16-6
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,32 @@ key_actions:
1313
- segment_index: 0 # Must match EXACTLY with a segments.index value
1414
events:
1515
- event_id: 'abcd1234' # Must be copied EXACTLY from event data, never invented
16-
failure: false
16+
abandonment: false # true or false
17+
confusion: false # true or false
18+
exception: null # null if no exception, `blocking` or `non-blocking` if exception
1719
description: 'First significant action in this segment'
1820
- event_id: 'defg4567'
19-
failure: false
21+
abandonment: false
22+
confusion: false
23+
exception: null
2024
description: 'Second action in this segment'
2125
# Note: The actual number of events should be based on your analysis of the current segment events in the session data.
2226
- segment_index: 1
2327
events:
2428
- event_id: 'ghij7890'
25-
failure: false
29+
abandonment: false
30+
confusion: false
31+
exception: null
2632
description: 'Significant action in this segment'
2733
- event_id: 'mnop3456'
28-
failure: true
29-
description: 'User attempted to perform an action but encountered an error'
34+
abandonment: false
35+
confusion: true
36+
exception: 'blocking'
37+
description: 'User repeatedly attempted to perform an action but encountered an error'
3038
- event_id: 'stuv9012'
31-
failure: false
39+
abandonment: false
40+
confusion: false
41+
exception: null
3242
description: 'Final action in this chronological segment'
3343
# Note: Each segment should have key actions.
3444

ee/session_recordings/session_summary/templates/identify-objectives/prompt.djt

+46-31
Original file line numberDiff line numberDiff line change
@@ -138,36 +138,45 @@ After segmenting the timeline, identify the most significant actions within each
138138
3.2. Key Actions Guidelines:
139139

140140
- Include the most important key actions per segment that follow the prioritization above.
141-
- Actions must be listed in chronological order within each segment
142-
- Each key action could be a successful user action/sequence of actions (`"failure": false`) or a failure/sequence of failures (`"failure": true`) based on the guidelines below, and should be an actual event from the timeline.
141+
- Actions must be listed in chronological order within each segment.
142+
- Each key action is a user action/sequence of actions, and should be classified based on the issue types below (exception, confusion, abandonment).
143+
- Each key action is an actual event or the first event in a sequence of events from the timeline.
143144

144-
3.3. Failure Identification Guidelines (CRITICAL):
145+
3.3. Issue Identification Guidelines (CRITICAL):
145146

146-
- NEVER flag as failures:
147+
- NEVER flag issues for:
147148
✗ Normal page navigation
148149
✗ Sequential UI interactions (changing filters, adjusting parameters)
149150
✗ Any other action without direct evidence of technical error or user frustration
150151
✗ Background errors with no visible effect on the user (tracking failures, minor rendering glitches, etc.)
151152

152-
- Flag event as a failure (`failure: true`) with EXPLICIT evidence of either:
153+
- Flag event issues with EXPLICIT evidence of:
153154

154-
Technical errors:
155-
✓ Event name/type contains: 'exception', 'failed', 'error', etc.
155+
Exceptions (`exception: null | blocking | non-blocking`):
156+
✓ Event name or type contains: 'exception', 'failed', 'error', etc.
157+
✓ `exception_types` or `exception_values` provide exception context
156158
✓ `elements_chain_texts` contains error messages (e.g., "Try again", "Failed to load", etc.)
157-
✓ Session abandonment shortly after exception events
159+
✓ Mark as `blocking` when the error prevented user from continuing their intended flow
160+
✓ Mark as `non-blocking` when the user could continue despite the error
161+
✓ Set as `null` when no technical exception occurred
158162

159-
User experience failures:
160-
✓ Form abandonment after significant time investment (started typing but left without submitting)
161-
✓ Multiple rapid identical form submissions followed by a change in form data
162-
✓ Rageclicks (multiple rapid clicks on the same element with no visible response)
163-
✓ Conversion flow abandonment (e.g., leaving checkout, subscription, or signup flow)
164-
✓ Back-and-forth navigation indicating search for functionality
163+
Confusion (`confusion: true | false`):
164+
✓ Back-and-forth navigation loops indicating search for functionality
165+
✓ Multiple rapid identical form submissions
165166
✓ Repeated attempts to complete the same action without success
166-
✓ And other similar patterns showing user frustration or inability to complete tasks
167-
168-
- Failure description requirements:
169-
- For technical failures: Specify error nature (API failure, validation error, timeout, etc.)
170-
- For UX failures: Describe the abandoned task, point of frustration, or flow impediment
167+
✓ Rageclicks (multiple rapid clicks on the same element with no visible response)
168+
✓ Deadclicks (clicks on non-interactive elements)
169+
170+
Abandonment (`abandonment: true | false`):
171+
✓ Form abandonment after significant time investment (started typing but left without submitting)
172+
✓ Conversion flow abandonment (e.g., leaving checkout, subscription, or signup flow midway)
173+
✓ Feature exploration followed by exit without completion
174+
✓ Session termination during a multi-step process
175+
176+
- Issue description requirements:
177+
- For exceptions: Specify error nature (API failure, validation error, timeout, etc.) and impact (blocking/non-blocking)
178+
- For confusion: Describe the specific pattern indicating confusion and potential cause
179+
- For abandonment: Describe the abandoned flow/task and approximate progress before abandonment
171180
- Include potential causes when identifiable
172181
- Explain impact on user flow and business goals
173182
- Provide more detail than regular action descriptions, including potential business impact
@@ -237,19 +246,25 @@ After analyzing the chronological segments and key actions, evaluate the overall
237246
6.3. Session Success Determination:
238247

239248
- Mark the session as successful (true) when:
240-
- User completed one or more significant conversion actions
241-
- User accomplished apparent goals despite minor obstacles
242-
- Session shows logical progression and completion
249+
- User completed one or more significant conversion actions, even if they experienced:
250+
* Multiple non-blocking exceptions
251+
* Minor confusion moments
252+
* Abandonment of secondary/non-critical flows
253+
- User accomplished their apparent primary goals despite obstacles
254+
- Session shows logical progression to completion of key user objectives
255+
243256
- Mark the session as unsuccessful (false) when:
244-
- User abandoned critical conversion flows
245-
- Technical errors prevented completion of conversion attempts
246-
- Session ended abruptly during an important process
257+
- User experienced blocking technical errors that prevented completion of primary conversion attempts with no successful workaround
258+
- User abandoned critical conversion flows (signup, checkout, upgrade, etc.) at a late stage
259+
- Session ended abruptly during an important process that was the primary goal
260+
- User showed clear signs of significant frustration (multiple rageclicks, repeated failed attempts) on critical paths with no successful resolution
247261

248262
6.4. Final Outcome Description:
249263

250264
- Provide a short focused summary (1-2 sentences, up to 30 words in total) of the overall user journey
251-
- Emphasize conversion successes or failures
252-
- Note any critical issues that affected the user's workflow
265+
- For successful sessions: emphasize completed conversions despite any minor issues encountered
266+
- For unsuccessful sessions: clearly identify the critical blocking issue that prevented the primary goal completion
267+
- Always prioritize conversion completions over minor issues when determining session success
253268

254269
# Step 7: Self-Consistency Check
255270

@@ -265,13 +280,13 @@ Before finalizing your analysis, verify:
265280

266281
7.2. Key Action Prioritization Verification:
267282

268-
- Are conversion events and conversion-blocking failures properly identified in each segment?
283+
- Are conversion events and conversion-blocking issues properly identified in each segment?
269284
- Are critical interruptions to user flow highlighted appropriately?
270285
- Do selected key actions accurately represent the most significant events in each segment?
271286
- Are event IDs copied EXACTLY from the event data (never invented)?
272-
- Are failure flags supported by explicit evidence in the event data?
273-
- Have you properly distinguished between technical failures and user experience failures?
274-
- Have you aggressively consolidated similar actions and errors?
287+
- Are issue flags (abandonment, confusion, exception) supported by explicit evidence in the event data?
288+
- Have you properly distinguished between technical exceptions, user confusion, and flow abandonment?
289+
- Have you aggressively consolidated similar actions and issues?
275290

276291
7.3. Conversion Focus Verification:
277292

ee/session_recordings/session_summary/tests/conftest.py

+87-5
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,32 @@ def mock_valid_llm_yaml_response() -> str:
2424
- segment_index: 0
2525
events:
2626
- event_id: 'abcd1234'
27-
failure: false
2827
description: 'First significant action in this segment'
28+
abandonment: false
29+
confusion: false
30+
exception: null
2931
- event_id: 'defg4567'
30-
failure: false
3132
description: 'Second action in this segment'
33+
abandonment: false
34+
confusion: false
35+
exception: null
3236
- segment_index: 1
3337
events:
3438
- event_id: 'ghij7890'
35-
failure: false
3639
description: 'Significant action in this segment'
40+
abandonment: false
41+
confusion: false
42+
exception: null
3743
- event_id: 'mnop3456'
38-
failure: true
3944
description: 'User attempted to perform an action but encountered an error'
45+
abandonment: false
46+
confusion: true
47+
exception: 'blocking'
4048
- event_id: 'stuv9012'
41-
failure: false
4249
description: 'Final action in this chronological segment'
50+
abandonment: true
51+
confusion: false
52+
exception: null
4353
4454
segment_outcomes:
4555
- segment_index: 0
@@ -55,6 +65,78 @@ def mock_valid_llm_yaml_response() -> str:
5565
```"""
5666

5767

68+
@pytest.fixture
69+
def mock_loaded_llm_json_response() -> dict[str, Any]:
70+
"""
71+
Exact YAML response, but converted into JSON.
72+
"""
73+
return {
74+
"segments": [
75+
{"index": 0, "start_event_id": "abcd1234", "end_event_id": "vbgs1287", "name": "Example Segment"},
76+
{"index": 1, "start_event_id": "gfgz6242", "end_event_id": "stuv9012", "name": "Another Example Segment"},
77+
],
78+
"key_actions": [
79+
{
80+
"segment_index": 0,
81+
"events": [
82+
{
83+
"event_id": "abcd1234",
84+
"description": "First significant action in this segment",
85+
"abandonment": False,
86+
"confusion": False,
87+
"exception": None,
88+
},
89+
{
90+
"event_id": "defg4567",
91+
"description": "Second action in this segment",
92+
"abandonment": False,
93+
"confusion": False,
94+
"exception": None,
95+
},
96+
],
97+
},
98+
{
99+
"segment_index": 1,
100+
"events": [
101+
{
102+
"event_id": "ghij7890",
103+
"description": "Significant action in this segment",
104+
"abandonment": False,
105+
"confusion": False,
106+
"exception": None,
107+
},
108+
{
109+
"event_id": "mnop3456",
110+
"description": "User attempted to perform an action but encountered an error",
111+
"abandonment": False,
112+
"confusion": True,
113+
"exception": "blocking",
114+
},
115+
{
116+
"event_id": "stuv9012",
117+
"description": "Final action in this chronological segment",
118+
"abandonment": True,
119+
"confusion": False,
120+
"exception": None,
121+
},
122+
],
123+
},
124+
],
125+
"segment_outcomes": [
126+
{"segment_index": 0, "success": True, "summary": "Detailed description incorporating key action insights"},
127+
{
128+
"segment_index": 1,
129+
"success": False,
130+
"summary": "Description highlighting encountered failures and their impact",
131+
},
132+
],
133+
"session_outcome": {
134+
"success": True,
135+
"description": "Concise session outcome description focusing on conversion attempts, feature usage, and critical issues",
136+
},
137+
}
138+
139+
58140
@pytest.fixture
59141
def mock_chat_completion(mock_valid_llm_yaml_response: str) -> ChatCompletion:
60142
return ChatCompletion(

0 commit comments

Comments
 (0)