PostHog
diff --git a/ee/session_recordings/session_summary/llm/consume.py
+8-9 b/ee/session_recordings/session_summary/llm/consume.py
+8-9
diff --git a/ee/session_recordings/session_summary/output_data.py
+24-6 b/ee/session_recordings/session_summary/output_data.py
+24-6
diff --git a/ee/session_recordings/session_summary/templates/identify-objectives/example.yml
+16-6 b/ee/session_recordings/session_summary/templates/identify-objectives/example.yml
+16-6
diff --git a/ee/session_recordings/session_summary/templates/identify-objectives/prompt.djt
+46-31 b/ee/session_recordings/session_summary/templates/identify-objectives/prompt.djt
+46-31
diff --git a/ee/session_recordings/session_summary/tests/conftest.py
+87-5 b/ee/session_recordings/session_summary/tests/conftest.py
+87-5
@@ -1,5 +1,6 @@
 from datetime import datetime
 import json
+import os
 from typing import Any
 import openai
 import structlog
@@ -99,16 +100,14 @@ def _convert_llm_content_to_session_summary_json(
         session_metadata=session_metadata,
         session_id=session_id,
     )
-    # TODO: Uncomment for local testing
     # Track generation for history of experiments
-    # if final_validation:
-    #     _track_session_summary_generation(
-    #         summary_prompt=summary_prompt,
-    #         raw_session_summary=json.dumps(raw_session_summary.data, indent=4),
-    #         session_summary=json.dumps(session_summary.data, indent=4),
-    #         # TODO: Store path in local env file? Production won't have it set, so no saving will happen
-    #         results_base_dir_path=""",
-    #     )
+    if final_validation and os.environ.get("LOCAL_SESSION_SUMMARY_RESULTS_DIR"):
+        _track_session_summary_generation(
+            summary_prompt=summary_prompt,
+            raw_session_summary=json.dumps(raw_session_summary.data, indent=4),
+            session_summary=json.dumps(session_summary.data, indent=4),
+            results_base_dir_path=os.environ["LOCAL_SESSION_SUMMARY_RESULTS_DIR"],
+        )
     return json.dumps(session_summary.data)
 
 
 
@@ -12,7 +12,9 @@
 
 class RawKeyActionSerializer(serializers.Serializer):
     description = serializers.CharField(min_length=1, max_length=1024, required=False, allow_null=True)
-    failure = serializers.BooleanField(required=False, default=None, allow_null=True)
+    abandonment = serializers.BooleanField(required=False, default=False, allow_null=True)
+    confusion = serializers.BooleanField(required=False, default=False, allow_null=True)
+    exception = serializers.ChoiceField(choices=["blocking", "non-blocking"], required=False, allow_null=True)
     event_id = serializers.CharField(min_length=1, max_length=128, required=False, allow_null=True)
 
 
@@ -72,6 +74,9 @@ class SegmentMetaSerializer(serializers.Serializer):
     events_percentage = serializers.FloatField(min_value=0, max_value=1, required=False, allow_null=True)
     key_action_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
     failure_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
+    abandonment_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
+    confusion_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
+    exception_count = serializers.IntegerField(min_value=0, required=False, allow_null=True)
 
 
 class EnrichedSegmentSerializer(RawSegmentSerializer):
@@ -356,14 +361,27 @@ def _calculate_segment_meta(
     segment_meta_data["key_action_count"] = len(key_group_events)
     # Calculate failure count
     failure_count = 0
+    abandonment_count = 0
+    confusion_count = 0
+    exception_count = 0
     for key_action_event in key_group_events:
-        if_failure = key_action_event.get("failure")
-        if if_failure is None:
-            # If failure isn't generated yet - skip this event
-            continue
-        if if_failure:
+        abandonment = key_action_event.get("abandonment")
+        confusion = key_action_event.get("confusion")
+        exception = key_action_event.get("exception")
+        # Count each type of issue
+        if abandonment:
+            abandonment_count += 1
+        if confusion:
+            confusion_count += 1
+        if exception:
+            exception_count += 1
+        # If any of the fields indicate a failure, increment the total count
+        if abandonment or confusion or exception:
             failure_count += 1
     segment_meta_data["failure_count"] = failure_count
+    segment_meta_data["abandonment_count"] = abandonment_count
+    segment_meta_data["confusion_count"] = confusion_count
+    segment_meta_data["exception_count"] = exception_count
     # Fallback - if enough events processed and the data drastically changes - calculate the meta from the key actions
     if len(key_group_events) < 2:
         # If not enough events yet
 
@@ -13,22 +13,32 @@ key_actions:
     - segment_index: 0 # Must match EXACTLY with a segments.index value
       events:
           - event_id: 'abcd1234' # Must be copied EXACTLY from event data, never invented
-            failure: false
+            abandonment: false # true or false
+            confusion: false # true or false
+            exception: null # null if no exception, `blocking` or `non-blocking` if exception
             description: 'First significant action in this segment'
           - event_id: 'defg4567'
-            failure: false
+            abandonment: false
+            confusion: false
+            exception: null
             description: 'Second action in this segment'
           # Note: The actual number of events should be based on your analysis of the current segment events in the session data.
     - segment_index: 1
       events:
           - event_id: 'ghij7890'
-            failure: false
+            abandonment: false
+            confusion: false
+            exception: null
             description: 'Significant action in this segment'
           - event_id: 'mnop3456'
-            failure: true
-            description: 'User attempted to perform an action but encountered an error'
+            abandonment: false
+            confusion: true
+            exception: 'blocking'
+            description: 'User repeatedly attempted to perform an action but encountered an error'
           - event_id: 'stuv9012'
-            failure: false
+            abandonment: false
+            confusion: false
+            exception: null
             description: 'Final action in this chronological segment'
     # Note: Each segment should have key actions.
 
 
@@ -138,36 +138,45 @@ After segmenting the timeline, identify the most significant actions within each
 3.2. Key Actions Guidelines:
 
 - Include the most important key actions per segment that follow the prioritization above.
-- Actions must be listed in chronological order within each segment
-- Each key action could be a successful user action/sequence of actions (`"failure": false`) or a failure/sequence of failures (`"failure": true`) based on the guidelines below, and should be an actual event from the timeline.
+- Actions must be listed in chronological order within each segment.
+- Each key action is a user action/sequence of actions, and should be classified based on the issue types below (exception, confusion, abandonment).
+- Each key action is an actual event or the first event in a sequence of events from the timeline.
 
-3.3. Failure Identification Guidelines (CRITICAL):
+3.3. Issue Identification Guidelines (CRITICAL):
 
-- NEVER flag as failures:
+- NEVER flag issues for:
   ✗ Normal page navigation
   ✗ Sequential UI interactions (changing filters, adjusting parameters)
   ✗ Any other action without direct evidence of technical error or user frustration
   ✗ Non-blocking background errors (tracking failures, minor rendering glitches, etc.)
 
-- Flag event as a failure (`failure: true`) with EXPLICIT evidence of either:
+- Flag event issues with EXPLICIT evidence of:
 
-  Technical errors:
-  ✓ Event name/type contains: 'exception', 'failed', 'error', etc.
+  Exceptions (`exception: null | blocking | non-blocking`):
+  ✓ Event name or type contains: 'exception', 'failed', 'error', etc.
+  ✓ `exception_types` or `exception_values` provide exception context
   ✓ `elements_chain_texts` contains error messages (e.g., "Try again", "Failed to load", etc.)
-  ✓ Session abandonment shortly after exception events
+  ✓ Mark as `blocking` when the error prevented the user from continuing their intended flow
+  ✓ Mark as `non-blocking` when the user could continue despite a user-visible error (background errors covered by the NEVER list above stay unflagged)
+  ✓ Set as `null` when no technical exception occurred
 
-  User experience failures:
-  ✓ Form abandonment after significant time investment (started typing but left without submitting)
-  ✓ Multiple rapid identical form submissions followed by a change in form data
-  ✓ Rageclicks (multiple rapid clicks on the same element with no visible response)
-  ✓ Conversion flow abandonment (e.g., leaving checkout, subscription, or signup flow)
-  ✓ Back-and-forth navigation indicating search for functionality
+  Confusion (`confusion: true | false`):
+  ✓ Back-and-forth navigation loops indicating search for functionality
+  ✓ Multiple rapid identical form submissions 
   ✓ Repeated attempts to complete the same action without success
-  ✓ And other similar patterns showing user frustration or inability to complete tasks
-
-- Failure description requirements:
-  - For technical failures: Specify error nature (API failure, validation error, timeout, etc.)
-  - For UX failures: Describe the abandoned task, point of frustration, or flow impediment
+  ✓ Rageclicks (multiple rapid clicks on the same element with no visible response)
+  ✓ Deadclicks (clicks on non-interactive elements)
+  
+  Abandonment (`abandonment: true | false`):
+  ✓ Form abandonment after significant time investment (started typing but left without submitting)
+  ✓ Conversion flow abandonment (e.g., leaving a checkout, subscription, or signup flow midway)
+  ✓ Feature exploration followed by exit without completion
+  ✓ Session termination during a multi-step process
+
+- Issue description requirements:
+  - For exceptions: Specify error nature (API failure, validation error, timeout, etc.) and impact (blocking/non-blocking)
+  - For confusion: Describe the specific pattern indicating confusion and potential cause
+  - For abandonment: Describe the abandoned flow/task and approximate progress before abandonment
   - Include potential causes when identifiable
   - Explain impact on user flow and business goals
   - Provide more detail than regular action descriptions, including potential business impact
@@ -237,19 +246,25 @@ After analyzing the chronological segments and key actions, evaluate the overall
 6.3. Session Success Determination:
 
 - Mark the session as successful (true) when:
-  - User completed one or more significant conversion actions
-  - User accomplished apparent goals despite minor obstacles
-  - Session shows logical progression and completion
+  - User completed one or more significant conversion actions, even if they experienced:
+    * Multiple non-blocking exceptions
+    * Minor confusion moments
+    * Abandonment of secondary/non-critical flows
+  - User accomplished their apparent primary goals despite obstacles
+  - Session shows logical progression to completion of key user objectives
+  
 - Mark the session as unsuccessful (false) when:
-  - User abandoned critical conversion flows
-  - Technical errors prevented completion of conversion attempts
-  - Session ended abruptly during an important process
+  - User experienced blocking technical errors that prevented completion of primary conversion attempts with no successful workaround
+  - User abandoned critical conversion flows (signup, checkout, upgrade, etc.) at a late stage
+  - Session ended abruptly during an important process that was the primary goal
+  - User showed clear signs of significant frustration (multiple rageclicks, repeated failed attempts) on critical paths with no successful resolution
 
 6.4. Final Outcome Description:
 
 - Provide a short focused summary (1-2 sentences, up to 30 words in total) of the overall user journey
-- Emphasize conversion successes or failures
-- Note any critical issues that affected the user's workflow
+- For successful sessions: emphasize completed conversions despite any minor issues encountered
+- For unsuccessful sessions: clearly identify the critical blocking issue that prevented the primary goal completion
+- Always prioritize conversion completions over minor issues when determining session success
 
 # Step 7: Self-Consistency Check
 
@@ -265,13 +280,13 @@ Before finalizing your analysis, verify:
 
 7.2. Key Action Prioritization Verification:
 
-- Are conversion events and conversion-blocking failures properly identified in each segment?
+- Are conversion events and conversion-blocking issues properly identified in each segment?
 - Are critical interruptions to user flow highlighted appropriately?
 - Do selected key actions accurately represent the most significant events in each segment?
 - Are event IDs copied EXACTLY from the event data (never invented)?
-- Are failure flags supported by explicit evidence in the event data?
-- Have you properly distinguished between technical failures and user experience failures?
-- Have you aggressively consolidated similar actions and errors?
+- Are issue flags (abandonment, confusion, exception) supported by explicit evidence in the event data?
+- Have you properly distinguished between technical exceptions, user confusion, and flow abandonment?
+- Have you aggressively consolidated similar actions and issues?
 
 7.3. Conversion Focus Verification:
 
 
@@ -24,22 +24,32 @@ def mock_valid_llm_yaml_response() -> str:
     - segment_index: 0
       events:
           - event_id: 'abcd1234'
-            failure: false
             description: 'First significant action in this segment'
+            abandonment: false
+            confusion: false
+            exception: null
           - event_id: 'defg4567'
-            failure: false
             description: 'Second action in this segment'
+            abandonment: false
+            confusion: false
+            exception: null
     - segment_index: 1
       events:
           - event_id: 'ghij7890'
-            failure: false
             description: 'Significant action in this segment'
+            abandonment: false
+            confusion: false
+            exception: null
           - event_id: 'mnop3456'
-            failure: true
             description: 'User attempted to perform an action but encountered an error'
+            abandonment: false
+            confusion: true
+            exception: 'blocking'
           - event_id: 'stuv9012'
-            failure: false
             description: 'Final action in this chronological segment'
+            abandonment: true
+            confusion: false
+            exception: null
 
 segment_outcomes:
     - segment_index: 0
@@ -55,6 +65,78 @@ def mock_valid_llm_yaml_response() -> str:
 ```"""
 
 
+@pytest.fixture
+def mock_loaded_llm_json_response() -> dict[str, Any]:
+    """
+    Exact YAML response, but converted into JSON.
+    """
+    return {
+        "segments": [
+            {"index": 0, "start_event_id": "abcd1234", "end_event_id": "vbgs1287", "name": "Example Segment"},
+            {"index": 1, "start_event_id": "gfgz6242", "end_event_id": "stuv9012", "name": "Another Example Segment"},
+        ],
+        "key_actions": [
+            {
+                "segment_index": 0,
+                "events": [
+                    {
+                        "event_id": "abcd1234",
+                        "description": "First significant action in this segment",
+                        "abandonment": False,
+                        "confusion": False,
+                        "exception": None,
+                    },
+                    {
+                        "event_id": "defg4567",
+                        "description": "Second action in this segment",
+                        "abandonment": False,
+                        "confusion": False,
+                        "exception": None,
+                    },
+                ],
+            },
+            {
+                "segment_index": 1,
+                "events": [
+                    {
+                        "event_id": "ghij7890",
+                        "description": "Significant action in this segment",
+                        "abandonment": False,
+                        "confusion": False,
+                        "exception": None,
+                    },
+                    {
+                        "event_id": "mnop3456",
+                        "description": "User attempted to perform an action but encountered an error",
+                        "abandonment": False,
+                        "confusion": True,
+                        "exception": "blocking",
+                    },
+                    {
+                        "event_id": "stuv9012",
+                        "description": "Final action in this chronological segment",
+                        "abandonment": True,
+                        "confusion": False,
+                        "exception": None,
+                    },
+                ],
+            },
+        ],
+        "segment_outcomes": [
+            {"segment_index": 0, "success": True, "summary": "Detailed description incorporating key action insights"},
+            {
+                "segment_index": 1,
+                "success": False,
+                "summary": "Description highlighting encountered failures and their impact",
+            },
+        ],
+        "session_outcome": {
+            "success": True,
+            "description": "Concise session outcome description focusing on conversion attempts, feature usage, and critical issues",
+        },
+    }
+
+
 @pytest.fixture
 def mock_chat_completion(mock_valid_llm_yaml_response: str) -> ChatCompletion:
     return ChatCompletion(