@@ -402,8 +402,6 @@ async def send_cdp(self, method: str, params: Optional[dict] = None) -> dict:
402
402
self ._stagehand .logger .debug (
403
403
f"CDP command '{ method } ' failed: { e } . Attempting to reconnect..."
404
404
)
405
- # Try to reconnect
406
- await self ._ensure_cdp_session ()
407
405
# Handle specific errors if needed (e.g., session closed)
408
406
if "Target closed" in str (e ) or "Session closed" in str (e ):
409
407
# Attempt to reset the client if the session closed unexpectedly
async def _wait_for_settled_dom(self, timeout_ms: Optional[int] = None):
    """
    Wait for the DOM to settle (stop changing) before proceeding.

    **Definition of "settled"**
      • No in-flight network requests (except WebSocket / Server-Sent-Events).
      • That idle state lasts for at least **500 ms** (the "quiet-window").

    **How it works**
      1. Enables the CDP ``Network`` and ``Page`` domains and requests
         ``Target.setAutoAttach`` with ``flatten: True`` (workers and shared
         workers are excluded by the filter).
      2. Every time ``Network.requestWillBeSent`` fires, the request ID is
         added to the ``inflight`` set.
      3. When the request finishes (``loadingFinished``, ``loadingFailed``,
         ``requestServedFromCache``, or a ``responseReceived`` whose URL is a
         *data:* URL) the request ID is removed.
      4. *Document* requests are also mapped **frameId → requestId**; when
         ``Page.frameStoppedLoading`` fires the corresponding Document
         request is removed immediately.
      5. A stalled-request sweep runs every 500 ms. Any tracked request that
         has been open for more than 2 s is forcibly removed, so a request
         that never reports completion cannot block the wait forever.
      6. When ``inflight`` becomes empty a 500 ms timer is started. If no
         new request appears before it fires, the wait completes and the
         DOM is considered settled.
      7. A global guard (``timeout_ms`` or the client's
         ``dom_settle_timeout_ms``, default 30000 ms) ensures the wait always
         completes; if it fires while requests are outstanding, their count
         is logged.

    Args:
        timeout_ms (int, optional): Maximum time to wait in milliseconds.
            If None (or 0), uses the stagehand client's
            dom_settle_timeout_ms, falling back to 30000.

    Raises:
        Exception: Errors from obtaining the CDP client or from the
            ``client.send`` setup calls are not caught here and propagate
            to the caller.
    """
    import asyncio
    import time

    quiet_window_s = 0.5  # idle time required before declaring "settled"
    sweep_interval_s = 0.5  # how often stalled requests are looked for
    stalled_after_s = 2.0  # age at which a tracked request is given up on

    timeout = timeout_ms or getattr(self._stagehand, "dom_settle_timeout_ms", 30000)
    client = await self.get_cdp_client()

    # Probe the page; if the title cannot be read yet, wait for the document.
    try:
        await self._page.title()
    except Exception:
        await self._page.wait_for_load_state("domcontentloaded")

    # Enable the CDP domains whose events are consumed below.
    await client.send("Network.enable")
    await client.send("Page.enable")
    await client.send(
        "Target.setAutoAttach",
        {
            "autoAttach": True,
            "waitForDebuggerOnStart": False,
            "flatten": True,
            "filter": [
                {"type": "worker", "exclude": True},
                {"type": "shared_worker", "exclude": True},
            ],
        },
    )

    inflight = set()  # request IDs currently considered open
    meta = {}  # request ID -> {"url": str, "start": monotonic seconds}
    doc_by_frame = {}  # frame ID -> request ID of its Document request

    quiet_timer = None
    stalled_request_sweep_task = None
    timeout_task = None
    loop = asyncio.get_running_loop()
    done_event = asyncio.Event()

    def clear_quiet():
        """Cancel the pending quiet-window timer, if any."""
        nonlocal quiet_timer
        if quiet_timer is not None:
            quiet_timer.cancel()
            quiet_timer = None

    def resolve_done():
        """Stop the timer and the sweeper, then release the waiter."""
        clear_quiet()
        if (
            stalled_request_sweep_task is not None
            and not stalled_request_sweep_task.done()
        ):
            stalled_request_sweep_task.cancel()
        done_event.set()

    def maybe_quiet():
        """Start the quiet-window timer if nothing is in flight."""
        nonlocal quiet_timer
        if not inflight and quiet_timer is None:
            quiet_timer = loop.call_later(quiet_window_s, resolve_done)

    def forget(request_id: str):
        """Drop every record of a request, including its frame mapping."""
        inflight.discard(request_id)
        meta.pop(request_id, None)
        stale_frames = [
            fid for fid, rid in doc_by_frame.items() if rid == request_id
        ]
        for fid in stale_frames:
            del doc_by_frame[fid]

    def finish_req(request_id: str):
        """Mark a tracked request as finished and restart the quiet window."""
        if request_id not in inflight:
            return
        forget(request_id)
        clear_quiet()
        maybe_quiet()

    def on_request(params):
        """Handle Network.requestWillBeSent."""
        # Long-lived streams never "finish", so they are not tracked.
        if params.get("type") in ("WebSocket", "EventSource"):
            return

        request_id = params["requestId"]
        inflight.add(request_id)
        meta[request_id] = {
            "url": params["request"]["url"],
            "start": time.monotonic(),
        }

        if params.get("type") == "Document" and params.get("frameId"):
            doc_by_frame[params["frameId"]] = request_id

        # New activity invalidates any quiet window in progress.
        clear_quiet()

    def on_finish(params):
        """Handle Network.loadingFinished."""
        finish_req(params["requestId"])

    def on_failed(params):
        """Handle Network.loadingFailed."""
        finish_req(params["requestId"])

    def on_cached(params):
        """Handle Network.requestServedFromCache."""
        finish_req(params["requestId"])

    def on_data_url(params):
        """Handle Network.responseReceived for data: URLs."""
        if params.get("response", {}).get("url", "").startswith("data:"):
            finish_req(params["requestId"])

    def on_frame_stop(params):
        """Handle Page.frameStoppedLoading."""
        frame_id = params["frameId"]
        if frame_id in doc_by_frame:
            finish_req(doc_by_frame[frame_id])

    handlers = (
        ("Network.requestWillBeSent", on_request),
        ("Network.loadingFinished", on_finish),
        ("Network.loadingFailed", on_failed),
        ("Network.requestServedFromCache", on_cached),
        ("Network.responseReceived", on_data_url),
        ("Page.frameStoppedLoading", on_frame_stop),
    )

    async def sweep_stalled_requests():
        """Periodically evict tracked requests older than the stall limit."""
        while not done_event.is_set():
            await asyncio.sleep(sweep_interval_s)
            now = time.monotonic()
            for request_id, request_meta in list(meta.items()):
                if now - request_meta["start"] > stalled_after_s:
                    forget(request_id)
                    self._stagehand.logger.debug(
                        "⏳ forcing completion of stalled iframe document",
                        extra={"url": request_meta["url"][:120]},
                    )
                    maybe_quiet()

    async def timeout_guard():
        """Release the waiter once the overall timeout has elapsed."""
        await asyncio.sleep(timeout / 1000)
        if not done_event.is_set():
            if inflight:
                self._stagehand.logger.debug(
                    "⚠️ DOM-settle timeout reached – network requests still pending",
                    extra={"count": len(inflight)},
                )
            resolve_done()

    # Only listeners that were actually attached are detached in cleanup.
    registered = []
    try:
        for event_name, handler in handlers:
            client.on(event_name, handler)
            registered.append((event_name, handler))

        stalled_request_sweep_task = asyncio.create_task(sweep_stalled_requests())
        timeout_task = asyncio.create_task(timeout_guard())

        # Nothing may be in flight at all; start the quiet window right away.
        maybe_quiet()

        await done_event.wait()
    finally:
        clear_quiet()

        background = [
            task
            for task in (stalled_request_sweep_task, timeout_task)
            if task is not None
        ]
        for task in background:
            if not task.done():
                task.cancel()

        for event_name, handler in registered:
            client.remove_listener(event_name, handler)

        if background:
            # return_exceptions=True collects the children's CancelledError
            # as results; a cancellation of this coroutine still propagates.
            results = await asyncio.gather(*background, return_exceptions=True)
            for result in results:
                if isinstance(result, Exception):
                    self._stagehand.logger.debug(
                        f"DOM-settle background task failed: {result}"
                    )
513
653
514
654
# Forward other Page methods to underlying Playwright page
515
655
def __getattr__ (self , name ):
0 commit comments