@@ -110,7 +110,11 @@ class _BasicCrawlerOptions(TypedDict):
     """HTTP client used by `BasicCrawlingContext.send_request` method."""
 
     max_request_retries: NotRequired[int]
-    """Maximum number of attempts to process a single request."""
+    """Specifies the maximum number of retries allowed for a request if its processing fails.
+    This includes retries due to navigation errors or errors thrown from user-supplied functions
+    (`request_handler`, `pre_navigation_hooks` etc.).
+
+    This limit does not apply to retries triggered by session rotation (see `max_session_rotations`)."""
 
     max_requests_per_crawl: NotRequired[int | None]
     """Maximum number of pages to open during a crawl. The crawl stops upon reaching this limit.
@@ -119,7 +123,10 @@ class _BasicCrawlerOptions(TypedDict):
 
     max_session_rotations: NotRequired[int]
     """Maximum number of session rotations per request. The crawler rotates the session if a proxy error occurs
-    or if the website blocks the request."""
+    or if the website blocks the request.
+
+    The session rotations are not counted towards the `max_request_retries` limit.
+    """
 
     max_crawl_depth: NotRequired[int | None]
     """Specifies the maximum crawl depth. If set, the crawler will stop processing links beyond this depth.
@@ -269,14 +276,20 @@ def __init__(
             proxy_configuration: HTTP proxy configuration used when making requests.
             http_client: HTTP client used by `BasicCrawlingContext.send_request` method.
             request_handler: A callable responsible for handling requests.
-            max_request_retries: Maximum number of attempts to process a single request.
+            max_request_retries: Specifies the maximum number of retries allowed for a request if its processing fails.
+                This includes retries due to navigation errors or errors thrown from user-supplied functions
+                (`request_handler`, `pre_navigation_hooks` etc.).
+
+                This limit does not apply to retries triggered by session rotation (see `max_session_rotations`).
             max_requests_per_crawl: Maximum number of pages to open during a crawl. The crawl stops upon reaching
                 this limit. Setting this value can help avoid infinite loops in misconfigured crawlers. `None` means
                 no limit. Due to concurrency settings, the actual number of pages visited may slightly exceed
                 this value. If used together with `keep_alive`, then the crawler will be kept alive only until
                 `max_requests_per_crawl` is achieved.
             max_session_rotations: Maximum number of session rotations per request. The crawler rotates the session
                 if a proxy error occurs or if the website blocks the request.
+
+                The session rotations are not counted towards the `max_request_retries` limit.
             max_crawl_depth: Specifies the maximum crawl depth. If set, the crawler will stop processing links beyond
                 this depth. The crawl depth starts at 0 for initial requests and increases with each subsequent level
                 of links. Requests at the maximum depth will still be processed, but no new links will be enqueued
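For context, a minimal sketch of how the two documented options interact when constructing a crawler. This is an illustration, not part of the diff: it assumes a recent crawlee-python release where the crawler classes are re-exported from `crawlee.crawlers` (older versions used module paths like `crawlee.basic_crawler`), and the target URL is arbitrary.

import asyncio

# Assumption: import path per recent crawlee releases; adjust for your version.
from crawlee.crawlers import BasicCrawler, BasicCrawlingContext


async def main() -> None:
    crawler = BasicCrawler(
        # Errors raised while processing a request (navigation errors, or
        # exceptions from user-supplied functions such as the request handler
        # or pre-navigation hooks) consume this budget: at most 3 retries.
        max_request_retries=3,
        # Proxy errors and blocked responses rotate the session instead, up
        # to 5 times per request, without counting towards the 3 retries.
        max_session_rotations=5,
    )

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        # An exception raised here counts against max_request_retries.
        response = await context.send_request(context.request.url)
        context.log.info(f'{context.request.url} -> HTTP {response.status_code}')

    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())

With these settings, an exception from the handler is retried at most three times, while a blocked or proxy-failed request rotates to a fresh session up to five times without consuming that retry budget.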