@@ -33,6 +33,7 @@ import {
33
33
type PageState ,
34
34
} from './types' ;
35
35
import { createLogger , getBuildDomTreeScript } from '../utils' ;
36
+ import { waitForPageAndFramesLoad } from './utils' ;
36
37
37
38
const logger = createLogger ( 'Page' ) ;
38
39
@@ -1038,235 +1039,11 @@ export default class Page {
1038
1039
return false ;
1039
1040
}
1040
1041
1041
/**
 * Waits for the next navigation of the attached Puppeteer page to complete.
 *
 * When no Puppeteer page is attached the optional call is skipped and the
 * method resolves without waiting.
 *
 * @param timeout - Value forwarded as the `timeout` option of Puppeteer's
 *   `waitForNavigation`. Defaults to 8000 when omitted. An explicit `0` is
 *   forwarded unchanged (Puppeteer documents `0` as "disable the timeout").
 */
async waitForPageLoadState(timeout?: number): Promise<void> {
  // `??` instead of `||`: only a missing value falls back to the default, so
  // a caller that deliberately passes 0 is no longer overridden with 8000.
  const timeoutValue = timeout ?? 8000;
  await this._puppeteerPage?.waitForNavigation({ timeout: timeoutValue });
}
1045
-
1046
/**
 * Waits until the page's render-relevant network traffic has been quiet for
 * `this._config.waitForNetworkIdlePageLoadTime` seconds, or until
 * `this._config.maximumWaitPageLoadTime` seconds have elapsed, whichever
 * comes first.
 *
 * Only requests that pass the resource-type, URL-pattern and header filters
 * below are tracked. A tracked request stops being pending when its response
 * arrives or when the request fails.
 *
 * @throws Error when no Puppeteer page is connected.
 */
private async _waitForStableNetwork(): Promise<void> {
  // Capture the page once so listeners are always removed from the same
  // object they were attached to, even if the field changes while we wait.
  const page = this._puppeteerPage;
  if (!page) {
    throw new Error('Puppeteer page is not connected');
  }

  // Resource types whose requests are tracked. Anything else (websocket,
  // media, eventsource, manifest, other, ...) is ignored by this allow-list.
  const RELEVANT_RESOURCE_TYPES = new Set<string>([
    'document',
    'stylesheet',
    'image',
    'font',
    'script',
    'iframe',
  ]);

  // Substrings of a response content-type that count as real page activity.
  const RELEVANT_CONTENT_TYPES = [
    'text/html',
    'text/css',
    'application/javascript',
    'image/',
    'font/',
    'application/json',
  ];

  // Substrings of a response content-type that mark streaming payloads.
  const STREAMING_CONTENT_MARKERS = [
    'streaming',
    'video',
    'audio',
    'webm',
    'mp4',
    'event-stream',
    'websocket',
    'protobuf',
  ];

  // Requests whose lower-cased URL contains any of these are never tracked.
  const IGNORED_URL_PATTERNS = [
    // Analytics and tracking
    'analytics',
    'tracking',
    'telemetry',
    'beacon',
    'metrics',
    // Ad-related
    'doubleclick',
    'adsystem',
    'adserver',
    'advertising',
    // Social media widgets
    'facebook.com/plugins',
    'platform.twitter',
    'linkedin.com/embed',
    // Live chat and support
    'livechat',
    'zendesk',
    'intercom',
    'crisp.chat',
    'hotjar',
    // Push notifications
    'push-notifications',
    'onesignal',
    'pushwoosh',
    // Background sync/heartbeat
    'heartbeat',
    'ping',
    'alive',
    // WebRTC and streaming
    'webrtc',
    'rtmp://',
    'wss://',
    // Common CDNs
    'cloudfront.net',
    'fastly.net',
  ];

  const MAX_TRACKED_RESPONSE_BYTES = 5 * 1024 * 1024; // 5MB
  const POLL_INTERVAL_MS = 100;

  const pendingRequests = new Set<HTTPRequest>();
  let lastActivity = Date.now();

  const onRequest = (request: HTTPRequest) => {
    // Filter by resource type
    if (!RELEVANT_RESOURCE_TYPES.has(request.resourceType())) {
      return;
    }

    // Filter out by URL patterns
    const url = request.url().toLowerCase();
    if (IGNORED_URL_PATTERNS.some((pattern) => url.includes(pattern))) {
      return;
    }

    // Filter out data URLs and blob URLs
    if (url.startsWith('data:') || url.startsWith('blob:')) {
      return;
    }

    // Filter out prefetches and media fetches based on request headers
    const headers = request.headers();
    if (
      // biome-ignore lint/complexity/useLiteralKeys: bracket access kept for consistency with the hyphenated header names below
      headers['purpose'] === 'prefetch' ||
      headers['sec-fetch-dest'] === 'video' ||
      headers['sec-fetch-dest'] === 'audio'
    ) {
      return;
    }

    pendingRequests.add(request);
    lastActivity = Date.now();
  };

  const onResponse = (response: HTTPResponse) => {
    // `delete` returns false for requests that were never tracked.
    if (!pendingRequests.delete(response.request())) {
      return;
    }

    const responseHeaders = response.headers();
    const contentType = responseHeaders['content-type']?.toLowerCase() ?? '';
    const contentLength = responseHeaders['content-length'];

    const isStreaming = STREAMING_CONTENT_MARKERS.some((marker) =>
      contentType.includes(marker),
    );
    const isRelevant = RELEVANT_CONTENT_TYPES.some((type) =>
      contentType.includes(type),
    );
    const isOversized =
      !!contentLength &&
      Number.parseInt(contentLength, 10) > MAX_TRACKED_RESPONSE_BYTES;

    // Streaming, irrelevant or very large responses stop being pending but
    // do not count as fresh activity, so they do not extend the idle window.
    if (isStreaming || !isRelevant || isOversized) {
      return;
    }

    lastActivity = Date.now();
  };

  // A failed or aborted request never produces a 'response' event; without
  // this handler it would stay pending until the maximum wait elapsed.
  const onRequestFailed = (request: HTTPRequest) => {
    if (pendingRequests.delete(request)) {
      lastActivity = Date.now();
    }
  };

  // Add event listeners. The `as any` casts are kept from the original code;
  // NOTE(review): presumably they bridge a handler-type mismatch - confirm
  // whether they can be dropped.
  page.on('request', onRequest as any);
  page.on('response', onResponse as any);
  page.on('requestfailed', onRequestFailed as any);

  let stabilized = false;
  try {
    const startTime = Date.now();

    // eslint-disable-next-line no-constant-condition
    while (true) {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

      const now = Date.now();
      const timeSinceLastActivity = (now - lastActivity) / 1000; // Convert to seconds

      if (
        pendingRequests.size === 0 &&
        timeSinceLastActivity >= this._config.waitForNetworkIdlePageLoadTime
      ) {
        stabilized = true;
        break;
      }

      const elapsedTime = (now - startTime) / 1000; // Convert to seconds
      if (elapsedTime > this._config.maximumWaitPageLoadTime) {
        console.debug(
          `Network timeout after ${this._config.maximumWaitPageLoadTime}s with ${pendingRequests.size} pending requests:`,
          Array.from(pendingRequests).map((r) => r.url()),
        );
        break;
      }
    }
  } finally {
    // Clean up event listeners
    page.off('request', onRequest as any);
    page.off('response', onResponse as any);
    page.off('requestfailed', onRequestFailed as any);
  }

  // Only report stabilization when it actually happened, not after a timeout.
  if (stabilized) {
    console.debug(
      `Network stabilized for ${this._config.waitForNetworkIdlePageLoadTime} seconds`,
    );
  }
}
1245
-
1246
1042
/**
 * Waits for the page (and its frames) to finish loading by delegating to the
 * `waitForPageAndFramesLoad` helper imported from `./utils`.
 *
 * The helper receives the current Puppeteer page, the caller's override and
 * this page's configuration, in that order.
 *
 * @param timeoutOverwrite - Optional value forwarded untouched to the helper.
 *   NOTE(review): presumably overrides the configured minimum wait time -
 *   confirm against the helper in `./utils`.
 */
async waitForPageAndFramesLoad(timeoutOverwrite?: number): Promise<void> {
  const puppeteerPage = this._puppeteerPage;
  const pageConfig = this._config;

  // The bare identifier resolves to the imported module-level helper, not to
  // this method, so this is delegation rather than recursion.
  await waitForPageAndFramesLoad(puppeteerPage, timeoutOverwrite, pageConfig);
}
1272
1049
}
0 commit comments