@@ -47,7 +47,7 @@ class PuppeteerPlugin {
47
47
await blockNavigation ( page , url ) ;
48
48
}
49
49
50
- await page . goto ( url , this . gotoOptions ) ;
50
+ const puppeteerResponse = await page . goto ( url , this . gotoOptions ) ;
51
51
52
52
if ( this . scrollToBottom ) {
53
53
await scrollToBottom ( page , this . scrollToBottom . timeout , this . scrollToBottom . viewportN ) ;
@@ -56,10 +56,12 @@ class PuppeteerPlugin {
56
56
const content = await page . content ( ) ;
57
57
await page . close ( ) ;
58
58
59
- // convert utf-8 -> binary string because website-scraper needs binary
60
- return Buffer . from ( content ) . toString ( 'binary' ) ;
59
+ const encoding = extractEncodingFromHeader ( puppeteerResponse . headers ( ) )
60
+ const body = Buffer . from ( content ) . toString ( encoding ) ;
61
+
62
+ return { body, encoding }
61
63
} else {
62
- return response . body ;
64
+ return { body : response . body }
63
65
}
64
66
} ) ;
65
67
@@ -91,4 +93,10 @@ async function blockNavigation (page, url) {
91
93
await page . setRequestInterception ( true ) ;
92
94
}
93
95
96
/**
 * Picks the string encoding to use for a response body based on its
 * Content-Type header.
 *
 * @param {Object} headers - response headers keyed by lower-case header name.
 * @returns {string} 'utf8' when the Content-Type mentions utf-8, otherwise 'binary'.
 */
function extractEncodingFromHeader (headers) {
	const contentTypeHeader = headers['content-type'];

	// No usable Content-Type header: fall back to the binary default.
	if (typeof contentTypeHeader !== 'string') {
		return 'binary';
	}

	// Charset names are case-insensitive ("charset=UTF-8" is the common spelling),
	// so normalise the header value before matching.
	return contentTypeHeader.toLowerCase().includes('utf-8') ? 'utf8' : 'binary';
}
101
+
94
102
export default PuppeteerPlugin ;
0 commit comments