1- const got = require ( 'got' )
21const ms = require ( 'ms' )
32const metascraper = require ( 'metascraper' ) ( [
43 require ( 'metascraper-author' ) ( ) ,
@@ -11,7 +10,8 @@ const metascraper = require('metascraper')([
1110 require ( 'metascraper-title' ) ( )
1211] )
1312const httpError = require ( 'http-errors' )
14- const log = require ( './logger' )
13+ const got = require ( './got' )
14+ const logger = require ( './logger' )
1515
1616// const nock = require('nock')
1717// nock.disableNetConnect()
@@ -25,25 +25,25 @@ const log = require('./logger')
2525const timeoutMs = ms ( process . env . LINK_TIMEOUT )
2626
2727module . exports = async url => {
28- log . info ( `Scraping %s for metadata...` , url )
28+ const requestLogger = logger . child ( { url } )
29+ requestLogger . info ( `Scraping %s for metadata...` , url )
2930
3031 try {
31- const promise = got ( url , {
32- timeout : { request : timeoutMs }
32+ const { body : html , url : finalUrl } = await got ( url , {
33+ // Got is fucking stupid and this is the only way we can actually get the fucking timeouts to work.
34+ timeout : { socket : timeoutMs , request : timeoutMs } ,
35+ context : { requestLogger }
3336 } )
34- // TODO: just rely on got's built-in timeout once got v12 comes out
35- setTimeout ( ( ) => {
36- // At the moment, got's timeout doesn't work for shit
37- promise . cancel ( )
38- } , timeoutMs )
39-
40- const { body : html , url : finalUrl } = await promise
4137 return metascraper ( { html, url : finalUrl } )
4238 } catch ( err ) {
4339 if ( err . name === 'RequestError' && err . code === 'ENOTFOUND' )
4440 throw httpError ( 404 , 'The address to shorten does not exist!' )
45- if ( err . name === 'CancelError ' )
41+ if ( err . name === 'TimeoutError ' )
4642 throw httpError ( 504 , 'Could not scrape link in time!' )
43+ // If we were able to reach an actual thing at the other end,
44+ // but the request got canceled because it's not an HTML,
45+ // we don't care about it as we cannot get any useful metadata from the response.
46+ if ( err . name === 'CancelError' ) return null
4747 else throw err
4848 }
4949}
0 commit comments