-
-
Notifications
You must be signed in to change notification settings - Fork 134
enhance: improve media type recognition with HEAD or magic bytes #2599
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
60001dd
3d4e0ef
41f380f
c05e813
f6267a4
fd80b8a
012af00
d23456b
d6ea440
8c3e6d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import { filetypemime } from 'magic-bytes.js' | ||
|
||
const TIMEOUT = 2000 | ||
const BYTE_LIMIT = 8192 | ||
|
||
/**
 * Tell whether a MIME type string denotes an image.
 *
 * Media types are case-insensitive, so the comparison is done on a
 * lower-cased, trimmed copy (`Image/PNG` counts as an image).
 * Parameters after the type (e.g. `; charset=binary`) do not matter.
 *
 * @param {*} mime - candidate value; anything that is not a string yields false
 * @returns {boolean} true only for strings whose top-level type is `image`
 */
export function isImageMime (mime) {
  return typeof mime === 'string' && mime.trim().toLowerCase().startsWith('image/')
}
|
||
/**
 * Tell whether a MIME type string denotes a video.
 *
 * Media types are case-insensitive, so the comparison is done on a
 * lower-cased, trimmed copy (`Video/MP4` counts as a video).
 * Parameters after the type (e.g. `; codecs=...`) do not matter.
 *
 * @param {*} mime - candidate value; anything that is not a string yields false
 * @returns {boolean} true only for strings whose top-level type is `video`
 */
export function isVideoMime (mime) {
  return typeof mime === 'string' && mime.trim().toLowerCase().startsWith('video/')
}
|
||
// adapted from lib/time.js | ||
/**
 * Create an AbortSignal that aborts on its own after `timeout` milliseconds.
 *
 * @param {number} [timeout] - delay in milliseconds; a falsy value (0, undefined)
 *   yields a signal that never aborts
 * @returns {AbortSignal} signal whose abort reason is an Error reading
 *   `timeout after <seconds>s`
 */
function timeoutSignal (timeout) {
  const controller = new AbortController()

  if (timeout) {
    const timer = setTimeout(() => {
      controller.abort(new Error(`timeout after ${timeout / 1000}s`))
    }, timeout)
    // The timer is never cleared by callers, so make sure a finished request
    // does not keep the Node.js event loop alive until it fires.
    // (unref is absent on browser timers, hence the optional call)
    timer.unref?.()
  }

  return controller.signal
}
|
||
/**
 * Tell whether a response is asking for HTTP Basic authentication.
 *
 * @param {Response} res - fetch response to inspect
 * @returns {boolean} true when the status is 401 and the `WWW-Authenticate`
 *   header mentions the Basic scheme (case-insensitive); false otherwise
 */
function hasBasicAuth (res) {
  const wwwAuth = res.headers.get('www-authenticate') || ''
  // always hand back a real boolean instead of true/undefined
  return res.status === 401 && /basic/i.test(wwwAuth)
}
|
||
async function headMime (url, timeout = TIMEOUT) { | ||
const res = await fetch(url, { method: 'HEAD', signal: timeoutSignal(timeout) }) | ||
// bail on basic authentication requirement | ||
if (hasBasicAuth(res)) return null | ||
|
||
return res.headers.get('content-type') | ||
} | ||
|
||
/**
 * Download at most the first `byteLimit` bytes of a resource so its type can
 * be sniffed from the leading ("magic") bytes.
 *
 * A `Range` header asks the server for just that prefix; because servers may
 * ignore it, the result is additionally capped locally at `byteLimit` bytes.
 *
 * @param {string} url - resource to fetch
 * @param {object} [options]
 * @param {number} [options.timeout=TIMEOUT] - abort the request after this many milliseconds
 * @param {number} [options.byteLimit=BYTE_LIMIT] - maximum number of bytes to return
 * @returns {Promise<{ bytes: Uint8Array | null, headers: Headers }>} the leading
 *   bytes (null when the server demands basic authentication) and the response headers
 * @throws whatever `fetch` or the body reader rejects with (network error, timeout abort)
 */
async function readMagicBytes (url, { timeout = TIMEOUT, byteLimit = BYTE_LIMIT } = {}) {
  const res = await fetch(url, {
    method: 'GET',
    // prefer image and video, but still accept any other type at lower priority
    headers: { Range: `bytes=0-${byteLimit - 1}`, Accept: 'image/*,video/*;q=0.9,*/*;q=0.8' },
    signal: timeoutSignal(timeout)
  })
  // bail on basic authentication requirement
  if (hasBasicAuth(res)) return { bytes: null, headers: res.headers }

  // stream a small chunk if possible, otherwise read buffer
  if (res.body?.getReader) {
    const reader = res.body.getReader()
    let received = 0
    const chunks = []
    try {
      while (received < byteLimit) {
        const { done, value } = await reader.read()
        if (done) break
        // a single chunk can overshoot the limit (e.g. when Range is ignored),
        // so keep only what still fits
        const chunk = value.subarray(0, byteLimit - received)
        chunks.push(chunk)
        received += chunk.byteLength
      }
    } finally {
      // Stop the download. cancel() returns a promise, so a rejection has to be
      // handled on the promise itself; a surrounding try/catch would miss it.
      // Deliberately not awaited so cleanup can never delay the result.
      reader.cancel().catch(() => {})
    }
    const buf = new Uint8Array(received)
    let offset = 0
    for (const c of chunks) {
      buf.set(c, offset)
      offset += c.byteLength
    }
    return { bytes: buf, headers: res.headers }
  } else {
    // no streaming support: the whole body is buffered before being truncated
    const ab = await res.arrayBuffer()
    const buf = new Uint8Array(ab.slice(0, byteLimit))
    return { bytes: buf, headers: res.headers }
  }
}
|
||
/**
 * Check that a value is an absolute http(s) URL that can actually be parsed.
 *
 * @param {*} url - candidate value
 * @returns {boolean} true for strings starting with `http://` or `https://`
 *   that the URL parser accepts with a non-empty host
 */
function isFetchableUrl (url) {
  if (typeof url !== 'string' || !/^(https?:\/\/)/.test(url)) return false
  try {
    return new URL(url).hostname !== ''
  } catch {
    // e.g. 'http://' or a host containing spaces
    return false
  }
}

/**
 * Request handler that reports whether the resource at `req.params.url` is an
 * image or a video.
 *
 * Responses (all JSON, sent through `res.status(...).json(...)`):
 * - 400 `{ error: 'Invalid URL' }` when the URL is missing, not http(s), or unparsable
 * - 200 `{ mime, isImage, isVideo }` once a type was determined (mime may be null)
 * - 500 `{ mime: null, isImage: false, isVideo: false }` when the check itself failed
 *
 * NOTE(review): the URL comes straight from the caller and is fetched from this
 * server, so internal addresses are reachable unless the deployment restricts
 * outbound traffic. Confirm this service is network-isolated.
 *
 * @param {object} req - request; only `req.params.url` is read
 * @param {object} res - response exposing chainable `status(code)` and `json(body)`
 * @returns {Promise<*>} whatever `res.json` returns
 */
export default async function mediaCheck (req, res) {
  const url = req.params.url
  if (!isFetchableUrl(url)) {
    return res.status(400).json({ error: 'Invalid URL' })
  }

  try {
    // trying with HEAD first, as it's the cheapest option
    // TODO: should we trust it?
    try {
      const ct = await headMime(url)
      if (isImageMime(ct) || isVideoMime(ct)) {
        return res.status(200).json({ mime: ct, isImage: isImageMime(ct), isVideo: isVideoMime(ct) })
      }
    } catch {
      // deliberate best effort: a failed or rejected HEAD is not conclusive,
      // so fall through to sniffing the body with GET
    }

    // otherwise, read the first bytes
    const { bytes, headers } = await readMagicBytes(url, { timeout: TIMEOUT, byteLimit: BYTE_LIMIT })
    const mimes = bytes ? filetypemime(bytes) : null
    // prefer the sniffed type, fall back to what the server declared
    const mime = mimes?.[0] ?? headers.get('content-type') ?? null
    return res.status(200).json({ mime, isImage: isImageMime(mime), isVideo: isVideoMime(mime) })
  } catch (err) {
    console.log('media check error:', err)
    return res.status(500).json({ mime: null, isImage: false, isVideo: false })
  }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,6 @@ import { extractUrls } from '@/lib/md' | |
import { isJob } from '@/lib/item' | ||
import path from 'node:path' | ||
import { decodeProxyUrl } from '@/lib/url' | ||
import { fetchWithTimeout } from '@/lib/fetch' | ||
|
||
const imgProxyEnabled = process.env.NODE_ENV === 'production' || | ||
(process.env.NEXT_PUBLIC_IMGPROXY_URL && process.env.IMGPROXY_SALT && process.env.IMGPROXY_KEY) | ||
|
@@ -15,6 +14,7 @@ if (!imgProxyEnabled) { | |
const IMGPROXY_URL = process.env.IMGPROXY_URL_DOCKER || process.env.NEXT_PUBLIC_IMGPROXY_URL | ||
const IMGPROXY_SALT = process.env.IMGPROXY_SALT | ||
const IMGPROXY_KEY = process.env.IMGPROXY_KEY | ||
const MEDIA_CHECK_URL = process.env.MEDIA_CHECK_URL_DOCKER || process.env.NEXT_PUBLIC_MEDIA_CHECK_URL | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Media Type URL Fetch Fails Without Env Vars. The new media type checking mechanism relies on environment variables for its URL. If these are unset, `MEDIA_CHECK_URL` is `undefined`, so the media check request cannot reach the service and no URL is recognized as media. Additional Locations (2). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well... yeah, everything would show as links. |
||
|
||
const cache = new Map() | ||
|
||
|
@@ -144,30 +144,13 @@ const isMediaURL = async (url, { forceFetch }) => { | |
return false | ||
} | ||
|
||
let isMedia | ||
|
||
// first run HEAD with small timeout | ||
let isMedia = false | ||
try { | ||
// https://stackoverflow.com/a/68118683 | ||
const res = await fetchWithTimeout(url, { timeout: 1000, method: 'HEAD' }) | ||
const buf = await res.blob() | ||
isMedia = buf.type.startsWith('image/') || buf.type.startsWith('video/') | ||
} catch (err) { | ||
console.log(url, err) | ||
} | ||
const res = await fetch(`${MEDIA_CHECK_URL}/${encodeURIComponent(url)}`) | ||
if (!res.ok) return false | ||
|
||
// For HEAD requests, positives are most likely true positives. | ||
// However, negatives may be false negatives | ||
if (isMedia) { | ||
cache.set(url, true) | ||
return true | ||
} | ||
|
||
// if not known yet, run GET request with longer timeout | ||
try { | ||
const res = await fetchWithTimeout(url, { timeout: 10000 }) | ||
const buf = await res.blob() | ||
isMedia = buf.type.startsWith('image/') || buf.type.startsWith('video/') | ||
const data = await res.json() | ||
isMedia = data.isImage || data.isVideo | ||
} catch (err) { | ||
console.log(url, err) | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.