Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.development
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ NEXT_PUBLIC_EXTRA_LONG_POLL_INTERVAL_MS=300000
IMGPROXY_URL_DOCKER=http://imgproxy:8080
MEDIA_URL_DOCKER=http://s3:4566/uploads

# media check with capture container
MEDIA_CHECK_ROUTE=media
MEDIA_CHECK_URL_DOCKER=http://capture:5678/media
NEXT_PUBLIC_MEDIA_CHECK_URL=http://localhost:5678/media

# postgres container stuff
POSTGRES_PASSWORD=password
POSTGRES_USER=sn
Expand Down
1 change: 1 addition & 0 deletions .env.production
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ NEXTAUTH_URL=https://stacker.news
NEXTAUTH_URL_INTERNAL=http://127.0.0.1:8080/api/auth
NEXT_PUBLIC_AWS_UPLOAD_BUCKET=snuploads
NEXT_PUBLIC_IMGPROXY_URL=https://imgprxy.stacker.news/
NEXT_PUBLIC_MEDIA_CHECK_URL=https://capture.stacker.news/media
NEXT_PUBLIC_MEDIA_DOMAIN=m.stacker.news
PUBLIC_URL=https://stacker.news
SELF_URL=http://127.0.0.1:8080
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ COMPOSE_PROFILES=minimal
To run with images and payments services:

```.env
COMPOSE_PROFILES=images,payments
COMPOSE_PROFILES=images,capture,payments
```

#### Merging compose files
Expand Down
4 changes: 4 additions & 0 deletions capture/index.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import express from 'express'
import puppeteer from 'puppeteer'
import mediaCheck from './media-check.js'

// base URL that incoming request paths are resolved against when building the page URL to capture
const captureUrl = process.env.CAPTURE_URL || 'http://host.docker.internal:3000/'
// path segment the media check handler is mounted under (`/<route>/:url`)
const mediaCheckRoute = process.env.MEDIA_CHECK_ROUTE || 'media'
// presumably the port the express server listens on — the listen call is not visible here
const port = process.env.PORT || 5678
// an unset, non-numeric or zero env value falls back to the default because of `Number(...) ||`
// NOTE(review): looks like a cap on concurrently open puppeteer pages — confirm against usage
const maxPages = Number(process.env.MAX_PAGES) || 5
// NOTE(review): presumably milliseconds — confirm against usage
const timeout = Number(process.env.TIMEOUT) || 10000
Expand Down Expand Up @@ -55,6 +57,8 @@ app.get('/health', (req, res) => {
res.status(200).end()
})

app.get(`/${mediaCheckRoute}/:url`, mediaCheck)

app.get('/*', async (req, res) => {
const url = new URL(req.originalUrl, captureUrl)
const timeLabel = `${Date.now()}-${url.href}`
Expand Down
101 changes: 101 additions & 0 deletions capture/media-check.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { filetypemime } from 'magic-bytes.js'

const TIMEOUT = 2000
const BYTE_LIMIT = 8192

/**
 * Tells whether a MIME type (or a raw Content-Type header value) denotes an image.
 *
 * Media types are case-insensitive and header values may carry surrounding
 * whitespace, so the value is normalized before the prefix is compared.
 *
 * @param {unknown} mime - MIME type or Content-Type header value
 * @returns {boolean} true only when `mime` is a string whose top-level type is `image`
 */
export function isImageMime (mime) {
  return typeof mime === 'string' && mime.trim().toLowerCase().startsWith('image/')
}

/**
 * Tells whether a MIME type (or a raw Content-Type header value) denotes a video.
 *
 * Media types are case-insensitive and header values may carry surrounding
 * whitespace, so the value is normalized before the prefix is compared.
 *
 * @param {unknown} mime - MIME type or Content-Type header value
 * @returns {boolean} true only when `mime` is a string whose top-level type is `video`
 */
export function isVideoMime (mime) {
  return typeof mime === 'string' && mime.trim().toLowerCase().startsWith('video/')
}

// adapted from lib/time.js
/**
 * Builds an AbortSignal that aborts itself once a delay has elapsed.
 *
 * @param {number} timeout - delay in milliseconds; a falsy value yields a signal that is never aborted here
 * @returns {AbortSignal} signal whose abort reason is an Error naming the delay in seconds
 */
function timeoutSignal (timeout) {
  const abortController = new AbortController()
  const { signal } = abortController

  // no delay requested: nothing is scheduled, the signal simply stays open
  if (!timeout) return signal

  const expire = () => abortController.abort(new Error(`timeout after ${timeout / 1000}s`))
  setTimeout(expire, timeout)

  return signal
}

/**
 * Tells whether a response is an HTTP Basic authentication challenge.
 *
 * @param {{ status: number, headers: { get: (name: string) => (string|null) } }} res - fetch-style response
 * @returns {boolean} true when the status is 401 and the `www-authenticate` header mentions `basic`
 *   (case-insensitive); false otherwise — always a real boolean, never undefined
 */
function hasBasicAuth (res) {
  if (res.status !== 401) return false

  // a missing header is treated as an empty challenge
  const wwwAuth = res.headers.get('www-authenticate') || ''
  return /basic/i.test(wwwAuth)
}

/**
 * Asks the remote server for a resource's content type with a HEAD request.
 *
 * @param {string} url - resource to probe, handed to fetch as-is
 * @param {number} [timeout=TIMEOUT] - milliseconds before the request is aborted
 * @returns {Promise<string|null>} the `content-type` response header; null when the
 *   header is absent or the server answers with a basic authentication challenge
 */
async function headMime (url, timeout = TIMEOUT) {
  const signal = timeoutSignal(timeout)
  const response = await fetch(url, { method: 'HEAD', signal })

  // bail on basic authentication requirement, otherwise report what the server declares
  return hasBasicAuth(response) ? null : response.headers.get('content-type')
}

/**
 * Downloads the leading bytes of a resource so its type can be sniffed from its content.
 *
 * A Range header asks the server for the first `byteLimit` bytes only. Servers are free
 * to ignore it and send the whole body, so the result is also capped locally and the
 * remainder of the body is cancelled instead of downloaded.
 *
 * @param {string} url - resource to read, handed to fetch as-is
 * @param {object} [options]
 * @param {number} [options.timeout=TIMEOUT] - milliseconds before the request is aborted
 * @param {number} [options.byteLimit=BYTE_LIMIT] - maximum number of bytes to return
 * @returns {Promise<{ bytes: (Uint8Array|null), headers: Headers }>} at most `byteLimit`
 *   leading bytes plus the response headers; `bytes` is null when the server answers
 *   with a basic authentication challenge
 * @throws whatever fetch or the body stream rejects with (network failure, timeout abort)
 */
async function readMagicBytes (url, { timeout = TIMEOUT, byteLimit = BYTE_LIMIT } = {}) {
  const res = await fetch(url, {
    method: 'GET',
    // prefer image and video, still tolerate any other type at a lower priority
    headers: { Range: `bytes=0-${byteLimit - 1}`, Accept: 'image/*,video/*;q=0.9,*/*;q=0.8' },
    signal: timeoutSignal(timeout)
  })

  // bail on basic authentication requirement
  if (hasBasicAuth(res)) {
    // release the unread body so the connection is not left dangling
    try { await res.body?.cancel?.() } catch {}
    return { bytes: null, headers: res.headers }
  }

  // without a readable stream, fall back to buffering the body and slicing it
  if (!res.body?.getReader) {
    const ab = await res.arrayBuffer()
    return { bytes: new Uint8Array(ab.slice(0, byteLimit)), headers: res.headers }
  }

  // stream only as much as needed
  const reader = res.body.getReader()
  const chunks = []
  let received = 0
  try {
    while (received < byteLimit) {
      const { done, value } = await reader.read()
      if (done) break
      // a single chunk can overshoot the limit, keep only what still fits
      const chunk = value.subarray(0, byteLimit - received)
      chunks.push(chunk)
      received += chunk.byteLength
    }
  } finally {
    // cancelling an errored (e.g. timed out) stream rejects: await it here so the
    // rejection is handled instead of surfacing as an unhandled rejection
    try { await reader.cancel() } catch {}
    try { reader.releaseLock() } catch {}
  }

  const bytes = new Uint8Array(received)
  let offset = 0
  for (const chunk of chunks) {
    bytes.set(chunk, offset)
    offset += chunk.byteLength
  }
  return { bytes, headers: res.headers }
}

/**
 * Tells whether a value is a parsable absolute URL using the http or https scheme.
 *
 * @param {unknown} url - candidate value
 * @returns {boolean} true when `url` is a string the URL parser accepts with an http(s) scheme
 */
function isHttpUrl (url) {
  if (typeof url !== 'string') return false
  try {
    const { protocol } = new URL(url)
    return protocol === 'http:' || protocol === 'https:'
  } catch {
    return false
  }
}

/**
 * Express handler reporting whether the URL in the `:url` route parameter points to an
 * image or a video.
 *
 * Responds with
 * - 400 `{ error: 'Invalid URL' }` when the parameter is not a parsable http(s) URL,
 * - 200 `{ mime, isImage, isVideo }` once a type was determined (mime may be null),
 * - 500 `{ mime: null, isImage: false, isVideo: false }` when the content could not be read.
 *
 * @param {object} req - express request, reads `req.params.url`
 * @param {object} res - express response
 * @returns {Promise<*>} whatever `res.json` returns
 */
export default async function mediaCheck (req, res) {
  const url = req.params.url
  // parse instead of pattern matching: schemes are case-insensitive, and strings such as
  // `http://` are not URLs and should be a client error rather than a failed fetch
  if (!isHttpUrl(url)) {
    return res.status(400).json({ error: 'Invalid URL' })
  }

  // NOTE(review): `url` is caller-controlled and is fetched from this server, so this
  // endpoint can be pointed at internal hosts (SSRF). Consider restricting target hosts
  // where this service is publicly reachable.

  try {
    // trying with HEAD first, as it's the cheapest option
    // TODO: should we trust it?
    try {
      const ct = await headMime(url)
      const isImage = isImageMime(ct)
      const isVideo = isVideoMime(ct)
      if (isImage || isVideo) {
        return res.status(200).json({ mime: ct, isImage, isVideo })
      }
    } catch {
      // deliberate best effort: a failed HEAD request just means we sniff the content below
    }

    // otherwise, read the first bytes
    const { bytes, headers } = await readMagicBytes(url, { timeout: TIMEOUT, byteLimit: BYTE_LIMIT })
    const mimes = bytes ? filetypemime(bytes) : null
    // prefer what the content says, fall back to what the server declares
    const mime = mimes?.[0] ?? headers.get('content-type') ?? null
    return res.status(200).json({ mime, isImage: isImageMime(mime), isVideo: isVideoMime(mime) })
  } catch (err) {
    console.log('media check error:', err)
    return res.status(500).json({ mime: null, isImage: false, isVideo: false })
  }
}
12 changes: 12 additions & 0 deletions capture/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions capture/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"license": "ISC",
"dependencies": {
"express": "^4.20.0",
"magic-bytes.js": "^1.12.1",
"puppeteer": "^20.8.2"
},
"type": "module"
Expand Down
48 changes: 27 additions & 21 deletions components/media-or-link.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import styles from './text.module.css'
import { useState, useEffect, useMemo, useCallback, memo, useRef } from 'react'
import { decodeProxyUrl, IMGPROXY_URL_REGEXP, MEDIA_DOMAIN_REGEXP } from '@/lib/url'
import { useMe } from './me'
import { UNKNOWN_LINK_REL } from '@/lib/constants'
import { UNKNOWN_LINK_REL, PUBLIC_MEDIA_CHECK_URL } from '@/lib/constants'
import classNames from 'classnames'
import { useCarousel } from './carousel'

Expand Down Expand Up @@ -130,29 +130,35 @@ export const useMediaHelper = ({ src, srcSet: srcSetIntital, topLevel, tab }) =>
// don't load the video at all if user doesn't want these
if (!showMedia || isVideo || isImage) return

// check if it's a video by trying to load it
const video = document.createElement('video')
video.onloadedmetadata = () => {
setIsVideo(true)
setIsImage(false)
}
video.onerror = () => {
// hack
// if it's not a video it will throw an error, so we can assume it's an image
const img = new window.Image()
img.src = src
img.decode().then(() => { // decoding beforehand to prevent wrong image cropping
setIsImage(true)
}).catch((e) => {
console.warn('Cannot decode image:', src, e)
})
let aborted = false
const controller = new AbortController()

const checkMedia = async () => {
try {
const res = await fetch(`${PUBLIC_MEDIA_CHECK_URL}/${encodeURIComponent(src)}`, { signal: controller.signal })
if (!res.ok) return

const data = await res.json()
// bail if the fetch was aborted
if (aborted) return

if (data.isVideo) {
setIsVideo(true)
setIsImage(false)
} else if (data.isImage) {
setIsImage(true)
}
} catch (error) {
if (aborted) return
console.error('cannot check media type', error)
}
}
video.src = src
checkMedia()

return () => {
video.onloadedmetadata = null
video.onerror = null
video.src = ''
// abort the fetch
aborted = true
try { controller.abort() } catch {}
}
}, [src, setIsImage, setIsVideo, showMedia, isImage])

Expand Down
1 change: 1 addition & 0 deletions lib/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const BOOST_MAX = 1_000_000
export const IMAGE_PIXELS_MAX = 35000000
// backwards compatible with the old media domain env var; the docker url takes precedence if set
export const MEDIA_URL = process.env.MEDIA_URL_DOCKER || process.env.NEXT_PUBLIC_MEDIA_URL || `https://${process.env.NEXT_PUBLIC_MEDIA_DOMAIN}`
export const PUBLIC_MEDIA_CHECK_URL = process.env.NEXT_PUBLIC_MEDIA_CHECK_URL
export const AWS_S3_URL_REGEXP = new RegExp(`${process.env.NEXT_PUBLIC_MEDIA_URL || `https://${process.env.NEXT_PUBLIC_MEDIA_DOMAIN}`}/([0-9]+)`, 'g')
export const UPLOAD_TYPES_ALLOW = [
'image/gif',
Expand Down
1 change: 1 addition & 0 deletions next.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ module.exports = withPlausibleProxy()({
'process.env.NODE_ENV': JSON.stringify(process.env.NODE_ENV),
'process.env.MEDIA_URL_DOCKER': JSON.stringify(process.env.MEDIA_URL_DOCKER),
'process.env.NEXT_PUBLIC_MEDIA_URL': JSON.stringify(process.env.NEXT_PUBLIC_MEDIA_URL),
'process.env.NEXT_PUBLIC_MEDIA_CHECK_URL': JSON.stringify(process.env.NEXT_PUBLIC_MEDIA_CHECK_URL),
'process.env.NEXT_PUBLIC_MEDIA_DOMAIN': JSON.stringify(process.env.NEXT_PUBLIC_MEDIA_DOMAIN),
'process.env.NEXT_PUBLIC_URL': JSON.stringify(process.env.NEXT_PUBLIC_URL),
'process.env.NEXT_PUBLIC_FAST_POLL_INTERVAL_MS': JSON.stringify(process.env.NEXT_PUBLIC_FAST_POLL_INTERVAL_MS),
Expand Down
29 changes: 6 additions & 23 deletions worker/imgproxy.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import { extractUrls } from '@/lib/md'
import { isJob } from '@/lib/item'
import path from 'node:path'
import { decodeProxyUrl } from '@/lib/url'
import { fetchWithTimeout } from '@/lib/fetch'

const imgProxyEnabled = process.env.NODE_ENV === 'production' ||
(process.env.NEXT_PUBLIC_IMGPROXY_URL && process.env.IMGPROXY_SALT && process.env.IMGPROXY_KEY)
Expand All @@ -15,6 +14,7 @@ if (!imgProxyEnabled) {
const IMGPROXY_URL = process.env.IMGPROXY_URL_DOCKER || process.env.NEXT_PUBLIC_IMGPROXY_URL
const IMGPROXY_SALT = process.env.IMGPROXY_SALT
const IMGPROXY_KEY = process.env.IMGPROXY_KEY
const MEDIA_CHECK_URL = process.env.MEDIA_CHECK_URL_DOCKER || process.env.NEXT_PUBLIC_MEDIA_CHECK_URL
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Media Type URL Fetch Fails Without Env Vars

The new media type checking mechanism relies on environment variables for its URL. If these are unset, fetch calls are made to invalid undefined/... URLs, causing TypeError or fetch failures. This prevents media from being correctly identified, regressing from the previous self-contained implementation.

Additional Locations (2)

Fix in Cursor Fix in Web

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well... yeah, everything would show as links.
I was wondering if a fallback to the traditional system might be acceptable, considering the relocation of the endpoint to another service.


const cache = new Map()

Expand Down Expand Up @@ -144,30 +144,13 @@ const isMediaURL = async (url, { forceFetch }) => {
return false
}

let isMedia

// first run HEAD with small timeout
let isMedia = false
try {
// https://stackoverflow.com/a/68118683
const res = await fetchWithTimeout(url, { timeout: 1000, method: 'HEAD' })
const buf = await res.blob()
isMedia = buf.type.startsWith('image/') || buf.type.startsWith('video/')
} catch (err) {
console.log(url, err)
}
const res = await fetch(`${MEDIA_CHECK_URL}/${encodeURIComponent(url)}`)
if (!res.ok) return false

// For HEAD requests, positives are most likely true positives.
// However, negatives may be false negatives
if (isMedia) {
cache.set(url, true)
return true
}

// if not known yet, run GET request with longer timeout
try {
const res = await fetchWithTimeout(url, { timeout: 10000 })
const buf = await res.blob()
isMedia = buf.type.startsWith('image/') || buf.type.startsWith('video/')
const data = await res.json()
isMedia = data.isImage || data.isVideo
} catch (err) {
console.log(url, err)
}
Expand Down