import { Router } from 'express';
import { getLinkPreviewConfig } from '../config/variables';
import { resolveAndValidateHost, safeFetch } from './ssrf-guard';

const router = Router();

/** Upper bound on the whole upstream exchange (headers and body), in milliseconds. */
const REQUEST_TIMEOUT_MS = 8000;
/** Maximum number of HTML bytes buffered from the upstream response. */
const MAX_HTML_BYTES = 512 * 1024;
const BYTES_PER_MB = 1024 * 1024;
/** Maximum length (in UTF-16 code units) of any text field returned to the client. */
const MAX_FIELD_LENGTH = 512;

/** Metadata extracted for one URL, or a marker that extraction failed. */
interface CachedMetadata {
  title?: string;
  description?: string;
  imageUrl?: string;
  siteName?: string;
  failed?: boolean;
  /** `Date.now()` timestamp at which the entry was produced; drives TTL eviction. */
  cachedAt: number;
}

/** URL -> metadata. Map insertion order doubles as the eviction order (oldest first). */
const metadataCache = new Map<string, CachedMetadata>();
/** Running sum of `estimateEntryBytes` over every entry currently in `metadataCache`. */
let cacheByteEstimate = 0;

/**
 * Roughly estimates the memory held by one cache entry.
 *
 * Counts two bytes per UTF-16 code unit of the key and of each string field,
 * plus a flat 64-byte allowance for the remaining fields and bookkeeping.
 */
function estimateEntryBytes(key: string, entry: CachedMetadata): number {
  const strings = [key, entry.title, entry.description, entry.imageUrl, entry.siteName];
  let bytes = 64;

  for (const value of strings) {
    if (value) bytes += value.length * 2;
  }

  return bytes;
}

/**
 * Stores `entry` under `key`, evicting the oldest entries until the estimated
 * cache size fits within the configured `maxCacheSizeMb`.
 *
 * If the cache empties before enough room is found, the entry is stored anyway.
 */
function cacheSet(key: string, entry: CachedMetadata): void {
  const maxBytes = getLinkPreviewConfig().maxCacheSizeMb * BYTES_PER_MB;

  // Remove the previous entry together with its byte accounting. Leaving it in
  // the map would let the eviction loop below subtract its size a second time,
  // making the running estimate drift below the real total.
  const existing = metadataCache.get(key);
  if (existing) {
    cacheByteEstimate -= estimateEntryBytes(key, existing);
    metadataCache.delete(key);
  }

  const entryBytes = estimateEntryBytes(key, entry);

  while (cacheByteEstimate + entryBytes > maxBytes && metadataCache.size > 0) {
    const oldestKey = metadataCache.keys().next().value;
    if (oldestKey === undefined) break;

    const oldestEntry = metadataCache.get(oldestKey);
    if (oldestEntry) cacheByteEstimate -= estimateEntryBytes(oldestKey, oldestEntry);
    metadataCache.delete(oldestKey);
  }

  metadataCache.set(key, entry);
  cacheByteEstimate += entryBytes;
}

/** Clips `value` to `MAX_FIELD_LENGTH` characters; empty and undefined values pass through unchanged. */
function truncateField(value: string | undefined): string | undefined {
  if (!value || value.length <= MAX_FIELD_LENGTH) return value;
  return value.slice(0, MAX_FIELD_LENGTH);
}

/**
 * Resolves a possibly relative image URL against the page URL.
 *
 * @returns The absolute URL, or `undefined` when the input is empty, cannot be
 *   parsed, or does not use the http/https scheme.
 */
function sanitizeImageUrl(rawUrl: string | undefined, baseUrl: string): string | undefined {
  if (!rawUrl) return undefined;

  try {
    const resolved = new URL(rawUrl, baseUrl);
    const isWebScheme = resolved.protocol === 'http:' || resolved.protocol === 'https:';
    return isWebScheme ? resolved.href : undefined;
  } catch {
    return undefined;
  }
}

/** Named and numeric entities decoded by `decodeHtmlEntities`, keyed in lower case. */
const HTML_ENTITIES: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&#x27;': "'",
  '&apos;': "'",
  '&#x2f;': '/'
};

/**
 * Decodes the small set of HTML entities listed in `HTML_ENTITIES`.
 *
 * Works in a single pass so already-decoded output is never decoded again
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown entities are left untouched.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(
    /&(?:amp|lt|gt|quot|apos|#39|#x27|#x2f);/gi,
    (entity) => HTML_ENTITIES[entity.toLowerCase()] ?? entity
  );
}

/**
 * Returns the first non-empty capture produced by any of `patterns`, trimmed
 * and entity-decoded, or `undefined` when no pattern yields a value.
 *
 * Patterns are tried in order, so earlier patterns take priority. A pattern may
 * expose several alternative capture groups; the first non-empty one is used.
 */
function getMetaContent(html: string, patterns: readonly RegExp[]): string | undefined {
  for (const pattern of patterns) {
    const match = pattern.exec(html);
    const captured = match?.slice(1).find((group) => Boolean(group));
    const value = captured?.trim();

    if (value) return decodeHtmlEntities(value);
  }

  return undefined;
}

/**
 * Matches a non-empty `content` attribute. Double- and single-quoted values are
 * captured separately so an apostrophe inside a double-quoted value (or the
 * reverse) does not cut the value short.
 */
const CONTENT_ATTRIBUTE = `content=(?:"([^"]+)"|'([^']+)')`;

/**
 * Builds the two patterns that find `<meta {attribute}="{key}" content="...">`
 * with the attributes in either order.
 *
 * `key` is interpolated verbatim, so it must not contain regex metacharacters.
 */
function metaTagPatterns(attribute: 'property' | 'name', key: string): RegExp[] {
  const identifier = `${attribute}=["']${key}["']`;

  return [
    new RegExp(`<meta[^>]+${identifier}[^>]+${CONTENT_ATTRIBUTE}`, 'i'),
    new RegExp(`<meta[^>]+${CONTENT_ATTRIBUTE}[^>]+${identifier}`, 'i')
  ];
}

// Pattern lists are built once at module load. None uses the `g` or `y` flag,
// so the RegExp objects hold no match state and are safe to reuse.
const TITLE_PATTERNS: readonly RegExp[] = [
  ...metaTagPatterns('property', 'og:title'),
  ...metaTagPatterns('name', 'twitter:title'),
  /<title[^>]*>([^<]+)<\/title>/i
];

const DESCRIPTION_PATTERNS: readonly RegExp[] = [
  ...metaTagPatterns('property', 'og:description'),
  ...metaTagPatterns('name', 'twitter:description'),
  ...metaTagPatterns('name', 'description')
];

const IMAGE_PATTERNS: readonly RegExp[] = [
  ...metaTagPatterns('property', 'og:image'),
  ...metaTagPatterns('name', 'twitter:image')
];

const SITE_NAME_PATTERNS: readonly RegExp[] = metaTagPatterns('property', 'og:site_name');

/**
 * Extracts preview metadata from an HTML document.
 *
 * Open Graph tags are preferred, then Twitter card tags, then the plain
 * `<title>` element / `description` meta tag. Text fields are truncated to
 * `MAX_FIELD_LENGTH`; the image URL is resolved against `url` and dropped
 * unless it is http(s).
 */
function parseMetadata(html: string, url: string): CachedMetadata {
  const rawImageUrl = getMetaContent(html, IMAGE_PATTERNS);

  return {
    title: truncateField(getMetaContent(html, TITLE_PATTERNS)),
    description: truncateField(getMetaContent(html, DESCRIPTION_PATTERNS)),
    imageUrl: sanitizeImageUrl(rawImageUrl, url),
    siteName: truncateField(getMetaContent(html, SITE_NAME_PATTERNS)),
    cachedAt: Date.now()
  };
}

/**
 * Drops cache entries older than the configured `cacheTtlMinutes`.
 * A TTL of 0 empties the cache entirely.
 */
function evictExpired(): void {
  const { cacheTtlMinutes } = getLinkPreviewConfig();

  if (cacheTtlMinutes === 0) {
    metadataCache.clear();
    cacheByteEstimate = 0;
    return;
  }

  const ttlMs = cacheTtlMinutes * 60 * 1000;
  const now = Date.now();

  for (const [key, entry] of metadataCache) {
    if (now - entry.cachedAt <= ttlMs) continue;

    cacheByteEstimate -= estimateEntryBytes(key, entry);
    metadataCache.delete(key);
  }
}

/**
 * Downloads at most `MAX_HTML_BYTES` of HTML from `url` via `safeFetch`.
 *
 * @returns The body decoded as UTF-8, or `undefined` when there is no response,
 *   the status is not OK, the content type is not `text/html`, or the response
 *   has no readable body.
 * @throws Whatever `safeFetch` or the body reader throws, including the abort
 *   error raised when `REQUEST_TIMEOUT_MS` elapses.
 */
async function fetchHtml(url: string): Promise<string | undefined> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

  try {
    const response = await safeFetch(url, {
      signal: controller.signal,
      headers: {
        'Accept': 'text/html',
        'User-Agent': 'MetoYou-LinkPreview/1.0'
      }
    });

    if (!response) return undefined;

    const contentType = (response.headers.get('content-type') || '').toLowerCase();
    const acceptable = response.ok && contentType.includes('text/html');
    const reader = acceptable ? response.body?.getReader() : undefined;

    if (!reader) {
      // Best effort: discard the unread body so the upstream connection can be released.
      await response.body?.cancel().catch(() => undefined);
      return undefined;
    }

    const chunks: Uint8Array[] = [];
    let totalBytes = 0;

    while (totalBytes < MAX_HTML_BYTES) {
      const { done, value } = await reader.read();

      if (value) {
        // Clip the final chunk so no more than MAX_HTML_BYTES is ever buffered.
        const remaining = MAX_HTML_BYTES - totalBytes;
        const chunk = value.length > remaining ? value.subarray(0, remaining) : value;
        chunks.push(chunk);
        totalBytes += chunk.length;
      }

      if (done) break;
    }

    if (totalBytes >= MAX_HTML_BYTES) {
      // Stop the download; a failure to cancel must not discard the data already read.
      await reader.cancel().catch(() => undefined);
    }

    return Buffer.concat(chunks).toString('utf-8');
  } finally {
    // Cleared only here so the timeout also bounds the body read, not just the headers.
    clearTimeout(timeout);
  }
}

/**
 * GET /link-metadata?url=<http(s) URL>
 *
 * Responds with `{ title?, description?, imageUrl?, siteName? }` on success or
 * `{ failed: true }` when the page could not be fetched or parsed. Both
 * outcomes are cached per URL. Responds 403 when previews are disabled and 400
 * for a non-http(s) URL or a host rejected by `resolveAndValidateHost`.
 */
router.get('/link-metadata', async (req, res) => {
  // Only a plain string is accepted; repeated or nested query parameters arrive
  // as arrays/objects and are treated as missing.
  const url = typeof req.query.url === 'string' ? req.query.url : '';
  // Set once the URL is known to be well-formed, so errors are only cached for real URLs.
  let cacheableUrl: string | undefined;

  try {
    const config = getLinkPreviewConfig();

    if (!config.enabled) {
      res.status(403).json({ error: 'Link previews are disabled' });
      return;
    }

    if (!/^https?:\/\//i.test(url)) {
      res.status(400).json({ error: 'Invalid URL' });
      return;
    }

    cacheableUrl = url;

    const hostAllowed = await resolveAndValidateHost(url);

    if (!hostAllowed) {
      res.status(400).json({ error: 'URL resolves to a blocked address' });
      return;
    }

    evictExpired();

    const cached = metadataCache.get(url);

    if (cached) {
      const { cachedAt: _cachedAt, ...payload } = cached;
      res.json(payload);
      return;
    }

    console.log(`[Link Metadata] Cache miss for ${url}. Fetching...`);

    const html = await fetchHtml(url);

    if (html === undefined) {
      cacheSet(url, { failed: true, cachedAt: Date.now() });
      res.json({ failed: true });
      return;
    }

    const metadata = parseMetadata(html, url);
    cacheSet(url, metadata);

    const { cachedAt: _cachedAt, ...payload } = metadata;
    res.json(payload);
  } catch (err) {
    if (cacheableUrl) {
      cacheSet(cacheableUrl, { failed: true, cachedAt: Date.now() });
    }

    // Timeouts are an expected outcome; only unexpected errors are logged.
    if ((err as { name?: string })?.name !== 'AbortError') {
      console.error('Link metadata error:', err);
    }

    res.json({ failed: true });
  }
});

export default router;