// Source listing: 292 lines · 7.7 KiB · TypeScript
import { Router } from 'express';
|
|
import { getLinkPreviewConfig } from '../config/variables';
|
|
import { resolveAndValidateHost, safeFetch } from './ssrf-guard';
|
|
|
|
// Express router that the /link-metadata route below is attached to; it is the module's default export.
const router = Router();
|
|
/** Milliseconds after which the upstream request's AbortController is triggered (8 s). */
const REQUEST_TIMEOUT_MS = 8 * 1000;

/** Multiplier that converts the configured cache size in megabytes into bytes. */
const BYTES_PER_MB = 1024 ** 2;

/** Reading of a response body stops once more than this many bytes (512 KiB) were received. */
const MAX_HTML_BYTES = BYTES_PER_MB / 2;

/** Maximum string length kept for a single text metadata field. */
const MAX_FIELD_LENGTH = 512;
|
|
|
|
/**
 * One entry of the in-memory link-preview cache.
 *
 * Every descriptive field is optional: an entry may carry extracted
 * metadata, or only `failed: true` to record that no preview is available.
 */
interface CachedMetadata {
  /** `Date.now()` timestamp taken when the entry was created; used for TTL expiry. */
  cachedAt: number;

  /** `true` when the entry records a failed preview attempt. */
  failed?: boolean;

  /** Page title. */
  title?: string;

  /** Page description. */
  description?: string;

  /** Absolute http(s) URL of the preview image. */
  imageUrl?: string;

  /** Name of the site the page belongs to. */
  siteName?: string;
}
|
|
|
|
/**
 * In-memory preview cache keyed by the requested URL.
 * A Map iterates in insertion order, which is what size-based eviction relies on
 * to find the oldest entry.
 */
const metadataCache: Map<string, CachedMetadata> = new Map();

/** Running total of the estimated byte size of everything held in `metadataCache`. */
let cacheByteEstimate = 0;
|
|
|
|
/**
 * Approximates how many bytes a cache entry occupies.
 *
 * Counts two bytes per character of the key and of each string field that is
 * present, then adds a flat 64 bytes per entry.
 *
 * @param key - Cache key the entry is stored under.
 * @param entry - Entry whose string fields are measured.
 * @returns The estimated size in bytes.
 */
function estimateEntryBytes(key: string, entry: CachedMetadata): number {
  const perEntryOverhead = 64;
  const measuredStrings = [
    key,
    entry.title,
    entry.description,
    entry.imageUrl,
    entry.siteName
  ];
  const characterCount = measuredStrings.reduce(
    (total, text) => total + (text ? text.length : 0),
    0
  );

  return characterCount * 2 + perEntryOverhead;
}
|
|
|
|
/**
 * Stores `entry` under `key`, keeping the cache's estimated size within the
 * configured budget (`maxCacheSizeMb`).
 *
 * - An existing entry for `key` is removed first, so its bytes are released
 *   exactly once and the replacement is treated as the newest entry.
 * - An entry that is larger than the whole budget is not stored.
 * - Otherwise the oldest entries (Map insertion order) are evicted until the
 *   new entry fits.
 *
 * @param key - Cache key (the requested URL).
 * @param entry - Metadata or failure marker to store.
 */
function cacheSet(key: string, entry: CachedMetadata): void {
  const config = getLinkPreviewConfig();
  const maxBytes = config.maxCacheSizeMb * BYTES_PER_MB;
  const existing = metadataCache.get(key);

  if (existing) {
    // Delete as well as subtract: leaving the stale entry in the Map would let
    // the eviction loop below subtract its size a second time.
    cacheByteEstimate -= estimateEntryBytes(key, existing);
    metadataCache.delete(key);
  }

  const entryBytes = estimateEntryBytes(key, entry);

  // Evicting everything would still not make room for this entry.
  if (entryBytes > maxBytes)
    return;

  // Deleting the current key while iterating a Map is safe.
  for (const [oldestKey, oldestEntry] of metadataCache) {
    if (cacheByteEstimate + entryBytes <= maxBytes)
      break;

    cacheByteEstimate -= estimateEntryBytes(oldestKey, oldestEntry);
    metadataCache.delete(oldestKey);
  }

  metadataCache.set(key, entry);
  cacheByteEstimate += entryBytes;
}
|
|
|
|
/**
 * Limits a text field to at most `maxLength` UTF-16 code units.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * retained unit would be a lone high surrogate it is dropped too, so the
 * result is always well-formed text.
 *
 * @param value - Text to limit; `undefined` and `''` are returned unchanged.
 * @param maxLength - Maximum length of the result; defaults to `MAX_FIELD_LENGTH`.
 * @returns `value` itself when it already fits, otherwise its truncated prefix.
 */
function truncateField(value: string | undefined, maxLength: number = MAX_FIELD_LENGTH): string | undefined {
  if (!value)
    return value;

  const limit = Math.max(0, Math.floor(maxLength));

  if (value.length <= limit)
    return value;

  // 0xD800–0xDBFF is the high-surrogate range; its partner would be cut off.
  const lastKeptUnit = value.charCodeAt(limit - 1);
  const splitsSurrogatePair = lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff;

  return value.slice(0, splitsSurrogatePair ? limit - 1 : limit);
}
|
|
|
|
/**
 * Turns an image reference found in a page into an absolute URL.
 *
 * @param rawUrl - Image URL as written in the page; may be relative.
 * @param baseUrl - URL of the page, used to resolve relative references.
 * @returns The absolute `href`, or `undefined` when `rawUrl` is empty, cannot
 *          be parsed, or does not use the `http:` / `https:` scheme.
 */
function sanitizeImageUrl(rawUrl: string | undefined, baseUrl: string): string | undefined {
  if (!rawUrl)
    return undefined;

  let absolute: URL;

  try {
    absolute = new URL(rawUrl, baseUrl);
  } catch {
    // Unparseable reference or base.
    return undefined;
  }

  const isWebScheme = ['http:', 'https:'].includes(absolute.protocol);

  return isWebScheme ? absolute.href : undefined;
}
|
|
|
|
/**
 * Returns the first non-empty capture produced by a list of patterns.
 *
 * Patterns are tried in order; for the first one whose capture group 1 is
 * non-empty, that capture is trimmed and passed through `decodeHtmlEntities`.
 *
 * @param html - Markup to search.
 * @param patterns - Expressions whose first capture group holds the wanted value.
 * @returns The decoded value, or `undefined` when no pattern yields one.
 */
function getMetaContent(html: string, patterns: RegExp[]): string | undefined {
  for (const candidate of patterns) {
    const captured = candidate.exec(html)?.[1];

    if (!captured)
      continue;

    return decodeHtmlEntities(captured.trim());
  }

  return undefined;
}
|
|
|
|
/**
 * Replacement text for each supported character reference, keyed by the part
 * between the leading ampersand and the trailing semicolon.
 */
const HTML_ENTITY_REPLACEMENTS: Record<string, string> = {
  'amp': '&',
  'lt': '<',
  'gt': '>',
  'quot': '"',
  '#39': "'",
  '#x27': "'",
  '#x2F': '/'
};

/**
 * Decodes the character references commonly found in meta tag values:
 * the named references amp, lt, gt and quot, and the numeric references
 * #39, #x27 (apostrophe) and #x2F (slash).
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded a second time (an escaped ampersand followed by `lt;` stays an
 * escaped less-than sign instead of turning into `<`).
 *
 * The pattern keeps the reference names separate from the leading ampersand,
 * so this source contains no complete entity literal that HTML tooling could
 * rewrite.
 *
 * @param text - Text that may contain character references.
 * @returns The text with every supported reference replaced; anything else is left untouched.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(
    /&(amp|lt|gt|quot|#39|#x27|#x2F);/g,
    (reference: string, name: string) => HTML_ENTITY_REPLACEMENTS[name] ?? reference
  );
}
|
|
|
|
/**
 * Builds the expressions that extract the `content` value of one `<meta>` tag.
 *
 * Double- and single-quoted values get separate patterns so that a value ends
 * at its own closing quote; an apostrophe inside a double-quoted value (or the
 * reverse) therefore no longer cuts the value short. Both attribute orders
 * (key before content, content before key) are covered. Capture group 1 always
 * holds the value.
 *
 * @param attribute - Attribute that names the tag: `property` or `name`.
 * @param key - Attribute value to look for, e.g. `og:title`. Inserted into the
 *              pattern verbatim, so it must not contain regex metacharacters.
 * @returns Patterns in the order they should be tried.
 */
function buildMetaPatterns(attribute: 'property' | 'name', key: string): RegExp[] {
  const keyAttribute = `${attribute}=["']${key}["']`;
  const contentAttributes = ['content="([^"]+)"', "content='([^']+)'"];

  return [
    ...contentAttributes.map((content) => new RegExp(`<meta[^>]+${keyAttribute}[^>]+${content}`, 'i')),
    ...contentAttributes.map((content) => new RegExp(`<meta[^>]+${content}[^>]+${keyAttribute}`, 'i'))
  ];
}

/** Title sources in priority order: Open Graph, Twitter card, then the `<title>` element. */
const TITLE_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:title'),
  ...buildMetaPatterns('name', 'twitter:title'),
  /<title[^>]*>([^<]+)<\/title>/i
];

/** Description sources in priority order: Open Graph, Twitter card, then the standard description tag. */
const DESCRIPTION_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:description'),
  ...buildMetaPatterns('name', 'twitter:description'),
  ...buildMetaPatterns('name', 'description')
];

/** Image sources in priority order: Open Graph, then Twitter card. */
const IMAGE_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:image'),
  ...buildMetaPatterns('name', 'twitter:image')
];

/** Site name source: Open Graph only. */
const SITE_NAME_PATTERNS: RegExp[] = buildMetaPatterns('property', 'og:site_name');

/**
 * Extracts link-preview metadata from an HTML document.
 *
 * Text fields are entity-decoded by `getMetaContent` and limited in length by
 * `truncateField`; the image reference is resolved against `url` and kept
 * only when `sanitizeImageUrl` accepts it.
 *
 * @param html - Markup of the fetched page (possibly only its beginning).
 * @param url - URL the page was fetched from; base for relative image references.
 * @returns A cache entry stamped with the current time.
 */
function parseMetadata(html: string, url: string): CachedMetadata {
  const title = getMetaContent(html, TITLE_PATTERNS);
  const description = getMetaContent(html, DESCRIPTION_PATTERNS);
  const rawImageUrl = getMetaContent(html, IMAGE_PATTERNS);
  const siteName = getMetaContent(html, SITE_NAME_PATTERNS);

  return {
    title: truncateField(title),
    description: truncateField(description),
    imageUrl: sanitizeImageUrl(rawImageUrl, url),
    siteName: truncateField(siteName),
    cachedAt: Date.now()
  };
}
|
|
|
|
/**
 * Drops cache entries that are older than the configured TTL.
 *
 * A `cacheTtlMinutes` of exactly 0 empties the cache and resets the size
 * estimate. Otherwise every entry whose age exceeds the TTL is removed and its
 * estimated size is subtracted from the running total.
 */
function evictExpired(): void {
  const { cacheTtlMinutes } = getLinkPreviewConfig();

  if (cacheTtlMinutes === 0) {
    metadataCache.clear();
    cacheByteEstimate = 0;

    return;
  }

  const now = Date.now();
  const ttlMs = cacheTtlMinutes * 60 * 1000;
  const expired = [...metadataCache].filter(([, entry]) => now - entry.cachedAt > ttlMs);

  for (const [staleKey, staleEntry] of expired) {
    cacheByteEstimate -= estimateEntryBytes(staleKey, staleEntry);
    metadataCache.delete(staleKey);
  }
}
|
|
|
|
/**
 * GET /link-metadata?url=<http(s) URL>
 *
 * Responds with preview metadata (`title`, `description`, `imageUrl`,
 * `siteName`) for the page at `url`, or `{ failed: true }` when none can be
 * produced. Results, including failures, are cached per URL.
 *
 * Status codes:
 * - 403 when link previews are disabled in the configuration.
 * - 400 when `url` is not a single http(s) URL string or its host is rejected
 *   by `resolveAndValidateHost`.
 * - 200 otherwise, including `{ failed: true }` results.
 *
 * The upstream request is bounded by `REQUEST_TIMEOUT_MS`, covering both the
 * wait for headers and the body download, and at most roughly
 * `MAX_HTML_BYTES` of the body are read.
 */
router.get('/link-metadata', async (req, res) => {
  // A repeated `url` parameter arrives as an array; only a plain string is accepted.
  const rawUrl = req.query.url;
  const url = typeof rawUrl === 'string' ? rawUrl : '';

  // Set once `url` has passed the scheme check, so the error path never
  // caches a key built from unvalidated input.
  let cacheableUrl: string | undefined;

  try {
    const config = getLinkPreviewConfig();

    if (!config.enabled) {
      return res.status(403).json({ error: 'Link previews are disabled' });
    }

    if (!/^https?:\/\//i.test(url)) {
      return res.status(400).json({ error: 'Invalid URL' });
    }

    cacheableUrl = url;

    const hostAllowed = await resolveAndValidateHost(url);

    if (!hostAllowed) {
      return res.status(400).json({ error: 'URL resolves to a blocked address' });
    }

    evictExpired();

    const cached = metadataCache.get(url);

    if (cached) {
      const { cachedAt: _cachedAt, ...metadata } = cached;

      return res.json(metadata);
    }

    console.log(`[Link Metadata] Cache miss for ${url}. Fetching...`);

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

    // Stays undefined when the response is unusable (no response, non-2xx
    // status, not HTML, or no readable body).
    let html: string | undefined;

    try {
      // NOTE(review): assumes safeFetch forwards `signal` to the underlying
      // fetch, so that aborting also interrupts a pending body read — confirm
      // in ./ssrf-guard.
      const response = await safeFetch(url, {
        signal: controller.signal,
        headers: {
          'Accept': 'text/html',
          'User-Agent': 'MetoYou-LinkPreview/1.0'
        }
      });

      if (response && response.ok) {
        // Media types are case-insensitive.
        const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
        const reader = contentType.includes('text/html')
          ? response.body?.getReader()
          : undefined;

        if (reader) {
          const chunks: Uint8Array[] = [];

          let totalBytes = 0;

          for (;;) {
            const { done, value } = await reader.read();

            if (value) {
              chunks.push(value);
              totalBytes += value.length;

              if (totalBytes > MAX_HTML_BYTES) {
                // Enough markup collected; stop the download. A failing cancel
                // must not turn into an unhandled rejection.
                void reader.cancel().catch(() => undefined);
                break;
              }
            }

            if (done)
              break;
          }

          html = Buffer.concat(chunks).toString('utf-8');
        }
      }
    } finally {
      // Cleared only here so the deadline also covers the body download and
      // the timer never outlives a failed request.
      clearTimeout(timeout);
    }

    if (html === undefined) {
      cacheSet(url, { failed: true, cachedAt: Date.now() });

      return res.json({ failed: true });
    }

    const metadata = parseMetadata(html, url);

    cacheSet(url, metadata);

    const { cachedAt: _cachedAt, ...payload } = metadata;

    return res.json(payload);
  } catch (err) {
    if (cacheableUrl) {
      try {
        cacheSet(cacheableUrl, { failed: true, cachedAt: Date.now() });
      } catch (cacheErr) {
        // An exception escaping this async handler would become an unhandled rejection.
        console.error('Link metadata error:', cacheErr);
      }
    }

    // Timeouts are expected; everything else is logged.
    if ((err as { name?: string })?.name !== 'AbortError') {
      console.error('Link metadata error:', err);
    }

    return res.json({ failed: true });
  }
});
|
|
|
|
// Default export: the router with the /link-metadata route attached.
export default router;
|