feat: Add chat embeds v1
YouTube and website metadata embeds
This commit is contained in:
292
server/src/routes/link-metadata.ts
Normal file
292
server/src/routes/link-metadata.ts
Normal file
@@ -0,0 +1,292 @@
|
||||
import { Router } from 'express';
|
||||
import { getLinkPreviewConfig } from '../config/variables';
|
||||
import { resolveAndValidateHost, safeFetch } from './ssrf-guard';
|
||||
|
||||
/** Express router on which the link-metadata endpoint is registered. */
const router = Router();

/** Number of bytes in one mebibyte; used to convert the configured cache size. */
const BYTES_PER_MB = 1024 * 1024;

/** Milliseconds after which the outbound page fetch is aborted. */
const REQUEST_TIMEOUT_MS = 8 * 1000;

/** Soft cap (512 KiB) on how much of a page body is read before the stream is cancelled. */
const MAX_HTML_BYTES = BYTES_PER_MB / 2;

/** Maximum number of UTF-16 code units kept for any textual metadata field. */
const MAX_FIELD_LENGTH = 512;
|
||||
|
||||
/**
 * Link-preview metadata for a single URL as it is held in the in-memory cache.
 *
 * Every descriptive field is optional because a page may expose none of them;
 * a negative result is stored as `{ failed: true, cachedAt }`.
 */
interface CachedMetadata {
  /** Page title (from Open Graph, Twitter card, or the `<title>` element). */
  title?: string;

  /** Short page description. */
  description?: string;

  /** Absolute http(s) URL of the preview image. */
  imageUrl?: string;

  /** Site name taken from `og:site_name`. */
  siteName?: string;

  /** Present and `true` when fetching or parsing the page did not succeed. */
  failed?: boolean;

  /** `Date.now()` timestamp (ms since epoch) at which the entry was created. */
  cachedAt: number;
}
|
||||
|
||||
/**
 * In-memory metadata cache keyed by the requested URL.
 * A `Map` iterates in insertion order, which the eviction code relies on to
 * find the oldest entry.
 */
const metadataCache: Map<string, CachedMetadata> = new Map();

/** Running sum of `estimateEntryBytes` over every entry currently in `metadataCache`. */
let cacheByteEstimate = 0;
|
||||
|
||||
/**
 * Approximates the memory footprint of one cache entry.
 *
 * Strings are counted at two bytes per UTF-16 code unit; a flat 64-byte
 * allowance is added on top for the non-string parts of the entry.
 *
 * @param key - Cache key (the URL) under which the entry is stored.
 * @param entry - The cached metadata.
 * @returns Estimated size in bytes.
 */
function estimateEntryBytes(key: string, entry: CachedMetadata): number {
  const FIXED_OVERHEAD_BYTES = 64;
  const textFields = [entry.title, entry.description, entry.imageUrl, entry.siteName];

  // Missing and empty fields both contribute zero characters.
  const codeUnits = textFields.reduce(
    (sum, field) => sum + (field?.length ?? 0),
    key.length
  );

  return codeUnits * 2 + FIXED_OVERHEAD_BYTES;
}
|
||||
|
||||
/**
 * Stores `entry` under `key`, evicting the oldest entries until the estimated
 * cache size fits within the configured budget (`maxCacheSizeMb`).
 *
 * Replacing an existing key first removes the old entry, so its bytes are
 * released exactly once and the key moves to the newest position. An entry
 * that is larger than the whole budget on its own is not stored; this avoids
 * flushing every other entry only to exceed the limit anyway.
 *
 * @param key - Cache key (the requested URL).
 * @param entry - Metadata to cache.
 */
function cacheSet(key: string, entry: CachedMetadata): void {
  const config = getLinkPreviewConfig();
  const maxBytes = config.maxCacheSizeMb * BYTES_PER_MB;

  const existing = metadataCache.get(key);

  if (existing) {
    // Drop the stale entry now; if it stayed in the map the eviction loop
    // below could reach it and subtract its bytes a second time.
    cacheByteEstimate -= estimateEntryBytes(key, existing);
    metadataCache.delete(key);
  }

  const entryBytes = estimateEntryBytes(key, entry);

  if (entryBytes > maxBytes) {
    return;
  }

  while (cacheByteEstimate + entryBytes > maxBytes && metadataCache.size > 0) {
    // Map iteration is in insertion order, so the first entry is the oldest.
    const oldest = metadataCache.entries().next();

    if (oldest.done) {
      break;
    }

    const [oldestKey, oldestEntry] = oldest.value;

    cacheByteEstimate -= estimateEntryBytes(oldestKey, oldestEntry);
    metadataCache.delete(oldestKey);
  }

  if (metadataCache.size === 0) {
    // Nothing is stored, so any residual estimate is drift; resynchronise.
    cacheByteEstimate = 0;
  }

  metadataCache.set(key, entry);
  cacheByteEstimate += entryBytes;
}
|
||||
|
||||
/**
 * Caps a textual metadata field at `MAX_FIELD_LENGTH` UTF-16 code units.
 *
 * @param value - Field value; may be missing or empty.
 * @returns `value` unchanged when it is falsy or short enough, otherwise its
 *          first `MAX_FIELD_LENGTH` code units.
 */
function truncateField(value: string | undefined): string | undefined {
  return value && value.length > MAX_FIELD_LENGTH
    ? value.slice(0, MAX_FIELD_LENGTH)
    : value;
}
|
||||
|
||||
/**
 * Resolves an image reference against the page URL and accepts it only when
 * the result uses the http or https scheme.
 *
 * @param rawUrl - Image URL as found in the page; may be relative or missing.
 * @param baseUrl - URL of the page, used as the base for relative references.
 * @returns The absolute URL string, or `undefined` when the input is empty,
 *          cannot be parsed, or uses any other scheme.
 */
function sanitizeImageUrl(rawUrl: string | undefined, baseUrl: string): string | undefined {
  if (!rawUrl) {
    return undefined;
  }

  let absolute: URL;

  try {
    absolute = new URL(rawUrl, baseUrl);
  } catch {
    // Unparseable reference or base.
    return undefined;
  }

  const isWebScheme = ['http:', 'https:'].includes(absolute.protocol);

  return isWebScheme ? absolute.href : undefined;
}
|
||||
|
||||
/**
 * Returns the first capture group of the first pattern that matches `html`
 * with a non-empty capture, trimmed and entity-decoded.
 *
 * @param html - Raw HTML text to search.
 * @param patterns - Patterns tried in order; each must expose the wanted text
 *                   as capture group 1.
 * @returns The decoded text, or `undefined` when no pattern yields a value.
 */
function getMetaContent(html: string, patterns: RegExp[]): string | undefined {
  for (const candidate of patterns) {
    const captured = candidate.exec(html)?.[1];

    if (captured) {
      return decodeHtmlEntities(captured.trim());
    }
  }

  return undefined;
}
|
||||
|
||||
/** Named character references recognised by `decodeHtmlEntities`. */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'"
};

/** Matches `&#xHEX;`, `&#DEC;` and `&name;` character references. */
const HTML_ENTITY_PATTERN = /&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi;

/**
 * Decodes the HTML character references commonly found in meta-tag content:
 * `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;` and any decimal or hexadecimal
 * numeric reference (e.g. `&#39;`, `&#x27;`, `&#x2F;`).
 *
 * Decoding is done in a single pass, so already-escaped text is unescaped
 * exactly once (`&amp;lt;` becomes `&lt;`, not `<`). Unknown names and
 * numeric references outside the valid Unicode scalar range are left as-is.
 *
 * @param text - Text that may contain character references.
 * @returns The text with recognised references replaced by their characters.
 */
function decodeHtmlEntities(text: string): string {
  return text.replace(
    HTML_ENTITY_PATTERN,
    (entity: string, hex?: string, dec?: string, name?: string): string => {
      if (name !== undefined) {
        return NAMED_HTML_ENTITIES[name] ?? entity;
      }

      const codePoint = hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? '', 10);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      const isScalar = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;

      // String.fromCodePoint throws a RangeError above 0x10FFFF, hence the guard.
      return isScalar ? String.fromCodePoint(codePoint) : entity;
    }
  );
}
|
||||
|
||||
/**
 * Regex sources capturing a `content` attribute value as group 1, one per
 * quote style. Keeping the styles separate lets a double-quoted value contain
 * apostrophes (`content="Bob's Burgers"`) and vice versa.
 */
const META_CONTENT_CAPTURES = ['content="([^"]*)"', "content='([^']*)'"];

/**
 * Builds the patterns that extract the `content` of a
 * `<meta {attribute}="{key}" ...>` tag, covering both attribute orders and
 * both quote styles. Capture group 1 is always the content value.
 */
function buildMetaPatterns(attribute: 'property' | 'name', key: string): RegExp[] {
  const keyAttribute = `${attribute}=["']${key}["']`;

  return META_CONTENT_CAPTURES.flatMap((content) => [
    new RegExp(`<meta[^>]+${keyAttribute}[^>]+${content}`, 'i'),
    new RegExp(`<meta[^>]+${content}[^>]+${keyAttribute}`, 'i')
  ]);
}

/** Title sources in priority order: Open Graph, Twitter card, `<title>`. */
const TITLE_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:title'),
  ...buildMetaPatterns('name', 'twitter:title'),
  /<title[^>]*>([^<]+)<\/title>/i
];

/** Description sources in priority order: Open Graph, Twitter card, plain meta description. */
const DESCRIPTION_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:description'),
  ...buildMetaPatterns('name', 'twitter:description'),
  ...buildMetaPatterns('name', 'description')
];

/** Preview image sources in priority order: Open Graph, Twitter card. */
const IMAGE_PATTERNS: RegExp[] = [
  ...buildMetaPatterns('property', 'og:image'),
  ...buildMetaPatterns('name', 'twitter:image')
];

/** Site name source: Open Graph only. */
const SITE_NAME_PATTERNS: RegExp[] = buildMetaPatterns('property', 'og:site_name');

/**
 * Extracts link-preview metadata from an HTML document using regex matching
 * on `<meta>` and `<title>` tags (no DOM parsing).
 *
 * Text fields are truncated via `truncateField`; the image URL is resolved
 * against `url` and discarded unless it is http(s).
 *
 * @param html - Raw HTML of the page (possibly cut off mid-document).
 * @param url - URL the page was fetched from; base for relative image URLs.
 * @returns A cache entry stamped with the current time.
 */
function parseMetadata(html: string, url: string): CachedMetadata {
  const title = getMetaContent(html, TITLE_PATTERNS);
  const description = getMetaContent(html, DESCRIPTION_PATTERNS);
  const rawImageUrl = getMetaContent(html, IMAGE_PATTERNS);
  const siteName = getMetaContent(html, SITE_NAME_PATTERNS);

  return {
    title: truncateField(title),
    description: truncateField(description),
    imageUrl: sanitizeImageUrl(rawImageUrl, url),
    siteName: truncateField(siteName),
    cachedAt: Date.now()
  };
}
|
||||
|
||||
/**
 * Removes cache entries older than the configured TTL (`cacheTtlMinutes`).
 * A TTL of exactly 0 empties the cache outright.
 */
function evictExpired(): void {
  const { cacheTtlMinutes } = getLinkPreviewConfig();

  if (cacheTtlMinutes === 0) {
    metadataCache.clear();
    cacheByteEstimate = 0;

    return;
  }

  const maxAgeMs = cacheTtlMinutes * 60 * 1000;
  const now = Date.now();

  // Deleting the entry currently being visited is safe while iterating a Map.
  metadataCache.forEach((entry, key) => {
    const isExpired = now - entry.cachedAt > maxAgeMs;

    if (isExpired) {
      cacheByteEstimate -= estimateEntryBytes(key, entry);
      metadataCache.delete(key);
    }
  });
}
|
||||
|
||||
/**
 * GET /link-metadata?url=<http(s) URL>
 *
 * Responds with preview metadata (`title`, `description`, `imageUrl`,
 * `siteName`) for the given page, or `{ failed: true }` when the page cannot
 * be fetched or is not HTML. Results, including failures, are cached per URL.
 *
 * Status codes:
 * - 403 when link previews are disabled in configuration;
 * - 400 when the URL is not http(s) or `resolveAndValidateHost` rejects it;
 * - 200 otherwise (failures are reported in the body, not via status).
 *
 * The abort timer covers both the request and the body download, and is
 * always cleared, so a server that sends headers and then stalls cannot hold
 * the handler open past `REQUEST_TIMEOUT_MS`.
 */
router.get('/link-metadata', async (req, res) => {
  const url = String(req.query.url || '');
  const hasHttpScheme = /^https?:\/\//i.test(url);

  try {
    const config = getLinkPreviewConfig();

    if (!config.enabled) {
      return res.status(403).json({ error: 'Link previews are disabled' });
    }

    if (!hasHttpScheme) {
      return res.status(400).json({ error: 'Invalid URL' });
    }

    const hostAllowed = await resolveAndValidateHost(url);

    if (!hostAllowed) {
      return res.status(400).json({ error: 'URL resolves to a blocked address' });
    }

    evictExpired();

    const cached = metadataCache.get(url);

    if (cached) {
      const { cachedAt, ...metadata } = cached;

      console.log(`[Link Metadata] Cache hit for ${url} (cached at ${new Date(cachedAt).toISOString()})`);
      return res.json(metadata);
    }

    console.log(`[Link Metadata] Cache miss for ${url}. Fetching...`);

    // Negative-caches the URL and reports the failure to the client.
    const respondFailed = () => {
      cacheSet(url, { failed: true, cachedAt: Date.now() });

      return res.json({ failed: true });
    };

    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

    try {
      const response = await safeFetch(url, {
        signal: controller.signal,
        headers: {
          'Accept': 'text/html',
          'User-Agent': 'MetoYou-LinkPreview/1.0'
        }
      });

      if (!response || !response.ok) {
        return respondFailed();
      }

      // Media types are case-insensitive, so compare in lower case.
      const contentType = (response.headers.get('content-type') || '').toLowerCase();

      if (!contentType.includes('text/html')) {
        return respondFailed();
      }

      const reader = response.body?.getReader();

      if (!reader) {
        return respondFailed();
      }

      const chunks: Uint8Array[] = [];

      let totalBytes = 0;
      let done = false;

      while (!done) {
        const result = await reader.read();

        done = result.done;

        if (result.value) {
          chunks.push(result.value);
          totalBytes += result.value.length;

          if (totalBytes > MAX_HTML_BYTES) {
            // Enough HTML to find the <head> metadata; stop downloading.
            // Cancellation is fire-and-forget, but a rejection must not
            // surface as an unhandled promise rejection.
            void reader.cancel().catch(() => undefined);
            break;
          }
        }
      }

      const html = Buffer.concat(chunks).toString('utf-8');
      const metadata = parseMetadata(html, url);

      cacheSet(url, metadata);

      const { cachedAt: _cachedAt, ...result } = metadata;

      return res.json(result);
    } finally {
      // Runs on success, early return and error alike.
      clearTimeout(timeout);
    }
  } catch (err) {
    // Only negative-cache URLs that could have been looked up in the first place.
    if (hasHttpScheme) {
      cacheSet(url, { failed: true, cachedAt: Date.now() });
    }

    // Timeouts are expected; report them without logging an error.
    if ((err as { name?: string })?.name === 'AbortError') {
      return res.json({ failed: true });
    }

    console.error('Link metadata error:', err);
    res.json({ failed: true });
  }
});
|
||||
|
||||
export default router;
|
||||
Reference in New Issue
Block a user