diff --git a/server.js b/server.js index cddb5b77..a3a109f8 100644 --- a/server.js +++ b/server.js @@ -25,6 +25,7 @@ const { const { clientIpFromRequest } = require("./security/ip"); const { createRedisClient } = require("./security/redis"); const authRoutes = require("./src/routes/authRoutes"); +const { validateURLForSSRF, validateRedirectForSSRF, createSSRFSafeAxiosConfig } = require("./src/utils/ssrfValidation"); const RAG_SERVICE_URL = process.env.RAG_SERVICE_URL || "http://localhost:5000"; const getInternalRagToken = () => (process.env.INTERNAL_RAG_TOKEN || "").trim(); @@ -1295,23 +1296,6 @@ app.post("/process-from-url", uploadLimiter, requireSupabaseAuth, async (req, re if (!url || typeof url !== "string") { return res.status(400).json({ error: "Missing or invalid 'url' field." }); } - - // SSRF Protection: Validate URL format, protocol, and hostname. - let parsedUrl; - try { - parsedUrl = new URL(url.trim()); - } catch (err) { - return res.status(400).json({ error: "Invalid URL format." }); - } - - if (parsedUrl.protocol !== "https:") { - return res.status(400).json({ error: "Only HTTPS URLs are allowed." }); - } - - const trustedSupabaseOrigin = getTrustedSupabaseOrigin(parsedUrl.hostname); - if (!trustedSupabaseOrigin) { - return res.status(403).json({ error: "URL host is not allowed." }); - } if (!filename || typeof filename !== "string") { return res.status(400).json({ error: "Missing or invalid 'filename' field." }); @@ -1324,19 +1308,52 @@ app.post("/process-from-url", uploadLimiter, requireSupabaseAuth, async (req, re .slice(0, 200); try { + // SSRF Protection: Validate URL format, protocol, hostname, and resolved IPs + let validatedURL; + try { + validatedURL = await validateURLForSSRF(url); + } catch (ssrfErr) { + console.error("SSRF validation failed:", ssrfErr.message); + return res.status(403).json({ error: ssrfErr.message }); + } + // Download the PDF from the remote URL into a Buffer let pdfBuffer; try { - const downloadUrl = new URL(trustedSupabaseOrigin); - downloadUrl.pathname = parsedUrl.pathname; - downloadUrl.search = parsedUrl.search; + const downloadUrl = new URL(validatedURL.url.toString()); const dlResponse = await axios.get(downloadUrl.toString(), { responseType: "arraybuffer", timeout: 30000, maxContentLength: 50 * 1024 * 1024, // 50 MB cap + maxRedirects: 0, // Disable automatic redirects - SSRF protection }); - pdfBuffer = Buffer.from(dlResponse.data); + + // Check for redirect status codes + if (dlResponse.status >= 300 && dlResponse.status < 400) { + const redirectUrl = dlResponse.headers.location; + if (redirectUrl) { + // Validate redirect target before following + try { + await validateRedirectForSSRF(redirectUrl); + // Follow the validated redirect + const redirectResponse = await axios.get(redirectUrl, { + responseType: "arraybuffer", + timeout: 30000, + maxContentLength: 50 * 1024 * 1024, + maxRedirects: 0, + }); + pdfBuffer = Buffer.from(redirectResponse.data); + } catch (redirectErr) { + console.error("Redirect validation failed:", redirectErr.message); + return res.status(403).json({ error: "Redirect target is not allowed" }); + } + } else { + return res.status(502).json({ error: "Server returned redirect without location header" }); + } + } else { + pdfBuffer = Buffer.from(dlResponse.data); + } } catch (dlErr) { console.error("Failed to download PDF from URL:", dlErr.message); return res.status(502).json({ error: "Could not download PDF from the provided URL." }); diff --git a/server.test.js b/server.test.js index 2454893f..92898ffa 100644 --- a/server.test.js +++ b/server.test.js @@ -495,8 +495,15 @@ describe("route error responses", () => { test("POST /process-from-url keeps protocol-relative paths on the trusted host", async () => { const originalGet = axios.get; const originalPost = axios.post; + const dns = require('dns').promises; + const originalResolve4 = dns.resolve4; + const originalResolve6 = dns.resolve6; let requestedDownloadUrl = null; + // Mock DNS resolution to return a public IP + dns.resolve4 = async () => ['1.2.3.4']; + dns.resolve6 = async () => []; + axios.get = async (url) => { requestedDownloadUrl = url; return { data: Buffer.from("%PDF-1.4\n%%EOF") }; @@ -535,14 +542,23 @@ describe("route error responses", () => { } finally { axios.get = originalGet; axios.post = originalPost; + dns.resolve4 = originalResolve4; + dns.resolve6 = originalResolve6; } }); test("POST /process-from-url accepts whitespace-trimmed Supabase URLs", async () => { const originalGet = axios.get; const originalPost = axios.post; + const dns = require('dns').promises; + const originalResolve4 = dns.resolve4; + const originalResolve6 = dns.resolve6; let requestedDownloadUrl = null; + // Mock DNS resolution to return a public IP + dns.resolve4 = async () => ['1.2.3.4']; + dns.resolve6 = async () => []; + axios.get = async (url) => { requestedDownloadUrl = url; return { data: Buffer.from("%PDF-1.4\n%%EOF") }; @@ -579,6 +595,8 @@ describe("route error responses", () => { } finally { axios.get = originalGet; axios.post = originalPost; + dns.resolve4 = originalResolve4; + dns.resolve6 = originalResolve6; } }); @@ -1331,3 +1349,146 @@ describe("requireSupabaseAuth", () => { assert.notEqual(res.status, 401, "Valid token should not be rejected"); }); }); + +describe("SSRF Validation", () => { + let validateURLForSSRF, validateRedirectForSSRF, isPrivateIP, SSRFValidationError; + + before(() => { + const ssrfModule = require("./src/utils/ssrfValidation"); + validateURLForSSRF = ssrfModule.validateURLForSSRF; + validateRedirectForSSRF = ssrfModule.validateRedirectForSSRF; + isPrivateIP = ssrfModule.isPrivateIP; + SSRFValidationError = ssrfModule.SSRFValidationError; + }); + + test("rejects HTTP protocol (non-HTTPS)", async () => { + await assert.rejects( + validateURLForSSRF("http://example.supabase.co/test.pdf"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "Only HTTPS URLs are allowed"); + return true; + } + ); + }); + + test("rejects invalid URL format", async () => { + await assert.rejects( + validateURLForSSRF("not-a-valid-url"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "Invalid URL format"); + return true; + } + ); + }); + + test("rejects disallowed hostname", async () => { + await assert.rejects( + validateURLForSSRF("https://evil.com/test.pdf"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "URL host is not allowed"); + return true; + } + ); + }); + + test("rejects hostname without subdomain", async () => { + await assert.rejects( + validateURLForSSRF("https://supabase.co/test.pdf"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "URL host is not allowed"); + return true; + } + ); + }); + + test("rejects hostname with trailing dot bypass attempt", async () => { + await assert.rejects( + validateURLForSSRF("https://evil.com.supabase.co./test.pdf"), + (err) => { + assert(err instanceof SSRFValidationError); + // The hostname normalization removes the trailing dot, so it becomes + // "evil.com.supabase.co" which is not in the allowlist + assert.match(err.message, /not allowed|DNS resolution/); + return true; + } + ); + }); + + test("rejects private IPv4 addresses - loopback", () => { + assert.equal(isPrivateIP("127.0.0.1"), true); + assert.equal(isPrivateIP("127.0.0.2"), true); + assert.equal(isPrivateIP("127.255.255.255"), true); + }); + + test("rejects private IPv4 addresses - Class A", () => { + assert.equal(isPrivateIP("10.0.0.1"), true); + assert.equal(isPrivateIP("10.255.255.255"), true); + }); + + test("rejects private IPv4 addresses - Class B", () => { + assert.equal(isPrivateIP("172.16.0.1"), true); + assert.equal(isPrivateIP("172.31.255.255"), true); + assert.equal(isPrivateIP("172.32.0.1"), false); // Outside range + }); + + test("rejects private IPv4 addresses - Class C", () => { + assert.equal(isPrivateIP("192.168.0.1"), true); + assert.equal(isPrivateIP("192.168.255.255"), true); + }); + + test("rejects private IPv4 addresses - link-local", () => { + assert.equal(isPrivateIP("169.254.0.1"), true); + assert.equal(isPrivateIP("169.254.255.255"), true); + }); + + test("accepts public IPv4 addresses", () => { + assert.equal(isPrivateIP("8.8.8.8"), false); + assert.equal(isPrivateIP("1.1.1.1"), false); + assert.equal(isPrivateIP("172.32.0.1"), false); + }); + + test("rejects private IPv6 addresses - loopback", () => { + assert.equal(isPrivateIP("::1"), true); + }); + + test("rejects private IPv6 addresses - unique local", () => { + assert.equal(isPrivateIP("fc00::1"), true); + assert.equal(isPrivateIP("fd00::1"), true); + }); + + test("rejects private IPv6 addresses - link-local", () => { + assert.equal(isPrivateIP("fe80::1"), true); + assert.equal(isPrivateIP("febf::ffff"), true); + }); + + test("accepts public IPv6 addresses", () => { + assert.equal(isPrivateIP("2001:4860:4860::8888"), false); + assert.equal(isPrivateIP("2606:4700:4700::1111"), false); + }); + + test("validateRedirectForSSRF uses same validation as validateURLForSSRF", async () => { + await assert.rejects( + validateRedirectForSSRF("https://evil.com/redirect"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "URL host is not allowed"); + return true; + } + ); + }); + + test("validateRedirectForSSRF rejects HTTP redirects", async () => { + await assert.rejects( + validateRedirectForSSRF("http://example.supabase.co/redirect"), + (err) => { + assert(err instanceof SSRFValidationError); + assert.equal(err.message, "Only HTTPS URLs are allowed"); + return true; + } + ); + }); +}); diff --git a/src/data/users.json b/src/data/users.json index adb2d281..04cabddd 100644 --- a/src/data/users.json +++ b/src/data/users.json @@ -62,5 +62,29 @@ { "email": "testuser2-1780859135237@example.com", "password": "$2b$10$URdT8Tm/Xg7Kwi/UnSiinOWzjsqFc8QCUgpY/TeURTj2T3R1wYW8S" + }, + { + "email": "testuser-1781540716124@example.com", + "password": "$2b$10$9iNV0yiIpNM.gJT.lsOa5OpuMxL7hKB.84wloniEj2O.bBpZ7MUjW" + }, + { + "email": "testuser2-1781540716507@example.com", + "password": "$2b$10$UcaYkbpnvmLOxUnYIMPBhOx9WM/m0eefhPXDFwcmE0V22fswv5XNa" + }, + { + "email": "testuser-1781541360847@example.com", + "password": "$2b$10$1UiWTuwl3W5HispqyqPxDuLYP.f.zZjzINI4wnjoZaUuR3UbY5pcS" + }, + { + "email": "testuser2-1781541360962@example.com", + "password": "$2b$10$zMTjccFLerjb5izOiRcIw.KIWr3EiNI98iUTtUDeCthvJc5k3127W" + }, + { + "email": "testuser-1781541583462@example.com", + "password": "$2b$10$NZzoL2q8S2sDFN8bM/KUhuUfHcmgekG/uwuoext4Sev80smUIxPoe" + }, + { + "email": "testuser2-1781541583647@example.com", + "password": "$2b$10$2DIeGgeCjgcJ37QnmfR5M.kkwFh7MC5XJcddEir68UtqJFDyQV61." } ] \ No newline at end of file diff --git a/src/utils/ssrfValidation.js b/src/utils/ssrfValidation.js new file mode 100644 index 00000000..3e65077b --- /dev/null +++ b/src/utils/ssrfValidation.js @@ -0,0 +1,324 @@ +const dns = require('dns').promises; +const { URL } = require('url'); +const { domainToASCII } = require('url'); + +// ─── Configuration ───────────────────────────────────────────────────────────── + +const ALLOWED_HOST_SUFFIXES = new Set(['supabase.co', 'supabase.in']); + +// Private IPv4 ranges to block +const PRIVATE_IPV4_RANGES = [ + { start: '127.0.0.0', prefix: 8 }, // Loopback + { start: '10.0.0.0', prefix: 8 }, // Private Class A + { start: '172.16.0.0', prefix: 12 }, // Private Class B + { start: '192.168.0.0', prefix: 16 }, // Private Class C + { start: '169.254.0.0', prefix: 16 }, // Link-local +]; + +// Private IPv6 ranges to block +const PRIVATE_IPV6_RANGES = [ + { start: '::1', prefix: 128 }, // Loopback + { start: 'fc00::', prefix: 7 }, // Unique local + { start: 'fe80::', prefix: 10 }, // Link-local +]; + +// ─── Error Classes ───────────────────────────────────────────────────────────── + +class SSRFValidationError extends Error { + constructor(message) { + super(message); + this.name = 'SSRFValidationError'; + } +} + +// ─── Helper Functions ───────────────────────────────────────────────────────── + +/** + * Normalize hostname for allowlist checking + * Prevents bypass through trailing dots, mixed case, or IDN homograph attacks + */ +function normalizeHostname(hostname) { + if (typeof hostname !== 'string') return null; + + const trimmed = hostname.trim().toLowerCase().replace(/\.+$/, ''); + if (!trimmed) return null; + + const ascii = domainToASCII(trimmed); + if (!ascii) return null; + + return ascii.toLowerCase().replace(/\.+$/, ''); +} + +/** + * Check if hostname matches allowed suffixes + * Uses strict label-by-label comparison to prevent bypass + */ +function isHostnameAllowed(hostname) { + const normalized = normalizeHostname(hostname); + if (!normalized) return false; + + const labels = normalized.split('.'); + + for (const suffix of ALLOWED_HOST_SUFFIXES) { + const suffixLabels = suffix.split('.'); + if (labels.length < suffixLabels.length + 1) continue; + + const hostnameSuffix = labels.slice(-suffixLabels.length).join('.'); + if (hostnameSuffix === suffix) { + return true; + } + } + + return false; +} + +/** + * Convert IPv4 address to integer for range comparison + */ +function ipv4ToInt(ip) { + const parts = ip.split('.').map(Number); + if (parts.length !== 4 || parts.some(isNaN)) { + return null; + } + return ((parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]) >>> 0; +} + +/** + * Check if IPv4 address is in a CIDR range + */ +function isIPv4InRange(ip, rangeStart, prefix) { + const ipInt = ipv4ToInt(ip); + const startInt = ipv4ToInt(rangeStart); + + if (ipInt === null || startInt === null) return false; + + const mask = (0xFFFFFFFF << (32 - prefix)) >>> 0; + return (ipInt & mask) === (startInt & mask); +} + +/** + * Check if IPv4 address is in any private range + */ +function isPrivateIPv4(ip) { + return PRIVATE_IPV4_RANGES.some(range => + isIPv4InRange(ip, range.start, range.prefix) + ); +} + +/** + * Parse IPv6 address to byte array + */ +function ipv6ToBytes(ip) { + // Handle :: notation (compressed zeros) + if (ip === '::') { + return new Uint8Array(16).fill(0); + } + + // Split by : and handle :: expansion + const parts = ip.split(':'); + const expanded = []; + + // Find the :: (empty string in parts) + const doubleColonIndex = parts.indexOf(''); + + if (doubleColonIndex !== -1) { + // Count non-empty parts before and after :: + const before = parts.slice(0, doubleColonIndex).filter(p => p !== ''); + const after = parts.slice(doubleColonIndex + 1).filter(p => p !== ''); + const missing = 8 - (before.length + after.length); + + // Add parts before :: + expanded.push(...before); + + // Add missing zeros + for (let i = 0; i < missing; i++) { + expanded.push('0'); + } + + // Add parts after :: + expanded.push(...after); + } else { + // No ::, just use the parts as-is + for (let i = 0; i < parts.length; i++) { + expanded.push(parts[i] || '0'); + } + } + + if (expanded.length !== 8) return null; + + const bytes = new Uint8Array(16); + for (let i = 0; i < 8; i++) { + const value = parseInt(expanded[i], 16); + if (isNaN(value)) return null; + bytes[i * 2] = (value >> 8) & 0xFF; + bytes[i * 2 + 1] = value & 0xFF; + } + + return bytes; +} + +/** + * Check if IPv6 address is in a CIDR range + */ +function isIPv6InRange(ip, rangeStart, prefix) { + const ipBytes = ipv6ToBytes(ip); + const startBytes = ipv6ToBytes(rangeStart); + + if (!ipBytes || !startBytes) { + return false; + } + + const fullBytes = Math.floor(prefix / 8); + const remainingBits = prefix % 8; + + for (let i = 0; i < fullBytes; i++) { + if (ipBytes[i] !== startBytes[i]) return false; + } + + if (remainingBits > 0) { + const mask = (0xFF << (8 - remainingBits)) & 0xFF; + if ((ipBytes[fullBytes] & mask) !== (startBytes[fullBytes] & mask)) { + return false; + } + } + + return true; +} + +/** + * Check if IPv6 address is in any private range + */ +function isPrivateIPv6(ip) { + return PRIVATE_IPV6_RANGES.some(range => + isIPv6InRange(ip, range.start, range.prefix) + ); +} + +/** + * Check if IP address is private/internal + */ +function isPrivateIP(ip) { + if (ip.includes(':')) { + return isPrivateIPv6(ip); + } + return isPrivateIPv4(ip); +} + +/** + * Resolve hostname to IP addresses using DNS + * Returns both A (IPv4) and AAAA (IPv6) records + */ +async function resolveHostname(hostname) { + try { + const [aRecords, aaaaRecords] = await Promise.allSettled([ + dns.resolve4(hostname), + dns.resolve6(hostname).catch(() => []), // AAAA may not exist + ]); + + const ips = []; + + if (aRecords.status === 'fulfilled') { + ips.push(...aRecords.value); + } + + if (aaaaRecords.status === 'fulfilled') { + ips.push(...aaaaRecords.value); + } + + return ips; + } catch (err) { + throw new SSRFValidationError('DNS resolution failed'); + } +} + +/** + * Validate that all resolved IPs are not private/internal + */ +function validateResolvedIPs(ips) { + for (const ip of ips) { + if (isPrivateIP(ip)) { + throw new SSRFValidationError('Private IP addresses are not allowed'); + } + } + return true; +} + +// ─── Main Validation Functions ───────────────────────────────────────────────── + +/** + * Validate URL for SSRF protection + * Checks protocol, hostname, and resolves DNS to validate IP addresses + * + * @param {string} urlString - The URL to validate + * @returns {Promise<{url: URL, hostname: string, ips: string[]}>} + * @throws {SSRFValidationError} + */ +async function validateURLForSSRF(urlString) { + // Parse URL + let url; + try { + url = new URL(urlString.trim()); + } catch (err) { + throw new SSRFValidationError('Invalid URL format'); + } + + // Validate protocol - HTTPS only + if (url.protocol !== 'https:') { + throw new SSRFValidationError('Only HTTPS URLs are allowed'); + } + + // Validate hostname against allowlist + const hostname = url.hostname; + if (!isHostnameAllowed(hostname)) { + throw new SSRFValidationError('URL host is not allowed'); + } + + // Resolve DNS and validate IPs + const ips = await resolveHostname(hostname); + if (ips.length === 0) { + throw new SSRFValidationError('DNS resolution returned no addresses'); + } + + validateResolvedIPs(ips); + + return { url, hostname, ips }; +} + +/** + * Validate redirect target for SSRF protection + * Used when following HTTP redirects to ensure redirect destinations are safe + * + * @param {string} redirectUrl - The redirect URL to validate + * @returns {Promise<{url: URL, hostname: string, ips: string[]}>} + * @throws {SSRFValidationError} + */ +async function validateRedirectForSSRF(redirectUrl) { + // Redirects must use the same validation as initial URLs + return validateURLForSSRF(redirectUrl); +} + +/** + * Create axios config with SSRF-safe redirect handling + * Disables automatic redirects and provides manual redirect validation + */ +function createSSRFSafeAxiosConfig(baseConfig = {}) { + return { + ...baseConfig, + maxRedirects: 0, // Disable automatic redirects + validateStatus: (status) => status < 400, // Accept all non-error statuses + }; +} + +// ─── Exports ───────────────────────────────────────────────────────────────── + +module.exports = { + validateURLForSSRF, + validateRedirectForSSRF, + createSSRFSafeAxiosConfig, + isHostnameAllowed, + isPrivateIP, + SSRFValidationError, + ALLOWED_HOST_SUFFIXES, + PRIVATE_IPV4_RANGES, + PRIVATE_IPV6_RANGES, +};