const express = require('express');
const fetch = require('node-fetch');
const { pipeline } = require('stream');
const { promisify } = require('util');

// Promise-returning pipeline so a request can be awaited until its response
// has been fully streamed to the client.
const pipelineAsync = promisify(pipeline);

const app = express();

const TARGET_URL = 'https://rhknk53jznw37un7.us-east-1.aws.endpoints.huggingface.cloud';
const TARGET_HOST = new URL(TARGET_URL).host;
const MAX_PARALLEL = 1;
const MAX_WAIT_MS = 10 * 60 * 1000; // 10 minutes
const MAX_503_DELAY_MS = 64000;
const MAX_OTHER_ERROR_RETRIES = 3;

// Headers that describe a single connection and are therefore not copied
// between the client-side and upstream-side connections.
// NOTE(review): header names listed inside a `Connection` header value are not
// handled here.
const HOP_BY_HOP_HEADERS = new Set([
  'connection',
  'keep-alive',
  'proxy-authenticate',
  'proxy-authorization',
  'te',
  'trailer',
  'transfer-encoding',
  'upgrade',
]);

let activeRequests = 0;
const queue = [];

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Drains the body of a response that is about to be thrown away, so an unread
 * body is not left pending while the next attempt runs.
 *
 * @param {object} response - A node-fetch response.
 */
function discardBody(response) {
  if (response.body && typeof response.body.resume === 'function') {
    response.body.resume();
  }
}

/**
 * Builds the header set sent upstream: the client's headers minus hop-by-hop
 * headers and `content-length`, with `host` pointing at the target.
 *
 * `content-length` is dropped because the body is re-sent from an in-memory
 * buffer and the HTTP client is left to compute the length itself.
 *
 * @param {object} incoming - `req.headers` from the client request.
 * @returns {object} Headers to pass to `fetch`.
 */
function buildUpstreamHeaders(incoming) {
  const headers = {};
  for (const [name, value] of Object.entries(incoming)) {
    const lower = name.toLowerCase();
    if (lower === 'content-length' || HOP_BY_HOP_HEADERS.has(lower)) {
      continue;
    }
    headers[name] = value;
  }
  headers.host = TARGET_HOST;
  return headers;
}

/**
 * Starts queued requests, oldest first, until the queue is empty or
 * `MAX_PARALLEL` requests are in flight. Re-invoked whenever a request ends.
 */
function processQueue() {
  while (queue.length > 0 && activeRequests < MAX_PARALLEL) {
    const { req, res, next } = queue.shift();
    activeRequests++;
    handleRequest(req, res, next)
      .catch((error) => {
        // handleRequest reports its own failures; this only guards against an
        // unexpected throw so the promise chain never rejects unhandled.
        console.error('Unexpected proxy failure:', error);
      })
      .finally(() => {
        activeRequests--;
        processQueue();
      });
  }
}

/**
 * Fetches `url`, retrying on error statuses.
 *
 * - 2xx and 401 responses are returned immediately.
 * - 503 responses are retried with exponential backoff (1s, 2s, 4s, ... capped
 *   at 64s) for as long as the time budget allows.
 * - Any other error status is retried up to 3 times with 1s, 2s, 3s delays.
 *
 * The two retry kinds keep separate counters, so earlier 503s do not use up
 * the retries for other errors and vice versa.
 *
 * @param {string} url - Upstream URL.
 * @param {object} options - Options passed to `fetch` on every attempt.
 * @param {number} startTime - `Date.now()` timestamp the time budget runs from.
 * @returns {Promise<object>} The last response received (it may be an error
 *   response).
 * @throws {Error} When `MAX_WAIT_MS` has already elapsed before an attempt, or
 *   when `fetch` itself rejects.
 */
async function retryWith503Backoff(url, options, startTime) {
  let unavailableAttempts = 0;
  let otherErrorAttempts = 0;

  while (true) {
    if (Date.now() - startTime > MAX_WAIT_MS) {
      throw new Error('Max wait time exceeded (10 minutes)');
    }

    const response = await fetch(url, options);

    // Success, or 401 (Unauthorized), is final: hand it straight back.
    if (response.ok || response.status === 401) {
      return response;
    }

    let delay;
    if (response.status === 503) {
      delay = Math.min(1000 * 2 ** unavailableAttempts, MAX_503_DELAY_MS);
      unavailableAttempts++;
    } else {
      if (otherErrorAttempts >= MAX_OTHER_ERROR_RETRIES) {
        return response;
      }
      delay = (otherErrorAttempts + 1) * 1000;
      otherErrorAttempts++;
    }

    // Re-measure after the fetch: the attempt itself consumes budget.
    const elapsed = Date.now() - startTime;
    if (elapsed + delay > MAX_WAIT_MS) {
      return response; // Waiting would exceed the budget; return the error as-is.
    }

    discardBody(response);
    await sleep(delay);
  }
}

/**
 * Proxies one client request to `TARGET_URL` and streams the upstream response
 * back. Resolves only once the response has been fully written (or has
 * failed), so the caller's concurrency slot covers the whole transfer.
 *
 * @param {object} req - Express request; `req.body` is a Buffer when a body
 *   was parsed by `express.raw`.
 * @param {object} res - Express response.
 * @param {Function} next - Express `next` callback (unused).
 * @returns {Promise<void>}
 */
async function handleRequest(req, res, next) {
  try {
    const startTime = Date.now();
    const targetUrl = TARGET_URL + req.url;

    // When no body was parsed, req.body is not a Buffer; forwarding it would
    // send a bogus body upstream.
    const hasBody =
      req.method !== 'GET' &&
      req.method !== 'HEAD' &&
      Buffer.isBuffer(req.body) &&
      req.body.length > 0;

    const options = {
      method: req.method,
      headers: buildUpstreamHeaders(req.headers),
      body: hasBody ? req.body : undefined,
      // Pass the upstream bytes through untouched so the forwarded
      // content-encoding / content-length headers still describe the body.
      compress: false,
    };

    const response = await retryWith503Backoff(targetUrl, options, startTime);

    res.status(response.status);
    // raw() yields arrays of values, keeping repeated headers such as
    // Set-Cookie as separate header lines.
    for (const [name, values] of Object.entries(response.headers.raw())) {
      if (!HOP_BY_HOP_HEADERS.has(name.toLowerCase())) {
        res.setHeader(name, values);
      }
    }

    await pipelineAsync(response.body, res);
  } catch (error) {
    if (res.headersSent || res.destroyed || res.writableEnded) {
      // Too late for an error status; just make sure the connection is closed.
      res.destroy();
      return;
    }
    res.status(504).json({ error: error.message });
  }
}

app.use(express.raw({ type: '*/*', limit: '50mb' }));

// Every request goes through the FIFO queue; processQueue starts it right away
// when a slot is free.
app.use((req, res, next) => {
  queue.push({ req, res, next });
  processQueue();
});

const PORT = process.env.PORT || 7860;

app.listen(PORT, () => {
  console.log(`Reverse proxy listening on port ${PORT}`);
  console.log(`Proxying to: ${TARGET_URL}`);
  console.log(`Max parallel requests: ${MAX_PARALLEL}`);
});