multimodalart's picture
Create proxy.js
f478a41 verified
raw
history blame
2.52 kB
const express = require('express');
const fetch = require('node-fetch');
// Express application that hosts the proxy middleware registered further down.
const app = express();
// Base URL of the upstream endpoint; the incoming request's path and query are appended to it.
const TARGET_URL = 'https://rhknk53jznw37un7.us-east-1.aws.endpoints.huggingface.cloud';
// Maximum number of requests handled at the same time; additional requests wait in `queue`.
const MAX_PARALLEL = 4;
// Time budget, per request, for retrying upstream 503 responses.
const MAX_WAIT_MS = 10 * 60 * 1000; // 10 minutes
// Number of requests currently being handled: incremented before handleRequest is
// called and decremented once its promise settles.
let activeRequests = 0;
// FIFO of `{ req, res, next }` entries waiting for a free slot.
const queue = [];
/**
 * Starts waiting requests until the queue is empty or MAX_PARALLEL requests
 * are in flight. Every request started here calls this function again once
 * it settles, so a freed slot is handed to the next queued entry.
 *
 * @returns {Promise<void>} Resolves as soon as the scheduling pass is done;
 *   it does not wait for the started requests themselves.
 */
async function processQueue() {
  for (;;) {
    if (queue.length === 0 || activeRequests >= MAX_PARALLEL) {
      return;
    }

    const job = queue.shift();
    activeRequests += 1;

    // Give the slot back, then see whether another queued request can start.
    const releaseSlot = () => {
      activeRequests -= 1;
      processQueue();
    };
    handleRequest(job.req, job.res, job.next).finally(releaseSlot);
  }
}
/**
 * Calls `fetch(url, options)` and keeps retrying, with exponential backoff,
 * for as long as the upstream answers with status 503.
 *
 * Any other status is returned to the caller on the attempt that produced it.
 * Errors thrown by `fetch` itself are not retried and propagate unchanged.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} options - Options handed to `fetch` unchanged on every attempt.
 * @param {number} startTime - Epoch milliseconds from which the MAX_WAIT_MS
 *   budget is measured.
 * @returns {Promise<object>} The first response whose status is not 503.
 * @throws {Error} If the budget is already spent before an attempt, or if the
 *   next backoff sleep would run past it.
 */
async function retryWith503Backoff(url, options, startTime) {
  let attempt = 0;
  while (true) {
    if (Date.now() - startTime > MAX_WAIT_MS) {
      throw new Error('Max wait time exceeded (10 minutes)');
    }

    const response = await fetch(url, options);
    if (response.status !== 503) {
      return response;
    }

    // This response is being thrown away: drain its body so the unread
    // stream does not keep buffering data or pin the upstream connection.
    if (response.body && typeof response.body.resume === 'function') {
      response.body.resume();
    }

    // Exponential backoff: 1s, 2s, 4s, ... capped at 64s.
    const delay = Math.min(1000 * 2 ** attempt, 64000);
    attempt++;

    // Read the clock again: the fetch above may itself have used up a large
    // part of the budget, which a pre-fetch timestamp would not reflect.
    const elapsed = Date.now() - startTime;
    if (elapsed + delay > MAX_WAIT_MS) {
      throw new Error('Max wait time would be exceeded');
    }

    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}
// Header names that must not be copied across the proxy in either direction:
// the hop-by-hop set, plus headers describing the wire form of a body that the
// HTTP client re-frames (length) or decodes (encoding) on our behalf.
const NON_FORWARDABLE_HEADERS = new Set([
  'connection',
  'keep-alive',
  'proxy-authenticate',
  'proxy-authorization',
  'te',
  'trailer',
  'transfer-encoding',
  'upgrade',
  'host',
  'content-length',
  'content-encoding',
  'accept-encoding',
]);

/**
 * Forwards one incoming request to TARGET_URL and streams the upstream
 * response back to the client.
 *
 * The returned promise settles only after the response has been fully written
 * (or the client went away), so a caller that counts in-flight requests keeps
 * the slot occupied for the whole transfer. This function never rejects for
 * upstream or streaming failures; they are reported on `res` instead.
 *
 * @param {object} req - Express request; `req.body` is expected to be a Buffer
 *   when the request carried a body.
 * @param {object} res - Express response.
 * @param {Function} next - Express `next` callback (unused; kept for the
 *   middleware signature).
 * @returns {Promise<void>}
 */
async function handleRequest(req, res, next) {
  try {
    const startTime = Date.now();
    const targetUrl = TARGET_URL + req.url;

    const headers = {};
    for (const [name, value] of Object.entries(req.headers)) {
      if (!NON_FORWARDABLE_HEADERS.has(name.toLowerCase())) {
        headers[name] = value;
      }
    }
    headers.host = new URL(TARGET_URL).host;

    // Only forward a real payload. When the body parser did not produce a
    // Buffer (e.g. a POST/DELETE without a body) `req.body` is a placeholder
    // value that the HTTP client would otherwise stringify and send.
    const methodAllowsBody = req.method !== 'GET' && req.method !== 'HEAD';
    const options = {
      method: req.method,
      headers,
      body: methodAllowsBody && Buffer.isBuffer(req.body) ? req.body : undefined,
    };

    const response = await retryWith503Backoff(targetUrl, options, startTime);
    const upstreamBody = response.body;

    // The client may have disconnected while we were waiting on the upstream;
    // piping into a closed response would never finish.
    if (res.destroyed || res.writableEnded) {
      if (upstreamBody && typeof upstreamBody.destroy === 'function') {
        upstreamBody.destroy();
      }
      return;
    }

    res.status(response.status);
    response.headers.forEach((value, key) => {
      if (!NON_FORWARDABLE_HEADERS.has(key.toLowerCase())) {
        res.setHeader(key, value);
      }
    });

    if (!upstreamBody) {
      res.end();
      return;
    }

    // Wait for the transfer to end so stream errors land in the catch below
    // instead of becoming unhandled 'error' events.
    await new Promise((resolve, reject) => {
      upstreamBody.once('error', reject);
      res.once('error', reject);
      res.once('finish', resolve);
      res.once('close', () => {
        // Client is gone (or we are done): stop reading from the upstream.
        if (typeof upstreamBody.destroy === 'function') {
          upstreamBody.destroy();
        }
        resolve();
      });
      upstreamBody.pipe(res);
    });
  } catch (error) {
    if (res.headersSent) {
      // Too late for a JSON error: the status line is already out. Abort the
      // response so the client sees a truncated transfer, not a clean end.
      console.error(`Proxy stream failed: ${error.message}`);
      res.destroy();
      return;
    }
    res.status(504).json({ error: error.message });
  }
}
// Buffer every request body (any content type, up to 50mb) into `req.body`.
app.use(express.raw({ type: '*/*', limit: '50mb' }));

// Admission control: run the request right away while a slot is free,
// otherwise park it in the queue until processQueue picks it up.
app.use((req, res, next) => {
  if (activeRequests >= MAX_PARALLEL) {
    queue.push({ req, res, next });
    return;
  }

  activeRequests += 1;
  // Free the slot and let a queued request take it.
  const releaseSlot = () => {
    activeRequests -= 1;
    processQueue();
  };
  handleRequest(req, res, next).finally(releaseSlot);
});
// Listening port: taken from the environment, falling back to 7860.
const PORT = process.env.PORT || 7860;

// Start the server and print the effective configuration once it is listening.
app.listen(PORT, () => {
  const banner = [
    `Reverse proxy listening on port ${PORT}`,
    `Proxying to: ${TARGET_URL}`,
    `Max parallel requests: ${MAX_PARALLEL}`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});