File size: 2,517 Bytes
f478a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
const express = require('express');
const fetch = require('node-fetch');

// Express application that acts as the reverse proxy.
const app = express();
// Base URL of the upstream endpoint; the incoming request URL is appended to it.
const TARGET_URL = 'https://rhknk53jznw37un7.us-east-1.aws.endpoints.huggingface.cloud';
// Upper bound on requests being handled at the same time; extra requests wait in `queue`.
const MAX_PARALLEL = 4;
// Total retry budget per request while the upstream keeps answering 503.
const MAX_WAIT_MS = 10 * 60 * 1000; // 10 minutes

// Number of requests currently being handled (compared against MAX_PARALLEL).
let activeRequests = 0;
// FIFO list of pending `{ req, res, next }` entries waiting for a free slot.
const queue = [];

/**
 * Starts queued requests, oldest first, until either the queue is empty or
 * MAX_PARALLEL requests are in flight.
 *
 * Each started request occupies one slot in `activeRequests`; when it settles
 * the slot is released and the queue is examined again.
 *
 * @returns {Promise<void>} Resolves once the currently startable entries have
 *   been started (it does not wait for them to finish).
 */
async function processQueue() {
  for (;;) {
    const queueIsEmpty = queue.length === 0;
    const atCapacity = activeRequests >= MAX_PARALLEL;
    if (queueIsEmpty || atCapacity) {
      return;
    }

    const pending = queue.shift();
    activeRequests += 1;

    // Deliberately not awaited: requests run concurrently up to the limit.
    const releaseSlot = () => {
      activeRequests -= 1;
      processQueue();
    };
    handleRequest(pending.req, pending.res, pending.next).finally(releaseSlot);
  }
}

/**
 * Fetches `url`, retrying with exponential backoff for as long as the upstream
 * answers with HTTP 503, within an overall budget of MAX_WAIT_MS.
 *
 * Any response whose status is not 503 (including other error statuses) is
 * returned to the caller unchanged and unconsumed.
 *
 * @param {string} url - Absolute URL to request.
 * @param {object} options - Options passed straight through to `fetch`.
 * @param {number} startTime - Epoch milliseconds at which the overall wait
 *   budget started counting.
 * @returns {Promise<object>} The first response whose status is not 503.
 * @throws {Error} When the budget is already exhausted before an attempt, or
 *   when sleeping for the next backoff delay would exceed it. Errors raised by
 *   `fetch` itself propagate unchanged.
 */
async function retryWith503Backoff(url, options, startTime) {
  let attempt = 0;

  while (true) {
    if (Date.now() - startTime > MAX_WAIT_MS) {
      throw new Error('Max wait time exceeded (10 minutes)');
    }

    const response = await fetch(url, options);

    if (response.status !== 503) {
      return response;
    }

    // This response is being thrown away: consume its body so the underlying
    // connection is released instead of lingering with unread data.
    try {
      await response.arrayBuffer();
    } catch (drainError) {
      // Best effort only; a failure to read a discarded body must not abort
      // the retry loop.
    }

    // Exponential backoff: 1s, 2s, 4s, 8s, 16s, 32s, 64s, then capped at 64s.
    const delay = Math.min(1000 * 2 ** attempt, 64000);
    attempt++;

    // Measure elapsed time again here: the request above may itself have taken
    // a long time, and that time counts against the budget.
    const elapsed = Date.now() - startTime;
    if (elapsed + delay > MAX_WAIT_MS) {
      throw new Error('Max wait time would be exceeded');
    }

    await new Promise((resolve) => setTimeout(resolve, delay));
  }
}

// Request headers that must not be copied to the upstream request: they
// describe the client<->proxy connection, or (content-length) are recomputed
// by the HTTP client from the body that is actually sent.
const REQUEST_HEADERS_NOT_FORWARDED = new Set([
  'connection',
  'keep-alive',
  'transfer-encoding',
  'upgrade',
  'te',
  'trailer',
  'content-length',
  'host',
]);

// Response headers that describe the upstream<->proxy connection only.
const RESPONSE_HEADERS_NOT_FORWARDED = new Set([
  'connection',
  'keep-alive',
  'transfer-encoding',
]);

/**
 * Proxies one incoming request to TARGET_URL and streams the upstream
 * response back to the client.
 *
 * @param {object} req - Incoming request; `req.body` is expected to be the
 *   Buffer produced by the raw body parser (or a non-body placeholder).
 * @param {object} res - Outgoing response for the client.
 * @param {Function} next - Unused; kept so the signature matches a middleware.
 * @returns {Promise<void>} Settles once the response has been completely
 *   written, the client connection has closed, or an error reply was sent.
 *   It never rejects for upstream failures: those become a 504 JSON reply
 *   (or a dropped connection when the response had already started).
 */
async function handleRequest(req, res, next) {
  try {
    const startTime = Date.now();
    const targetUrl = TARGET_URL + req.url;

    const headers = {};
    for (const [name, value] of Object.entries(req.headers)) {
      if (!REQUEST_HEADERS_NOT_FORWARDED.has(name.toLowerCase())) {
        headers[name] = value;
      }
    }
    headers.host = new URL(TARGET_URL).host;

    // The body parser leaves `{}` (not a Buffer) on req.body when the request
    // had no body; forwarding that object would send garbage upstream.
    const methodAllowsBody = req.method !== 'GET' && req.method !== 'HEAD';
    const hasPayload =
      (Buffer.isBuffer(req.body) || typeof req.body === 'string') &&
      req.body.length > 0;

    const options = {
      method: req.method,
      headers,
      body: methodAllowsBody && hasPayload ? req.body : undefined,
    };

    const response = await retryWith503Backoff(targetUrl, options, startTime);

    // The client may have gone away while we were retrying; in that case no
    // 'finish'/'close' event will arrive, so do not start streaming at all.
    const clientGone =
      Boolean(res.destroyed) || (res.socket != null && res.socket.destroyed);
    if (clientGone) {
      if (response.body) {
        response.body.destroy();
      }
      return;
    }

    res.status(response.status);

    // The HTTP client hands us an already-decoded body, so the upstream
    // content-encoding (and the content-length of the encoded bytes) no
    // longer describe what we are about to send.
    const bodyWasEncoded = response.headers.has('content-encoding');
    response.headers.forEach((value, key) => {
      const name = key.toLowerCase();
      if (RESPONSE_HEADERS_NOT_FORWARDED.has(name)) {
        return;
      }
      if (bodyWasEncoded && (name === 'content-encoding' || name === 'content-length')) {
        return;
      }
      res.setHeader(key, value);
    });

    if (!response.body) {
      res.end();
      return;
    }

    // Wait for the stream to complete so the caller's concurrency slot stays
    // occupied for the whole transfer, and so stream errors are caught below
    // instead of surfacing as unhandled 'error' events.
    await new Promise((resolve, reject) => {
      response.body.once('error', reject);
      res.once('error', reject);
      res.once('finish', resolve);
      res.once('close', () => {
        // Client disconnected (or response completed): stop reading upstream.
        response.body.destroy();
        resolve();
      });
      response.body.pipe(res);
    });
  } catch (error) {
    if (res.headersSent) {
      // Part of the response is already on the wire; a JSON error can no
      // longer be sent, so terminate the connection to signal truncation.
      console.error(`Proxy stream failed after headers were sent: ${error.message}`);
      res.destroy();
      return;
    }
    res.status(504).json({ error: error.message });
  }
}

// Buffer the body of every request, whatever its content type, up to 50mb.
app.use(express.raw({ limit: '50mb', type: '*/*' }));

// Admission control: handle the request right away while a slot is free,
// otherwise park it in the queue until a running request settles.
app.use((req, res, next) => {
  if (activeRequests >= MAX_PARALLEL) {
    queue.push({ req, res, next });
    return;
  }

  activeRequests += 1;

  // Runs whether the request succeeded or failed: free the slot, then let the
  // next queued request (if any) start.
  const releaseSlot = () => {
    activeRequests -= 1;
    processQueue();
  };
  handleRequest(req, res, next).finally(releaseSlot);
});

// Listening port: taken from the PORT environment variable, else 7860.
const PORT = process.env.PORT || 7860;

// Start the server and print the effective configuration once it is listening.
app.listen(PORT, () => {
  const startupLines = [
    `Reverse proxy listening on port ${PORT}`,
    `Proxying to: ${TARGET_URL}`,
    `Max parallel requests: ${MAX_PARALLEL}`,
  ];
  for (const line of startupLines) {
    console.log(line);
  }
});