GLM-4.6-FP8-API / Dockerfile
AARANHA's picture
Create Dockerfile
01aa868 verified
raw
history blame contribute delete
794 Bytes
# Base image: NVIDIA CUDA 12.1.0 runtime with cuDNN 8 on Ubuntu 22.04.
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

# System packages: the interpreter and pip used by the `pip install` and
# `python3 -m uvicorn` steps further down, plus build and download tools.
# - `python3` (not `python3.11`) is installed on purpose: on Ubuntu 22.04 both
#   `pip` (from python3-pip) and the `python3` command belong to the
#   distribution's default interpreter, so a separately installed 3.11 would
#   neither receive the requirements nor run the app.
# - DEBIAN_FRONTEND is set for this command only, so package configuration
#   cannot stop the build waiting for input and the variable does not leak
#   into the runtime environment.
# - The apt lists are removed in the same layer so they never persist in the
#   image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        curl \
        git \
        python3 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Every relative path below resolves against /app.
WORKDIR /app

# Install the Python requirements before copying the application code, so this
# layer is rebuilt only when requirements.txt changes.
COPY requirements.txt ./
RUN python3 -m pip install --no-cache-dir --requirement requirements.txt

# Application module (started as `app:app` by the container command).
COPY app.py ./
# Runtime environment:
#   PYTHONUNBUFFERED=1      Python writes stdout/stderr without buffering.
#   CUDA_VISIBLE_DEVICES=0  only GPU index 0 is visible to CUDA in the container.
#   HF_HOME=/tmp/hf_cache   root of the Hugging Face cache.
#                           NOTE(review): presumably placed under /tmp so it is
#                           writable by the runtime user — confirm.
ENV PYTHONUNBUFFERED=1 \
    CUDA_VISIBLE_DEVICES=0 \
    HF_HOME=/tmp/hf_cache
# Document the TCP port the server is started on (see the uvicorn --port
# argument in the container command).
EXPOSE 7860/tcp

# Probe the HTTP endpoint every 30s with a 10s timeout. Failures during the
# first 40s are not counted; after that, three consecutive failures mark the
# container unhealthy. curl --fail turns an HTTP status >= 400 into a non-zero
# exit code.
# NOTE(review): assumes app.py serves GET /health on this port — confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1
# Default command (exec form, no shell wrapper): run uvicorn serving `app:app`
# on all interfaces, port 7860.
CMD ["python3", "-m", "uvicorn", \
     "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860"]