Spaces:
Runtime error
Runtime error
Commit ·
2516e02
0
Parent(s):
Duplicate from limcheekin/orca_mini_v3_13B-GGML
Browse files
- .gitattributes +35 -0
- Dockerfile +35 -0
- README.md +20 -0
- index.html +37 -0
- main.py +28 -0
- start_server.sh +6 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Grab a fresh copy of the Python image
FROM python:3.10-slim

# Install build and runtime dependencies, then drop the apt cache so it
# does not bloat the image layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
        ninja-build \
        build-essential \
        pkg-config \
        curl && \
    rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python against OpenBLAS for faster CPU inference;
# --no-cache-dir keeps the pip download cache out of the layer.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
    pip install --no-cache-dir --verbose llama-cpp-python[server]

# Download the quantized model. -f (--fail) makes curl exit non-zero on an
# HTTP error instead of saving the error page as the "model", which would
# otherwise let the build succeed with a corrupt file.
RUN mkdir model && \
    curl -fL https://huggingface.co/TheBloke/orca_mini_v3_13B-GGML/resolve/main/orca_mini_v3_13b.ggmlv3.q5_K_S.bin -o model/ggmlv3-model.bin

COPY ./start_server.sh ./
COPY ./main.py ./
COPY ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Address/port the server binds to (read by main.py at startup)
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]
|
README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: orca_mini_v3_13B-GGML (q5_K_S)
|
| 3 |
+
colorFrom: purple
|
| 4 |
+
colorTo: blue
|
| 5 |
+
sdk: docker
|
| 6 |
+
models:
|
| 7 |
+
- TheBloke/orca_mini_v3_13B-GGML
|
| 8 |
+
tags:
|
| 9 |
+
- inference api
|
| 10 |
+
- openai-api compatible
|
| 11 |
+
- llama-cpp-python
|
| 12 |
+
- orca_mini_v3_13B
|
| 13 |
+
- ggml
|
| 14 |
+
pinned: false
|
| 15 |
+
duplicated_from: limcheekin/orca_mini_v3_13B-GGML
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# orca_mini_v3_13B-GGML (q5_K_S)
|
| 19 |
+
|
| 20 |
+
Please refer to the [index.html](index.html) for more information.
|
index.html
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<!-- Static landing page served at "/" by main.py. -->
<html lang="en">
  <head>
    <!-- Explicit charset prevents mojibake if the server omits a
         Content-Type charset parameter. -->
    <meta charset="utf-8" />
    <title>orca_mini_v3_13B-GGML (q5_K_S)</title>
  </head>
  <body>
    <h1>orca_mini_v3_13B-GGML (q5_K_S)</h1>
    <p>
      With the utilization of the
      <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
      package, we are excited to introduce the GGML model hosted in the Hugging
      Face Docker Spaces, made accessible through an OpenAI-compatible API. This
      space includes comprehensive API documentation to facilitate seamless
      integration.
    </p>
    <ul>
      <li>
        The API endpoint:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1"
          >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1</a
        >
      </li>
      <li>
        The API doc:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs"
          >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs</a
        >
      </li>
    </ul>
    <p>
      If you find this resource valuable, your support in the form of starring
      the space would be greatly appreciated. Your engagement plays a vital role
      in furthering the application for a community GPU grant, ultimately
      enhancing the capabilities and accessibility of this space.
    </p>
  </body>
</html>
|
main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""OpenAI-compatible API server for a GGML model via llama-cpp-python.

Exposes the llama.cpp completion/chat endpoints created by
``create_app`` plus a static landing page (index.html) at the root path.
"""
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os

print("os.cpu_count()", os.cpu_count())
app = create_app(
    Settings(
        # Use every available core for token generation.
        n_threads=os.cpu_count(),
        model="model/ggmlv3-model.bin",
        embedding=False,
    )
)

# Read index.html once at startup and keep it in memory so the root
# endpoint never touches the filesystem per request. Explicit encoding
# avoids depending on the container's locale.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Serve the cached landing page."""
    return content

if __name__ == "__main__":
    import uvicorn
    # Fall back to the same defaults the Dockerfile sets, so a bare
    # `python main.py` works without exported HOST/PORT (the original
    # raised KeyError when either variable was missing).
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
|
start_server.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh

# Raise the locked-memory limit so llama.cpp can mlock the model.
# Tolerate failure: unprivileged containers may lack CAP_IPC_LOCK,
# and the server still runs (just without mlock).
ulimit -l unlimited || true

# exec replaces the shell with python so SIGTERM from `docker stop`
# reaches the server directly instead of the wrapper shell.
exec python3 -B main.py