Spaces:
Runtime error
Runtime error
Commit ·
2516e02
0
Parent(s):
Duplicate from limcheekin/orca_mini_v3_13B-GGML
Browse files
- .gitattributes +35 -0
- Dockerfile +35 -0
- README.md +20 -0
- index.html +37 -0
- main.py +28 -0
- start_server.sh +6 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Grab a fresh copy of the Python image
FROM python:3.10-slim

# Install build and runtime dependencies, then drop the apt cache so it
# does not bloat the image layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
        ninja-build \
        build-essential \
        pkg-config \
        curl && \
    rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python against OpenBLAS for faster CPU inference;
# --no-cache-dir keeps the pip download cache out of the layer.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 \
    pip install --no-cache-dir --verbose llama-cpp-python[server]

# Download the quantized model. -f (--fail) makes curl exit non-zero on an
# HTTP error instead of saving the error page as the "model", which would
# otherwise let the build succeed with a corrupt file.
RUN mkdir model && \
    curl -fL https://huggingface.co/TheBloke/orca_mini_v3_13B-GGML/resolve/main/orca_mini_v3_13b.ggmlv3.q5_K_S.bin -o model/ggmlv3-model.bin

COPY ./start_server.sh ./
COPY ./main.py ./
COPY ./index.html ./

# Make the server start script executable
RUN chmod +x ./start_server.sh

# Address/port the server binds to (read by main.py at startup)
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose a port for the server
EXPOSE ${PORT}

# Run the server start script
CMD ["/bin/sh", "./start_server.sh"]
|
README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: orca_mini_v3_13B-GGML (q5_K_S)
|
| 3 |
+
colorFrom: purple
|
| 4 |
+
colorTo: blue
|
| 5 |
+
sdk: docker
|
| 6 |
+
models:
|
| 7 |
+
- TheBloke/orca_mini_v3_13B-GGML
|
| 8 |
+
tags:
|
| 9 |
+
- inference api
|
| 10 |
+
- openai-api compatible
|
| 11 |
+
- llama-cpp-python
|
| 12 |
+
- orca_mini_v3_13B
|
| 13 |
+
- ggml
|
| 14 |
+
pinned: false
|
| 15 |
+
duplicated_from: limcheekin/orca_mini_v3_13B-GGML
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# orca_mini_v3_13B-GGML (q5_K_S)
|
| 19 |
+
|
| 20 |
+
Please refer to the [index.html](index.html) for more information.
|
index.html
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<!-- Static landing page served at "/" by main.py. -->
<html lang="en">
  <head>
    <!-- Explicit charset prevents mojibake if the server omits a
         Content-Type charset parameter. -->
    <meta charset="utf-8" />
    <title>orca_mini_v3_13B-GGML (q5_K_S)</title>
  </head>
  <body>
    <h1>orca_mini_v3_13B-GGML (q5_K_S)</h1>
    <p>
      With the utilization of the
      <a href="https://github.com/abetlen/llama-cpp-python">llama-cpp-python</a>
      package, we are excited to introduce the GGML model hosted in the Hugging
      Face Docker Spaces, made accessible through an OpenAI-compatible API. This
      space includes comprehensive API documentation to facilitate seamless
      integration.
    </p>
    <ul>
      <li>
        The API endpoint:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1"
          >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/v1</a
        >
      </li>
      <li>
        The API doc:
        <a href="https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs"
          >https://limcheekin-orca-mini-v3-13b-ggml.hf.space/docs</a
        >
      </li>
    </ul>
    <p>
      If you find this resource valuable, your support in the form of starring
      the space would be greatly appreciated. Your engagement plays a vital role
      in furthering the application for a community GPU grant, ultimately
      enhancing the capabilities and accessibility of this space.
    </p>
  </body>
</html>
|
main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""OpenAI-compatible API server for a GGML model via llama-cpp-python.

Exposes the llama.cpp completion/chat endpoints created by
``create_app`` plus a static landing page (index.html) at the root path.
"""
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os

print("os.cpu_count()", os.cpu_count())
app = create_app(
    Settings(
        # Use every available core for token generation.
        n_threads=os.cpu_count(),
        model="model/ggmlv3-model.bin",
        embedding=False,
    )
)

# Read index.html once at startup and keep it in memory so the root
# endpoint never touches the filesystem per request. Explicit encoding
# avoids depending on the container's locale.
with open("index.html", "r", encoding="utf-8") as f:
    content = f.read()


@app.get("/", response_class=HTMLResponse)
async def read_items():
    """Serve the cached landing page."""
    return content

if __name__ == "__main__":
    import uvicorn
    # Fall back to the same defaults the Dockerfile sets, so a bare
    # `python main.py` works without exported HOST/PORT (the original
    # raised KeyError when either variable was missing).
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "7860")),
    )
|
start_server.sh
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh

# Raise the locked-memory limit so llama.cpp can mlock the model.
# Tolerate failure: unprivileged containers may lack CAP_IPC_LOCK,
# and the server still runs (just without mlock).
ulimit -l unlimited || true

# exec replaces the shell with python so SIGTERM from `docker stop`
# reaches the server directly instead of the wrapper shell.
exec python3 -B main.py