|
|
import os
|
|
|
import io
|
|
|
import json
|
|
|
import shutil
|
|
|
|
|
|
import sqlite3
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from fastapi import APIRouter, UploadFile, File, Query, HTTPException
|
|
|
from fastapi.responses import FileResponse, JSONResponse
|
|
|
|
|
|
|
|
|
from storage.files.file_manager import FileManager
|
|
|
from storage.common import validate_token
|
|
|
|
|
|
# Router that groups every endpoint of this module under the /embeddings prefix.
router = APIRouter(tags=["Embeddings Manager"], prefix="/embeddings")

# Root folder under which the per-video embedding folders are stored.
EMBEDDINGS_ROOT = Path("/data/embeddings")

# File manager bound to the embeddings root.
# NOTE(review): not referenced by the endpoints visible in this part of the file.
file_manager = FileManager(EMBEDDINGS_ROOT)

# Value of the HF_TOKEN environment variable, or None when it is not set.
# NOTE(review): not referenced by the endpoints visible in this part of the file.
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
|
|
|
|
|
@router.get("/list_embeddings", tags=["Embeddings Manager"])
def list_all_embeddings(
    token: str = Query(..., description="Token required for authorization")
):
    """
    Report which embedding files exist for every video folder.

    The token is passed to validate_token before anything is read. Each
    immediate sub-directory of EMBEDDINGS_ROOT produces one entry with:

    - video: the directory name (the video hash)
    - faces: True when <video>/faces/embeddings.json exists, else False
    - voices: True when <video>/voices/embeddings.json exists, else False

    Notes:
    - Entries of the root that are not directories are skipped.
    - A video folder may hold faces only, voices only, or neither.
    - When the root folder itself does not exist, an empty list is returned.
    """
    validate_token(token)

    # Nothing has been stored yet when the root folder is absent.
    if not EMBEDDINGS_ROOT.exists():
        return []

    return [
        {
            "video": entry.name,
            "faces": (entry / "faces" / "embeddings.json").exists(),
            "voices": (entry / "voices" / "embeddings.json").exists(),
        }
        for entry in EMBEDDINGS_ROOT.iterdir()
        if entry.is_dir()
    ]
|
|
|
|
|
|
|
|
|
@router.post("/upload_embeddings", tags=["Embeddings Manager"])
async def upload_embeddings(
    file: UploadFile = File(...),
    embedding_type: str = Query(..., description="faces or voices"),
    video_hash: str = Query(..., description="Hash of the video"),
    token: str = Query(..., description="Token required for authorization")
):
    """
    Upload embeddings JSON for a given video and type (faces or voices).

    Behavior:
    - Validate the token.
    - Validate embedding_type (must be 'faces' or 'voices').
    - Validate video_hash: it must be a single folder name, so it cannot
      point outside /data/embeddings.
    - Read the uploaded bytes before touching the disk, so a failed upload
      does not destroy the embeddings that are already stored.
    - Ensure directory structure: /data/embeddings/<video_hash>/<embedding_type>/
    - Delete any existing .json file inside that folder.
    - Save the uploaded bytes as embeddings.json (content is written as-is,
      it is not parsed here).

    Returns:
        JSONResponse with status 200 and {"status": "ok", "saved_to": <path>}.

    Raises:
        HTTPException 400: invalid embedding_type or video_hash.
        HTTPException 500: the upload could not be read, an old file could
            not be deleted, or the new file could not be written.
    """
    validate_token(token)

    if embedding_type not in ("faces", "voices"):
        raise HTTPException(status_code=400, detail="embedding_type must be 'faces' or 'voices'")

    # video_hash comes straight from the query string and is joined into a
    # filesystem path below. Reject anything that is not a plain folder name:
    # "..", separators or an absolute path would otherwise let a caller
    # create folders, delete *.json files and write outside EMBEDDINGS_ROOT.
    if video_hash in ("", ".", "..") or any(
        forbidden in video_hash for forbidden in ("/", "\\", "\x00")
    ):
        raise HTTPException(
            status_code=400,
            detail="video_hash must be a single folder name (no path separators)"
        )

    # Read first: if this fails, the previously stored embeddings are untouched.
    try:
        file_bytes = await file.read()
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save embeddings: {exc}") from exc

    type_path = EMBEDDINGS_ROOT / video_hash / embedding_type
    type_path.mkdir(parents=True, exist_ok=True)

    # Remove every previous .json so the folder only ever holds the new file.
    for existing in type_path.glob("*.json"):
        try:
            existing.unlink()
        except Exception as exc:
            raise HTTPException(status_code=500, detail=f"Failed to delete old embeddings: {exc}") from exc

    final_path = type_path / "embeddings.json"

    try:
        with open(final_path, "wb") as f:
            f.write(file_bytes)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to save embeddings: {exc}") from exc

    return JSONResponse(
        status_code=200,
        content={
            "status": "ok",
            "saved_to": str(final_path)
        }
    )
|
|
|
|
|
|
def get_embeddings_json(video_hash: str, embedding_type: str):
    """
    Returns the parsed embeddings.json for a given video and type.

    Behavior:
    - Validate embedding_type (must be 'faces' or 'voices').
    - Validate video_hash: it must be a single folder name, so it cannot
      point outside /data/embeddings.
    - Build the file path: /data/embeddings/<video_hash>/<embedding_type>/embeddings.json
    - Open and parse the file, returning the parsed JSON value.

    Raises:
        HTTPException 400: invalid embedding_type or video_hash.
        HTTPException 404: the embeddings.json file does not exist.
        HTTPException 500: the file exists but could not be read or parsed.
    """
    if embedding_type not in ("faces", "voices"):
        raise HTTPException(status_code=400, detail="embedding_type must be 'faces' or 'voices'")

    # video_hash is joined into a filesystem path below. Reject anything that
    # is not a plain folder name so "..", separators or an absolute path
    # cannot be used to read files outside EMBEDDINGS_ROOT.
    if video_hash in ("", ".", "..") or any(
        forbidden in video_hash for forbidden in ("/", "\\", "\x00")
    ):
        raise HTTPException(
            status_code=400,
            detail="video_hash must be a single folder name (no path separators)"
        )

    target_file = EMBEDDINGS_ROOT / video_hash / embedding_type / "embeddings.json"

    # Open directly instead of checking exists() first: the file can be
    # removed between the check and the open (an upload deletes and rewrites
    # it), and that case must still be reported as "not found".
    try:
        with open(target_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        raise HTTPException(
            status_code=404,
            detail=f"embeddings.json not found for video={video_hash}, type={embedding_type}"
        ) from None
    except Exception as exc:
        # Any other failure (unreadable file, invalid JSON, bad encoding)
        # is reported to the client as a server-side error.
        raise HTTPException(status_code=500, detail=f"Failed to read embeddings: {exc}") from exc
|
|
|
|
|
|
|
|
|
@router.get("/get_embedding", tags=["Embeddings Manager"])
def get_embeddings(
    video_hash: str = Query(..., description="Hash of the video"),
    embedding_type: str = Query(..., description="faces or voices"),
    token: str = Query(..., description="Token required for authorization")
):
    """
    Return the stored embeddings.json content for one video hash and type.

    The token is passed to validate_token first; loading the file and all
    related error handling is delegated to get_embeddings_json.
    """
    validate_token(token)
    return get_embeddings_json(video_hash, embedding_type)