# ===========================
# Veureu Engine – config.yaml
# ===========================
engine:
  output_root: "results"

api:
  cors_allow_origins: ["*"]
  sync_timeout_seconds: 3600

database:
  enabled: true
  persist_directory: "chroma_db"
  enable_face_recognition: true
  enable_voice_recognition: true
  face_collection: "index_faces"
  voice_collection: "index_voices"

jobs:
  enabled: false  # to enable the async queue, set this to true and wire a JobManager into main_api.py
  max_workers: 1
  result_ttl_seconds: 86400
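# --- Example (illustrative, not read by the engine) -------------------------
# A minimal sketch of the async queue this block refers to, built on
# concurrent.futures. The JobManager API below is hypothetical; the real
# class to wire into main_api.py is not shown in this file.
#
#   from concurrent.futures import ThreadPoolExecutor
#   import time, uuid
#
#   class JobManager:
#       def __init__(self, max_workers=1, result_ttl_seconds=86400):
#           self.pool = ThreadPoolExecutor(max_workers=max_workers)
#           self.jobs = {}   # job_id -> (future, created_at)
#           self.ttl = result_ttl_seconds
#
#       def submit(self, fn, *args):
#           job_id = uuid.uuid4().hex
#           self.jobs[job_id] = (self.pool.submit(fn, *args), time.time())
#           return job_id
#
#       def result(self, job_id):
#           future, created = self.jobs[job_id]
#           if time.time() - created > self.ttl:   # expired per result_ttl_seconds
#               del self.jobs[job_id]
#               return None
#           return future.result() if future.done() else None
# -----------------------------------------------------------------------------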
video_processing:
  keyframes:
    conditional_extraction:
      enable: true
      min_scene_length_seconds: 1.5
      difference_threshold: 28.0
    frames_per_second:
      enable: true
      fps: 1.0  # analysis frame rate
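# --- Example (illustrative, not read by the engine) -------------------------
# How conditional extraction could work: a keyframe is kept when the mean
# absolute difference to the previous frame exceeds difference_threshold and
# the current scene is at least min_scene_length_seconds old. OpenCV (cv2)
# is an assumption; this file does not name the implementation.
#
#   import cv2
#   import numpy as np
#
#   def conditional_keyframes(path, diff_thresh=28.0, min_scene_s=1.5):
#       cap = cv2.VideoCapture(path)
#       fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
#       prev, last_t, t, out = None, -min_scene_s, 0.0, []
#       while True:
#           ok, frame = cap.read()
#           if not ok:
#               break
#           gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#           if prev is not None:
#               diff = float(np.mean(cv2.absdiff(gray, prev)))
#               if diff > diff_thresh and t - last_t >= min_scene_s:
#                   out.append((t, frame))   # scene change -> keep keyframe
#                   last_t = t
#           prev, t = gray, t + 1.0 / fps
#       cap.release()
#       return out
# -----------------------------------------------------------------------------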
  ocr:
    engine: "tesseract"  # "tesseract" | "easyocr"
    language_hint: "spa"
    tesseract_cmd: ""  # if tesseract is not on PATH, put the binary path here
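# --- Example (illustrative, not read by the engine) -------------------------
# How these values map onto pytesseract (assuming the "tesseract" engine;
# "easyocr" would use easyocr.Reader(["es"]) instead):
#
#   import pytesseract
#
#   def run_ocr(image, language_hint="spa", tesseract_cmd=""):
#       if tesseract_cmd:   # binary is not on PATH
#           pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
#       return pytesseract.image_to_string(image, lang=language_hint)
# -----------------------------------------------------------------------------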
  faces:
    detector_model: "mtcnn"  # adjust to match your vision_tools
    embedding_model: "Facenet512"  # used by FaceOfImageEmbedding
    min_face_size: 32
    detection_confidence: 0.85
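# --- Example (illustrative, not read by the engine) -------------------------
# FaceOfImageEmbedding is this project's own wrapper and is not shown here.
# One common way to get Facenet512 embeddings behind an MTCNN detector is
# DeepFace — an assumption, sketched with this section's thresholds:
#
#   from deepface import DeepFace
#
#   def face_embeddings(img_path, min_size=32, min_conf=0.85):
#       reps = DeepFace.represent(img_path,
#                                 model_name="Facenet512",
#                                 detector_backend="mtcnn")
#       return [r["embedding"] for r in reps
#               if r["facial_area"]["w"] >= min_size
#               and r.get("face_confidence", 1.0) >= min_conf]
# -----------------------------------------------------------------------------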
  ocr_clustering:
    method: "sequential_similarity"
    sentence_transformer: "all-MiniLM-L6-v2"
    similarity_threshold: 0.60  # higher ⇒ fewer clusters
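# --- Example (illustrative, not read by the engine) -------------------------
# A sketch of "sequential_similarity": consecutive OCR texts stay in the
# same cluster while their embedding similarity stays above the threshold.
#
#   from sentence_transformers import SentenceTransformer
#   from sentence_transformers.util import cos_sim
#
#   def sequential_clusters(texts, threshold=0.60):
#       if not texts:
#           return []
#       model = SentenceTransformer("all-MiniLM-L6-v2")
#       emb = model.encode(texts)
#       clusters, current = [], [texts[0]]
#       for i in range(1, len(texts)):
#           if float(cos_sim(emb[i - 1], emb[i])) < threshold:
#               clusters.append(current)   # similarity dropped -> new cluster
#               current = []
#           current.append(texts[i])
#       clusters.append(current)
#       return clusters
# -----------------------------------------------------------------------------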
audio_processing:
  sample_rate: 16000
  format: "wav"
  diarization:
    enabled: true
    force_silence_only: true  # use silence-based segmentation (no pyannote)
    min_segment_duration: 0.5  # seconds (short clips)
    max_segment_duration: 10.0
    silence_thresh: -40  # dBFS threshold for silence detection
    min_silence_len: 500  # milliseconds
    enable_voice_embeddings: true  # SpeechBrain ECAPA
  speaker_embedding:
    enabled: true
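# --- Example (illustrative, not read by the engine) -------------------------
# Silence-based segmentation with the parameters above. pydub is an
# assumption; force_silence_only means pyannote is bypassed entirely.
#
#   from pydub import AudioSegment
#   from pydub.silence import detect_nonsilent
#
#   def silence_segments(wav_path):
#       audio = AudioSegment.from_wav(wav_path)
#       spans = detect_nonsilent(audio, min_silence_len=500, silence_thresh=-40)
#       segments = []
#       for start_ms, end_ms in spans:
#           if (end_ms - start_ms) / 1000.0 < 0.5:        # min_segment_duration
#               continue
#           end_ms = min(end_ms, start_ms + 10_000)       # max_segment_duration
#           segments.append(audio[start_ms:end_ms])
#       return segments
# -----------------------------------------------------------------------------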
# Speaker identification (clustering + Chroma)
voice_processing:
  speaker_identification:
    enabled: true
    find_optimal_clusters: true
    min_speakers: 1
    max_speakers: 5
    distance_threshold: 0.40
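# --- Example (illustrative, not read by the engine) -------------------------
# One way to cluster the ECAPA voice embeddings with this threshold —
# scikit-learn agglomerative clustering is an assumption. With
# n_clusters=None the data picks the speaker count; clamping it into
# [min_speakers, max_speakers] would happen afterwards.
#
#   import numpy as np
#   from sklearn.cluster import AgglomerativeClustering
#
#   def identify_speakers(embeddings, distance_threshold=0.40):
#       clu = AgglomerativeClustering(n_clusters=None,
#                                     distance_threshold=distance_threshold,
#                                     metric="cosine", linkage="average")
#       return clu.fit_predict(np.asarray(embeddings))
# -----------------------------------------------------------------------------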
asr:
  # Also transcribe the full audio, in addition to the clips (useful for global context)
  enable_full_transcription: true
background_descriptor:
  montage:
    enable: true
    max_frames: 12
    grid: "auto"
  description:
    model: "salamandra-vision"  # or "gpt-4o-mini"
    max_tokens: 512
    temperature: 0.2
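# --- Example (illustrative, not read by the engine) -------------------------
# A sketch of the montage step: up to max_frames frames are pasted into a
# near-square grid ("auto"), and the sheet goes to the vision model with the
# max_tokens/temperature above. PIL is an assumption.
#
#   import math
#   from PIL import Image
#
#   def build_montage(frames, max_frames=12):
#       frames = frames[:max_frames]
#       cols = math.ceil(math.sqrt(len(frames)))   # "auto": near-square grid
#       rows = math.ceil(len(frames) / cols)
#       w, h = frames[0].size
#       sheet = Image.new("RGB", (cols * w, rows * h))
#       for i, frame in enumerate(frames):
#           sheet.paste(frame, ((i % cols) * w, (i // cols) * h))
#       return sheet
# -----------------------------------------------------------------------------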
identity:
  timeline_mapping:
    per_second_frames_source: "frames_per_second"
    attach_faces_to:
      - "keyframes"
      - "audio_segments"
    out_key: "persona"
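# --- Example (illustrative, not read by the engine) -------------------------
# The mapping this block describes, sketched with assumed data shapes:
# identities seen in the 1 fps frames are attached, under out_key, to every
# keyframe/audio segment whose time span they fall into.
#
#   def attach_faces(per_second_faces, segments, out_key="persona"):
#       # per_second_faces: {second: [identity, ...]}; segments: dicts with
#       # "start"/"end" in seconds (both shapes are assumptions)
#       for seg in segments:
#           names = set()
#           for s in range(int(seg["start"]), int(seg["end"]) + 1):
#               names.update(per_second_faces.get(s, []))
#           seg[out_key] = sorted(names)
#       return segments
# -----------------------------------------------------------------------------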
narration:
  model: "salamandra-instruct"  # "salamandra-instruct" | "gpt-4o-mini"
  une_guidelines_path: "UNE_153010.txt"
  timing:
    max_ad_duration_ratio: 0.60
    min_gap_seconds: 1.20
    min_ad_seconds: 0.80
  llm:
    max_tokens: 1024
    temperature: 0.2
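# --- Example (illustrative, not read by the engine) -------------------------
# How the timing limits interact, on assumed (start, end) dialogue gaps:
# gaps shorter than min_gap_seconds are skipped, at most 60 % of a gap is
# filled, and slots shorter than min_ad_seconds are dropped.
#
#   def plan_ad_slots(gaps, max_ratio=0.60, min_gap=1.20, min_ad=0.80):
#       slots = []
#       for start, end in gaps:
#           if end - start < min_gap:
#               continue                        # too short to narrate over
#           ad = (end - start) * max_ratio      # max_ad_duration_ratio
#           if ad >= min_ad:
#               slots.append((start, start + ad))
#       return slots
# -----------------------------------------------------------------------------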
subtitles:
  max_chars_per_line: 42
  max_lines_per_cue: 10
  speaker_display: "brackets"  # "brackets" | "prefix" | "none"
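# --- Example (illustrative, not read by the engine) -------------------------
# How a cue could be formatted from these values (textwrap is an assumption):
#
#   import textwrap
#
#   def format_cue(text, speaker=None, max_chars=42, max_lines=10,
#                  speaker_display="brackets"):
#       if speaker and speaker_display == "brackets":
#           text = f"[{speaker}] {text}"
#       elif speaker and speaker_display == "prefix":
#           text = f"{speaker}: {text}"
#       return textwrap.wrap(text, width=max_chars)[:max_lines]
# -----------------------------------------------------------------------------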
models:
  # task alias → model
  instruct: "salamandra-instruct"
  vision: "salamandra-vision"
  tools: "salamandra-tools"
  asr: "whisper-catalan"  # points to the veureu/asr Space (Aina: faster-whisper-large-v3-ca-3catparla)

routing:
  use_remote_for:
    - "salamandra-instruct"
    - "salamandra-vision"
    - "salamandra-tools"
    - "whisper-catalan"
remote_spaces:
  user: "veureu"
  endpoints:
    salamandra-instruct:
      space: "schat"
      base_url: "https://veureu-schat.hf.space"
      client: "gradio"
      predict_route: "/predict"
    salamandra-vision:
      space: "svision"
      base_url: "https://veureu-svision.hf.space"
      client: "gradio"
      predict_route: "/predict"
    salamandra-tools:
      space: "stools"
      base_url: "https://veureu-stools.hf.space"
      client: "gradio"
      predict_route: "/predict"
    whisper-catalan:
      space: "asr"
      base_url: "https://veureu-asr.hf.space"
      client: "gradio"
      predict_route: "/predict"
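# --- Example (illustrative, not read by the engine) -------------------------
# Calling one of these endpoints with gradio_client (client: "gradio"),
# reading the token from the env var named under security:
#
#   import os
#   from gradio_client import Client
#
#   def call_space(endpoint, payload):
#       client = Client(endpoint["base_url"],   # e.g. "https://veureu-asr.hf.space"
#                       hf_token=os.environ.get("HF_TOKEN"))
#       return client.predict(payload, api_name=endpoint["predict_route"])
# -----------------------------------------------------------------------------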
  http:
    timeout_seconds: 180
    retries: 3
    backoff_seconds: 2.0
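# --- Example (illustrative, not read by the engine) -------------------------
# A retry wrapper matching retries/backoff_seconds. Whether the backoff is
# linear or exponential is not specified here; linear is assumed.
#
#   import time
#
#   def with_retries(fn, retries=3, backoff_seconds=2.0):
#       for attempt in range(retries):
#           try:
#               return fn()
#           except Exception:
#               if attempt == retries - 1:
#                   raise
#               time.sleep(backoff_seconds * (attempt + 1))
# -----------------------------------------------------------------------------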
  security:
    use_hf_token: true
    hf_token_env: "HF_TOKEN"
    allow_insecure_tls: false

logging:
  level: "INFO"
  json: false
  stools: false