# Spaces: VeuReu / engine — Running
# File size: 4,117 Bytes
# cc083dd

# ===========================
# Veureu Engine – config.yaml
# ===========================

# Engine-level settings.
engine:
  # presumably the directory under which results are written — confirm against the consumer
  output_root: "results"

# API settings.
api:
  # NOTE(review): "*" is conventionally the allow-any-origin CORS wildcard;
  # confirm this is intended outside of development.
  cors_allow_origins:
    - "*"
  # 3600 s = 1 hour
  sync_timeout_seconds: 3600

# Database settings: on/off switch, persistence directory, face/voice
# recognition toggles and the collection name used for each.
# NOTE(review): the directory name and a comment under audio_processing
# mention Chroma — presumably a Chroma store; confirm.
database:
  enabled: true
  persist_directory: "chroma_db"
  enable_face_recognition: true
  enable_voice_recognition: true
  face_collection: "index_faces"
  voice_collection: "index_voices"

# Job queue settings (disabled here).
jobs:
  enabled: false    # if you enable the async queue, set this to true and add JobManager in main_api.py
  max_workers: 1
  result_ttl_seconds: 86400  # 86400 s = 24 hours

# Video settings: keyframes, per-second frame sampling, OCR, faces and
# OCR clustering.
video_processing:
  keyframes:
    conditional_extraction:
      enable: true
      min_scene_length_seconds: 1.5
      difference_threshold: 28.0

  frames_per_second:
    enable: true
    fps: 1.0   # Analysis frame rate

  ocr:
    engine: "tesseract"   # "tesseract" | "easyocr"
    language_hint: "spa"
    tesseract_cmd: ""     # if it is not on PATH, put the path here

  faces:
    detector_model: "mtcnn"        # adjust to match your vision_tools
    embedding_model: "Facenet512"  # used by FaceOfImageEmbedding
    min_face_size: 32
    detection_confidence: 0.85

  ocr_clustering:
    method: "sequential_similarity"
    sentence_transformer: "all-MiniLM-L6-v2"
    similarity_threshold: 0.60     # higher ⇒ fewer clusters

# Audio settings: format, diarization, voice embeddings and speaker
# identification.
audio_processing:
  sample_rate: 16000
  format: "wav"

  diarization:
    enabled: true
    force_silence_only: true       # Use silence-based segmentation (no pyannote)
    min_segment_duration: 0.5      # in seconds (short clips)
    max_segment_duration: 10.0
    silence_thresh: -40            # dBFS threshold for silence detection
    min_silence_len: 500           # milliseconds

  # NOTE(review): both enable_voice_embeddings and speaker_embedding.enabled
  # are set here; confirm whether the consumer reads both or only one.
  enable_voice_embeddings: true     # SpeechBrain ECAPA
  speaker_embedding:
    enabled: true

  # Speaker identification (clustering + Chroma)
  voice_processing:
    speaker_identification:
      enabled: true
      find_optimal_clusters: true
      min_speakers: 1
      max_speakers: 5
      distance_threshold: 0.40

# Speech-recognition (ASR) settings.
asr:
  # Controls transcription of the full audio in addition to the clips (useful for global context)
  enable_full_transcription: true

# Background description settings: a frame montage plus the model parameters
# used for the description.
background_descriptor:
  montage:
    enable: true
    max_frames: 12
    grid: "auto"

  description:
    model: "salamandra-vision"  # or "gpt-4o-mini"
    max_tokens: 512
    temperature: 0.2

# Identity / timeline-mapping settings.
identity:
  timeline_mapping:
    # Value matches the video_processing.frames_per_second section name;
    # presumably it selects that frame source — confirm against the consumer.
    per_second_frames_source: "frames_per_second"
    # "keyframes" matches the video_processing.keyframes section name;
    # "audio_segments" has no matching key in this file.
    attach_faces_to:
      - "keyframes"
      - "audio_segments"
    # presumably the output key under which the identity is stored — confirm
    out_key: "persona"

# Narration settings: model choice, guidelines file, timing limits and LLM
# generation parameters.
narration:
  model: "salamandra-instruct"   # "salamandra-instruct" | "gpt-4o-mini"
  # NOTE(review): relative path; the base directory it resolves against is
  # not shown in this file — confirm.
  une_guidelines_path: "UNE_153010.txt"
  timing:
    max_ad_duration_ratio: 0.60
    min_gap_seconds: 1.20
    min_ad_seconds: 0.80
  llm:
    max_tokens: 1024
    temperature: 0.2

# Subtitle layout settings.
subtitles:
  max_chars_per_line: 42
  # NOTE(review): 10 lines per cue is unusually high for subtitles — confirm
  # this value is intended.
  max_lines_per_cue: 10
  speaker_display: "brackets"  # "brackets" | "prefix" | "none"

# Model aliases and routing.
models:
  # task alias → model
  instruct: "salamandra-instruct"
  vision: "salamandra-vision"
  tools: "salamandra-tools"
  asr: "whisper-catalan"  # points to the Space veureu/asr (Aina: faster-whisper-large-v3-ca-3catparla)

  routing:
    # Every model named above is listed here; each name also appears as a key
    # under remote_spaces.endpoints.
    use_remote_for:
      - "salamandra-instruct"
      - "salamandra-vision"
      - "salamandra-tools"
      - "whisper-catalan"

# Remote Space endpoints, one entry per model name listed in
# models.routing.use_remote_for, plus shared HTTP settings.
remote_spaces:
  user: "veureu"

  # Each base_url below has the form https://<user>-<space>.hf.space.
  endpoints:
    salamandra-instruct:
      space: "schat"
      base_url: "https://veureu-schat.hf.space"
      client: "gradio"
      predict_route: "/predict"

    salamandra-vision:
      space: "svision"
      base_url: "https://veureu-svision.hf.space"
      client: "gradio"
      predict_route: "/predict"

    salamandra-tools:
      space: "stools"
      base_url: "https://veureu-stools.hf.space"
      client: "gradio"
      predict_route: "/predict"

    whisper-catalan:
      space: "asr"
      base_url: "https://veureu-asr.hf.space"
      client: "gradio"
      predict_route: "/predict"

  http:
    timeout_seconds: 180
    retries: 3
    backoff_seconds: 2.0

# Security settings.
security:
  use_hf_token: true
  # presumably the name of the environment variable holding the token (not
  # the token itself) — confirm against the consumer
  hf_token_env: "HF_TOKEN"
  allow_insecure_tls: false

# Logging settings.
logging:
  level: "INFO"
  # presumably toggles JSON-formatted log output — confirm against the consumer
  json: false

# NOTE(review): undocumented top-level flag. It shares its name with the
# "stools" Space under remote_spaces.endpoints.salamandra-tools; confirm what
# reads it and whether it belongs in this file.
stools: false