Spaces:

VeuReu
/

engine

Running

App Files Community

VeuReu committed 15 days ago

Commit

cc083dd

verified ·

1 Parent(s): 14e190b

Update config.yaml

Browse files

Files changed (1) hide show

config.yaml +172 -170

config.yaml CHANGED Viewed

@@ -1,170 +1,172 @@
-# ===========================
-# Veureu Engine – config.yaml
-# ===========================
-engine:
-  output_root: "results"
-api:
-  cors_allow_origins: ["*"]
-  sync_timeout_seconds: 3600
-database:
-  enabled: true
-  persist_directory: "chroma_db"
-  enable_face_recognition: true
-  enable_voice_recognition: true
-  face_collection: "index_faces"
-  voice_collection: "index_voices"
-jobs:
-  enabled: false    # si activas cola async, cámbialo a true y añade JobManager en main_api.py
-  max_workers: 1
-  result_ttl_seconds: 86400
-video_processing:
-  keyframes:
-    conditional_extraction:
-      enable: true
-      min_scene_length_seconds: 1.5
-      difference_threshold: 28.0
-  frames_per_second:
-    enable: true
-    fps: 1.0   # Frecuencia de frames de análisis
-  ocr:
-    engine: "tesseract"   # "tesseract" | "easyocr"
-    language_hint: "spa"
-    tesseract_cmd: ""     # si no está en PATH, deja la ruta
-  faces:
-    detector_model: "mtcnn"        # ajusta a tu vision_tools
-    embedding_model: "Facenet512"  # usado por FaceOfImageEmbedding
-    min_face_size: 32
-    detection_confidence: 0.85
-  ocr_clustering:
-    method: "sequential_similarity"
-    sentence_transformer: "all-MiniLM-L6-v2"
-    similarity_threshold: 0.60     # mayor ⇒ menos clusters
-audio_processing:
-  sample_rate: 16000
-  format: "wav"
-  diarization:
-    enabled: true
-    force_silence_only: true       # Use silence-based segmentation (no pyannote)
-    min_segment_duration: 0.5      # en segundos (clips cortos)
-    max_segment_duration: 10.0
-    silence_thresh: -40            # dBFS threshold for silence detection
-    min_silence_len: 500           # milliseconds
-  enable_voice_embeddings: true     # SpeechBrain ECAPA
-  speaker_embedding:
-    enabled: true
-  # Identificación de hablantes (clustering + Chroma)
-  voice_processing:
-    speaker_identification:
-      enabled: true
-      find_optimal_clusters: true
-      min_speakers: 1
-      max_speakers: 5
-      distance_threshold: 0.40
-asr:
-  # Controla la transcripción del audio completo además de los clips (útil para contexto global)
-  enable_full_transcription: true
-background_descriptor:
-  montage:
-    enable: true
-    max_frames: 12
-    grid: "auto"
-  description:
-    model: "salamandra-vision"  # o "gpt-4o-mini"
-    max_tokens: 512
-    temperature: 0.2
-identity:
-  timeline_mapping:
-    per_second_frames_source: "frames_per_second"
-    attach_faces_to:
-      - "keyframes"
-      - "audio_segments"
-    out_key: "persona"
-narration:
-  model: "salamandra-instruct"   # "salamandra-instruct" | "gpt-4o-mini"
-  une_guidelines_path: "UNE_153010.txt"
-  timing:
-    max_ad_duration_ratio: 0.60
-    min_gap_seconds: 1.20
-    min_ad_seconds: 0.80
-  llm:
-    max_tokens: 1024
-    temperature: 0.2
-subtitles:
-  max_chars_per_line: 42
-  max_lines_per_cue: 10
-  speaker_display: "brackets"  # "brackets" | "prefix" | "none"
-models:
-  # alias de tarea → modelo
-  instruct: "salamandra-instruct"
-  vision: "salamandra-vision"
-  tools: "salamandra-tools"
-  asr: "whisper-catalan"  # apunta al Space veureu/asr (Aina: faster-whisper-large-v3-ca-3catparla)
-  routing:
-    use_remote_for:
-      - "salamandra-instruct"
-      - "salamandra-vision"
-      - "salamandra-tools"
-      - "whisper-catalan"
-remote_spaces:
-  user: "veureu"
-  endpoints:
-    salamandra-instruct:
-      space: "schat"
-      base_url: "https://veureu-schat.hf.space"
-      client: "gradio"
-      predict_route: "/predict"
-    salamandra-vision:
-      space: "svision"
-      base_url: "https://veureu-svision.hf.space"
-      client: "gradio"
-      predict_route: "/predict"
-    salamandra-tools:
-      space: "stools"
-      base_url: "https://veureu-stools.hf.space"
-      client: "gradio"
-      predict_route: "/predict"
-    whisper-catalan:
-      space: "asr"
-      base_url: "https://veureu-asr.hf.space"
-      client: "gradio"
-      predict_route: "/predict"
-  http:
-    timeout_seconds: 180
-    retries: 3
-    backoff_seconds: 2.0
-security:
-  use_hf_token: true
-  hf_token_env: "HF_TOKEN"
-  allow_insecure_tls: false
-logging:
-  level: "INFO"
-  json: false

+# ===========================
+# Veureu Engine – config.yaml
+# ===========================
+engine:
+  output_root: "results"
+api:
+  cors_allow_origins: ["*"]
+  sync_timeout_seconds: 3600
+database:
+  enabled: true
+  persist_directory: "chroma_db"
+  enable_face_recognition: true
+  enable_voice_recognition: true
+  face_collection: "index_faces"
+  voice_collection: "index_voices"
+jobs:
+  enabled: false    # si activas cola async, cámbialo a true y añade JobManager en main_api.py
+  max_workers: 1
+  result_ttl_seconds: 86400
+video_processing:
+  keyframes:
+    conditional_extraction:
+      enable: true
+      min_scene_length_seconds: 1.5
+      difference_threshold: 28.0
+  frames_per_second:
+    enable: true
+    fps: 1.0   # Frecuencia de frames de análisis
+  ocr:
+    engine: "tesseract"   # "tesseract" | "easyocr"
+    language_hint: "spa"
+    tesseract_cmd: ""     # si no está en PATH, deja la ruta
+  faces:
+    detector_model: "mtcnn"        # ajusta a tu vision_tools
+    embedding_model: "Facenet512"  # usado por FaceOfImageEmbedding
+    min_face_size: 32
+    detection_confidence: 0.85
+  ocr_clustering:
+    method: "sequential_similarity"
+    sentence_transformer: "all-MiniLM-L6-v2"
+    similarity_threshold: 0.60     # mayor ⇒ menos clusters
+audio_processing:
+  sample_rate: 16000
+  format: "wav"
+  diarization:
+    enabled: true
+    force_silence_only: true       # Use silence-based segmentation (no pyannote)
+    min_segment_duration: 0.5      # en segundos (clips cortos)
+    max_segment_duration: 10.0
+    silence_thresh: -40            # dBFS threshold for silence detection
+    min_silence_len: 500           # milliseconds
+  enable_voice_embeddings: true     # SpeechBrain ECAPA
+  speaker_embedding:
+    enabled: true
+  # Identificación de hablantes (clustering + Chroma)
+  voice_processing:
+    speaker_identification:
+      enabled: true
+      find_optimal_clusters: true
+      min_speakers: 1
+      max_speakers: 5
+      distance_threshold: 0.40
+asr:
+  # Controla la transcripción del audio completo además de los clips (útil para contexto global)
+  enable_full_transcription: true
+background_descriptor:
+  montage:
+    enable: true
+    max_frames: 12
+    grid: "auto"
+  description:
+    model: "salamandra-vision"  # o "gpt-4o-mini"
+    max_tokens: 512
+    temperature: 0.2
+identity:
+  timeline_mapping:
+    per_second_frames_source: "frames_per_second"
+    attach_faces_to:
+      - "keyframes"
+      - "audio_segments"
+    out_key: "persona"
+narration:
+  model: "salamandra-instruct"   # "salamandra-instruct" | "gpt-4o-mini"
+  une_guidelines_path: "UNE_153010.txt"
+  timing:
+    max_ad_duration_ratio: 0.60
+    min_gap_seconds: 1.20
+    min_ad_seconds: 0.80
+  llm:
+    max_tokens: 1024
+    temperature: 0.2
+subtitles:
+  max_chars_per_line: 42
+  max_lines_per_cue: 10
+  speaker_display: "brackets"  # "brackets" | "prefix" | "none"
+models:
+  # alias de tarea → modelo
+  instruct: "salamandra-instruct"
+  vision: "salamandra-vision"
+  tools: "salamandra-tools"
+  asr: "whisper-catalan"  # apunta al Space veureu/asr (Aina: faster-whisper-large-v3-ca-3catparla)
+  routing:
+    use_remote_for:
+      - "salamandra-instruct"
+      - "salamandra-vision"
+      - "salamandra-tools"
+      - "whisper-catalan"
+remote_spaces:
+  user: "veureu"
+  endpoints:
+    salamandra-instruct:
+      space: "schat"
+      base_url: "https://veureu-schat.hf.space"
+      client: "gradio"
+      predict_route: "/predict"
+    salamandra-vision:
+      space: "svision"
+      base_url: "https://veureu-svision.hf.space"
+      client: "gradio"
+      predict_route: "/predict"
+    salamandra-tools:
+      space: "stools"
+      base_url: "https://veureu-stools.hf.space"
+      client: "gradio"
+      predict_route: "/predict"
+    whisper-catalan:
+      space: "asr"
+      base_url: "https://veureu-asr.hf.space"
+      client: "gradio"
+      predict_route: "/predict"
+  http:
+    timeout_seconds: 180
+    retries: 3
+    backoff_seconds: 2.0
+security:
+  use_hf_token: true
+  hf_token_env: "HF_TOKEN"
+  allow_insecure_tls: false
+logging:
+  level: "INFO"
+  json: false
+stools: false