---
# Engine-wide settings.
engine:
  # Presumably the directory results are written to; a relative path is
  # resolved by the consumer - confirm against the loader.
  output_root: "results"

# Settings grouped under `api`.
api:
  # NOTE(review): "*" looks like a wildcard admitting every origin; list
  # explicit origins before exposing the service publicly.
  cors_allow_origins: ["*"]
  # 3600 seconds = 1 hour.
  sync_timeout_seconds: 3600

# Settings grouped under `database`. The directory name suggests a Chroma
# store - confirm against the consumer.
database:
  enabled: true
  persist_directory: "chroma_db"
  # Independent switches for the two recognition features.
  enable_face_recognition: true
  enable_voice_recognition: true
  # Collection names, one per recognition feature.
  face_collection: "index_faces"
  voice_collection: "index_voices"

# Settings grouped under `jobs`; currently switched off.
jobs:
  enabled: false
  max_workers: 1
  # 86400 seconds = 24 hours.
  result_ttl_seconds: 86400

# Video pipeline settings.
# NOTE(review): the original indentation was lost; the nesting below is
# inferred from key grouping (in particular, `frames_per_second` is assumed
# to be a sibling of `keyframes`) - confirm against the consumer.
video_processing:
  keyframes:
    conditional_extraction:
      enable: true
      min_scene_length_seconds: 1.5
      difference_threshold: 28.0

  frames_per_second:
    enable: true
    fps: 1.0

  ocr:
    engine: "tesseract"
    language_hint: "spa"
    # Empty string: no explicit command path is configured here.
    tesseract_cmd: ""

  faces:
    detector_model: "mtcnn"
    embedding_model: "Facenet512"
    # Presumably pixels - confirm against the detector wrapper.
    min_face_size: 32
    detection_confidence: 0.85

  ocr_clustering:
    method: "sequential_similarity"
    sentence_transformer: "all-MiniLM-L6-v2"
    similarity_threshold: 0.60

# Audio pipeline settings.
# NOTE(review): the original indentation was lost; nesting is inferred from
# key grouping - confirm against the consumer.
audio_processing:
  # Presumably Hz (16 kHz) - confirm.
  sample_rate: 16000
  format: "wav"

  diarization:
    enabled: true
    force_silence_only: true
    min_segment_duration: 0.5
    max_segment_duration: 10.0
    # Units are not stated here; presumably dBFS and milliseconds
    # respectively - confirm against the silence-splitting code.
    silence_thresh: -40
    min_silence_len: 500

  enable_voice_embeddings: true
  speaker_embedding:
    enabled: true

# Speaker identification settings.
# NOTE(review): the original indentation was lost. This section is placed at
# the top level because its name parallels `video_processing` and
# `audio_processing`; it may instead belong under `audio_processing` -
# confirm against the consumer.
voice_processing:
  speaker_identification:
    enabled: true
    find_optimal_clusters: true
    # Bounds on the number of speakers.
    min_speakers: 1
    max_speakers: 5
    distance_threshold: 0.40

# Speech-recognition settings.
# NOTE(review): the original indentation was lost; top-level placement is
# inferred - confirm against the consumer.
asr:
  enable_full_transcription: true

# Settings grouped under `background_descriptor`: a frame montage and the
# model parameters used to describe it.
# NOTE(review): nesting is inferred from key grouping - confirm.
background_descriptor:
  montage:
    enable: true
    max_frames: 12
    grid: "auto"

  description:
    # Same name as `models.vision` elsewhere in this file.
    model: "salamandra-vision"
    max_tokens: 512
    temperature: 0.2

# Settings grouped under `identity`.
# NOTE(review): nesting is inferred from key grouping - confirm.
identity:
  timeline_mapping:
    # Matches the `frames_per_second` key used in the video settings.
    per_second_frames_source: "frames_per_second"
    attach_faces_to:
      - "keyframes"
      - "audio_segments"
    out_key: "persona"

# Narration generation settings.
# NOTE(review): nesting is inferred from key grouping - confirm.
narration:
  # Same name as `models.instruct` elsewhere in this file.
  model: "salamandra-instruct"
  une_guidelines_path: "UNE_153010.txt"
  timing:
    max_ad_duration_ratio: 0.60
    min_gap_seconds: 1.20
    min_ad_seconds: 0.80
  llm:
    max_tokens: 1024
    temperature: 0.2

# Subtitle layout settings.
subtitles:
  max_chars_per_line: 42
  max_lines_per_cue: 10
  speaker_display: "brackets"

# Role -> model-name mapping. The same names appear in `use_remote_for` and
# as endpoint keys elsewhere in this file, so keep them in sync.
models:
  instruct: "salamandra-instruct"
  vision: "salamandra-vision"
  tools: "salamandra-tools"
  asr: "whisper-catalan"

# Model names listed under `use_remote_for`.
# NOTE(review): the original indentation was lost; this section may belong
# under `models` rather than at the top level - confirm against the consumer.
routing:
  use_remote_for:
    - "salamandra-instruct"
    - "salamandra-vision"
    - "salamandra-tools"
    - "whisper-catalan"

# Remote endpoints, one entry per model name.
# NOTE(review): the original indentation was lost; `endpoints` is assumed to
# be nested here - confirm against the consumer.
remote_spaces:
  user: "veureu"

  # Each base_url below has the form https://<user>-<space>.hf.space.
  endpoints:
    salamandra-instruct:
      space: "schat"
      base_url: "https://veureu-schat.hf.space"
      client: "gradio"
      predict_route: "/predict"

    salamandra-vision:
      space: "svision"
      base_url: "https://veureu-svision.hf.space"
      client: "gradio"
      predict_route: "/predict"

    salamandra-tools:
      space: "stools"
      base_url: "https://veureu-stools.hf.space"
      client: "gradio"
      predict_route: "/predict"

    whisper-catalan:
      space: "asr"
      base_url: "https://veureu-asr.hf.space"
      client: "gradio"
      predict_route: "/predict"

# HTTP client settings.
# NOTE(review): the original indentation was lost; this section may belong
# under `remote_spaces` rather than at the top level - confirm against the
# consumer.
http:
  timeout_seconds: 180
  retries: 3
  backoff_seconds: 2.0

# Security settings.
security:
  use_hf_token: true
  # Name of the environment variable, not the token itself; keep the secret
  # out of this file.
  hf_token_env: "HF_TOKEN"
  allow_insecure_tls: false

# Logging settings.
logging:
  level: "INFO"
  json: false

# NOTE(review): the original indentation was lost and no parent key is
# visible; this is kept as a top-level flag. "stools" is also the space name
# of the salamandra-tools endpoint - confirm what this flag controls.
stools: false