VeuReu committed on
Commit
cc083dd
·
verified ·
1 Parent(s): 14e190b

Update config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +172 -170
config.yaml CHANGED
@@ -1,170 +1,172 @@
1
- # ===========================
2
- # Veureu Engine – config.yaml
3
- # ===========================
4
-
5
- engine:
6
- output_root: "results"
7
-
8
- api:
9
- cors_allow_origins: ["*"]
10
- sync_timeout_seconds: 3600
11
-
12
- database:
13
- enabled: true
14
- persist_directory: "chroma_db"
15
- enable_face_recognition: true
16
- enable_voice_recognition: true
17
- face_collection: "index_faces"
18
- voice_collection: "index_voices"
19
-
20
- jobs:
21
- enabled: false # si activas cola async, cámbialo a true y añade JobManager en main_api.py
22
- max_workers: 1
23
- result_ttl_seconds: 86400
24
-
25
- video_processing:
26
- keyframes:
27
- conditional_extraction:
28
- enable: true
29
- min_scene_length_seconds: 1.5
30
- difference_threshold: 28.0
31
-
32
- frames_per_second:
33
- enable: true
34
- fps: 1.0 # Frecuencia de frames de análisis
35
-
36
- ocr:
37
- engine: "tesseract" # "tesseract" | "easyocr"
38
- language_hint: "spa"
39
- tesseract_cmd: "" # si no está en PATH, deja la ruta
40
-
41
- faces:
42
- detector_model: "mtcnn" # ajusta a tu vision_tools
43
- embedding_model: "Facenet512" # usado por FaceOfImageEmbedding
44
- min_face_size: 32
45
- detection_confidence: 0.85
46
-
47
- ocr_clustering:
48
- method: "sequential_similarity"
49
- sentence_transformer: "all-MiniLM-L6-v2"
50
- similarity_threshold: 0.60 # mayor ⇒ menos clusters
51
-
52
- audio_processing:
53
- sample_rate: 16000
54
- format: "wav"
55
-
56
- diarization:
57
- enabled: true
58
- force_silence_only: true # Use silence-based segmentation (no pyannote)
59
- min_segment_duration: 0.5 # en segundos (clips cortos)
60
- max_segment_duration: 10.0
61
- silence_thresh: -40 # dBFS threshold for silence detection
62
- min_silence_len: 500 # milliseconds
63
-
64
- enable_voice_embeddings: true # SpeechBrain ECAPA
65
- speaker_embedding:
66
- enabled: true
67
-
68
- # Identificación de hablantes (clustering + Chroma)
69
- voice_processing:
70
- speaker_identification:
71
- enabled: true
72
- find_optimal_clusters: true
73
- min_speakers: 1
74
- max_speakers: 5
75
- distance_threshold: 0.40
76
-
77
- asr:
78
- # Controla la transcripción del audio completo además de los clips (útil para contexto global)
79
- enable_full_transcription: true
80
-
81
- background_descriptor:
82
- montage:
83
- enable: true
84
- max_frames: 12
85
- grid: "auto"
86
-
87
- description:
88
- model: "salamandra-vision" # o "gpt-4o-mini"
89
- max_tokens: 512
90
- temperature: 0.2
91
-
92
- identity:
93
- timeline_mapping:
94
- per_second_frames_source: "frames_per_second"
95
- attach_faces_to:
96
- - "keyframes"
97
- - "audio_segments"
98
- out_key: "persona"
99
-
100
- narration:
101
- model: "salamandra-instruct" # "salamandra-instruct" | "gpt-4o-mini"
102
- une_guidelines_path: "UNE_153010.txt"
103
- timing:
104
- max_ad_duration_ratio: 0.60
105
- min_gap_seconds: 1.20
106
- min_ad_seconds: 0.80
107
- llm:
108
- max_tokens: 1024
109
- temperature: 0.2
110
-
111
- subtitles:
112
- max_chars_per_line: 42
113
- max_lines_per_cue: 10
114
- speaker_display: "brackets" # "brackets" | "prefix" | "none"
115
-
116
- models:
117
- # alias de tarea → modelo
118
- instruct: "salamandra-instruct"
119
- vision: "salamandra-vision"
120
- tools: "salamandra-tools"
121
- asr: "whisper-catalan" # apunta al Space veureu/asr (Aina: faster-whisper-large-v3-ca-3catparla)
122
-
123
- routing:
124
- use_remote_for:
125
- - "salamandra-instruct"
126
- - "salamandra-vision"
127
- - "salamandra-tools"
128
- - "whisper-catalan"
129
-
130
- remote_spaces:
131
- user: "veureu"
132
-
133
- endpoints:
134
- salamandra-instruct:
135
- space: "schat"
136
- base_url: "https://veureu-schat.hf.space"
137
- client: "gradio"
138
- predict_route: "/predict"
139
-
140
- salamandra-vision:
141
- space: "svision"
142
- base_url: "https://veureu-svision.hf.space"
143
- client: "gradio"
144
- predict_route: "/predict"
145
-
146
- salamandra-tools:
147
- space: "stools"
148
- base_url: "https://veureu-stools.hf.space"
149
- client: "gradio"
150
- predict_route: "/predict"
151
-
152
- whisper-catalan:
153
- space: "asr"
154
- base_url: "https://veureu-asr.hf.space"
155
- client: "gradio"
156
- predict_route: "/predict"
157
-
158
- http:
159
- timeout_seconds: 180
160
- retries: 3
161
- backoff_seconds: 2.0
162
-
163
- security:
164
- use_hf_token: true
165
- hf_token_env: "HF_TOKEN"
166
- allow_insecure_tls: false
167
-
168
- logging:
169
- level: "INFO"
170
- json: false
 
 
 
1
+ # ===========================
2
+ # Veureu Engine – config.yaml
3
+ # ===========================
4
+
5
+ engine:
6
+ output_root: "results"
7
+
8
+ api:
9
+ cors_allow_origins: ["*"]
10
+ sync_timeout_seconds: 3600
11
+
12
+ database:
13
+ enabled: true
14
+ persist_directory: "chroma_db"
15
+ enable_face_recognition: true
16
+ enable_voice_recognition: true
17
+ face_collection: "index_faces"
18
+ voice_collection: "index_voices"
19
+
20
+ jobs:
21
+ enabled: false # si activas cola async, cámbialo a true y añade JobManager en main_api.py
22
+ max_workers: 1
23
+ result_ttl_seconds: 86400
24
+
25
+ video_processing:
26
+ keyframes:
27
+ conditional_extraction:
28
+ enable: true
29
+ min_scene_length_seconds: 1.5
30
+ difference_threshold: 28.0
31
+
32
+ frames_per_second:
33
+ enable: true
34
+ fps: 1.0 # Frecuencia de frames de análisis
35
+
36
+ ocr:
37
+ engine: "tesseract" # "tesseract" | "easyocr"
38
+ language_hint: "spa"
39
+ tesseract_cmd: "" # si no está en PATH, deja la ruta
40
+
41
+ faces:
42
+ detector_model: "mtcnn" # ajusta a tu vision_tools
43
+ embedding_model: "Facenet512" # usado por FaceOfImageEmbedding
44
+ min_face_size: 32
45
+ detection_confidence: 0.85
46
+
47
+ ocr_clustering:
48
+ method: "sequential_similarity"
49
+ sentence_transformer: "all-MiniLM-L6-v2"
50
+ similarity_threshold: 0.60 # mayor ⇒ menos clusters
51
+
52
+ audio_processing:
53
+ sample_rate: 16000
54
+ format: "wav"
55
+
56
+ diarization:
57
+ enabled: true
58
+ force_silence_only: true # Use silence-based segmentation (no pyannote)
59
+ min_segment_duration: 0.5 # en segundos (clips cortos)
60
+ max_segment_duration: 10.0
61
+ silence_thresh: -40 # dBFS threshold for silence detection
62
+ min_silence_len: 500 # milliseconds
63
+
64
+ enable_voice_embeddings: true # SpeechBrain ECAPA
65
+ speaker_embedding:
66
+ enabled: true
67
+
68
+ # Identificación de hablantes (clustering + Chroma)
69
+ voice_processing:
70
+ speaker_identification:
71
+ enabled: true
72
+ find_optimal_clusters: true
73
+ min_speakers: 1
74
+ max_speakers: 5
75
+ distance_threshold: 0.40
76
+
77
+ asr:
78
+ # Controla la transcripción del audio completo además de los clips (útil para contexto global)
79
+ enable_full_transcription: true
80
+
81
+ background_descriptor:
82
+ montage:
83
+ enable: true
84
+ max_frames: 12
85
+ grid: "auto"
86
+
87
+ description:
88
+ model: "salamandra-vision" # o "gpt-4o-mini"
89
+ max_tokens: 512
90
+ temperature: 0.2
91
+
92
+ identity:
93
+ timeline_mapping:
94
+ per_second_frames_source: "frames_per_second"
95
+ attach_faces_to:
96
+ - "keyframes"
97
+ - "audio_segments"
98
+ out_key: "persona"
99
+
100
+ narration:
101
+ model: "salamandra-instruct" # "salamandra-instruct" | "gpt-4o-mini"
102
+ une_guidelines_path: "UNE_153010.txt"
103
+ timing:
104
+ max_ad_duration_ratio: 0.60
105
+ min_gap_seconds: 1.20
106
+ min_ad_seconds: 0.80
107
+ llm:
108
+ max_tokens: 1024
109
+ temperature: 0.2
110
+
111
+ subtitles:
112
+ max_chars_per_line: 42
113
+ max_lines_per_cue: 10
114
+ speaker_display: "brackets" # "brackets" | "prefix" | "none"
115
+
116
+ models:
117
+ # alias de tarea → modelo
118
+ instruct: "salamandra-instruct"
119
+ vision: "salamandra-vision"
120
+ tools: "salamandra-tools"
121
+ asr: "whisper-catalan" # apunta al Space veureu/asr (Aina: faster-whisper-large-v3-ca-3catparla)
122
+
123
+ routing:
124
+ use_remote_for:
125
+ - "salamandra-instruct"
126
+ - "salamandra-vision"
127
+ - "salamandra-tools"
128
+ - "whisper-catalan"
129
+
130
+ remote_spaces:
131
+ user: "veureu"
132
+
133
+ endpoints:
134
+ salamandra-instruct:
135
+ space: "schat"
136
+ base_url: "https://veureu-schat.hf.space"
137
+ client: "gradio"
138
+ predict_route: "/predict"
139
+
140
+ salamandra-vision:
141
+ space: "svision"
142
+ base_url: "https://veureu-svision.hf.space"
143
+ client: "gradio"
144
+ predict_route: "/predict"
145
+
146
+ salamandra-tools:
147
+ space: "stools"
148
+ base_url: "https://veureu-stools.hf.space"
149
+ client: "gradio"
150
+ predict_route: "/predict"
151
+
152
+ whisper-catalan:
153
+ space: "asr"
154
+ base_url: "https://veureu-asr.hf.space"
155
+ client: "gradio"
156
+ predict_route: "/predict"
157
+
158
+ http:
159
+ timeout_seconds: 180
160
+ retries: 3
161
+ backoff_seconds: 2.0
162
+
163
+ security:
164
+ use_hf_token: true
165
+ hf_token_env: "HF_TOKEN"
166
+ allow_insecure_tls: false
167
+
168
+ logging:
169
+ level: "INFO"
170
+ json: false
171
+
172
+ stools: false