VeuReu committed on
Commit
d350726
·
verified ·
1 Parent(s): b396be9

Upload 2 files

Browse files
Files changed (1) hide show
  1. character_detection.py +33 -42
character_detection.py CHANGED
@@ -18,20 +18,12 @@ from typing import List, Dict, Any, Tuple
18
 
19
  # Imports de las herramientas de vision y audio desde los módulos de la raíz
20
  try:
21
- # Vision tools del engine (ya incluye DeepFace y face_recognition)
22
- from vision_tools import FaceOfImageEmbedding
23
- VISION_TOOLS_AVAILABLE = True
24
  except Exception as e:
25
- VISION_TOOLS_AVAILABLE = False
26
- logging.warning(f"Vision tools no disponibles: {e}")
27
-
28
- try:
29
- # Audio tools del engine
30
- from audio_tools import extract_audio_ffmpeg_simple, diarize_with_pyannote, get_speaker_embeddings
31
- AUDIO_TOOLS_AVAILABLE = True
32
- except Exception as e:
33
- AUDIO_TOOLS_AVAILABLE = False
34
- logging.warning(f"Audio tools no disponibles: {e}")
35
 
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
@@ -62,29 +54,29 @@ class CharacterDetector:
62
 
63
  def extract_faces_embeddings(self) -> List[Dict[str, Any]]:
64
  """
65
- Extrae caras del vídeo y calcula sus embeddings usando FaceOfImageEmbedding.
66
 
67
  Returns:
68
  Lista de dicts con {"embeddings": [...], "path": "..."}
69
  """
70
- if not VISION_TOOLS_AVAILABLE:
71
- logger.warning("Vision tools no disponibles, retornando lista vacía")
72
  return []
73
 
74
- logger.info("Extrayendo caras del vídeo...")
75
-
76
- # Inicializar el embedder (usa face_recognition o DeepFace automáticamente)
77
- embedder = FaceOfImageEmbedding(deepface_model='Facenet512')
78
 
79
  extract_every = 1.0 # segundos
80
  video = cv2.VideoCapture(self.video_path)
81
  fps = int(video.get(cv2.CAP_PROP_FPS))
 
82
  frame_interval = int(fps * extract_every)
83
  frame_count = 0
84
  saved_count = 0
85
 
86
  embeddings_caras = []
87
 
 
 
88
  while True:
89
  ret, frame = video.read()
90
  if not ret:
@@ -95,35 +87,34 @@ class CharacterDetector:
95
  cv2.imwrite(str(temp_path), frame)
96
 
97
  try:
98
- # Extraer embeddings usando FaceOfImageEmbedding
99
- # Devuelve una lista de embeddings (uno por cada cara detectada)
100
- embeddings_list = embedder.encode_image(temp_path)
 
 
 
 
 
101
 
102
- if embeddings_list:
103
- # Si es una lista de listas (múltiples caras)
104
- if isinstance(embeddings_list[0], list):
105
- for i, embedding in enumerate(embeddings_list):
106
- save_path = self.faces_dir / f"frame_{saved_count:04d}.jpg"
107
- # Guardar el frame completo (la extracción de cara ya se hizo internamente)
108
- cv2.imwrite(str(save_path), frame)
109
-
110
- embeddings_caras.append({
111
- "embeddings": embedding,
112
- "path": str(save_path),
113
- "frame": frame_count
114
- })
115
- saved_count += 1
116
- else:
117
- # Si es un solo embedding
118
  save_path = self.faces_dir / f"frame_{saved_count:04d}.jpg"
119
  cv2.imwrite(str(save_path), frame)
120
 
121
  embeddings_caras.append({
122
- "embeddings": embeddings_list,
123
  "path": str(save_path),
124
- "frame": frame_count
 
125
  })
126
  saved_count += 1
 
 
 
127
 
128
  except Exception as e:
129
  logger.debug(f"No se detectaron caras en frame {frame_count}: {e}")
@@ -134,7 +125,7 @@ class CharacterDetector:
134
  frame_count += 1
135
 
136
  video.release()
137
- logger.info(f"Caras extraídas: {len(embeddings_caras)}")
138
  return embeddings_caras
139
 
140
  def extract_voices_embeddings(self) -> List[Dict[str, Any]]:
 
18
 
19
  # Imports de las herramientas de vision y audio desde los módulos de la raíz
20
  try:
21
+ # DeepFace para detección y embeddings de caras
22
+ from deepface import DeepFace
23
+ DEEPFACE_AVAILABLE = True
24
  except Exception as e:
25
+ DEEPFACE_AVAILABLE = False
26
+ logging.warning(f"DeepFace no disponible: {e}")
 
 
 
 
 
 
 
 
27
 
28
  logging.basicConfig(level=logging.INFO)
29
  logger = logging.getLogger(__name__)
 
54
 
55
  def extract_faces_embeddings(self) -> List[Dict[str, Any]]:
56
  """
57
+ Extrae caras del vídeo y calcula sus embeddings usando DeepFace directamente.
58
 
59
  Returns:
60
  Lista de dicts con {"embeddings": [...], "path": "..."}
61
  """
62
+ if not DEEPFACE_AVAILABLE:
63
+ logger.warning("DeepFace no disponible, retornando lista vacía")
64
  return []
65
 
66
+ logger.info("Extrayendo caras del vídeo con DeepFace...")
 
 
 
67
 
68
  extract_every = 1.0 # segundos
69
  video = cv2.VideoCapture(self.video_path)
70
  fps = int(video.get(cv2.CAP_PROP_FPS))
71
+ total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
72
  frame_interval = int(fps * extract_every)
73
  frame_count = 0
74
  saved_count = 0
75
 
76
  embeddings_caras = []
77
 
78
+ logger.info(f"Total frames: {total_frames}, FPS: {fps}, Procesando cada {frame_interval} frames")
79
+
80
  while True:
81
  ret, frame = video.read()
82
  if not ret:
 
87
  cv2.imwrite(str(temp_path), frame)
88
 
89
  try:
90
+ # Extraer embeddings con DeepFace
91
+ # represent() devuelve una lista de dicts, uno por cada cara detectada
92
+ face_objs = DeepFace.represent(
93
+ img_path=str(temp_path),
94
+ model_name='Facenet512',
95
+ detector_backend='opencv',
96
+ enforce_detection=False
97
+ )
98
 
99
+ if face_objs:
100
+ for i, face_obj in enumerate(face_objs):
101
+ embedding = face_obj['embedding']
102
+ facial_area = face_obj.get('facial_area', {})
103
+
104
+ # Guardar el frame completo
 
 
 
 
 
 
 
 
 
 
105
  save_path = self.faces_dir / f"frame_{saved_count:04d}.jpg"
106
  cv2.imwrite(str(save_path), frame)
107
 
108
  embeddings_caras.append({
109
+ "embeddings": embedding,
110
  "path": str(save_path),
111
+ "frame": frame_count,
112
+ "facial_area": facial_area
113
  })
114
  saved_count += 1
115
+
116
+ if frame_count % (frame_interval * 10) == 0:
117
+ logger.info(f"Progreso: frame {frame_count}/{total_frames}, caras detectadas: {saved_count}")
118
 
119
  except Exception as e:
120
  logger.debug(f"No se detectaron caras en frame {frame_count}: {e}")
 
125
  frame_count += 1
126
 
127
  video.release()
128
+ logger.info(f"Caras extraídas: {len(embeddings_caras)}")
129
  return embeddings_caras
130
 
131
  def extract_voices_embeddings(self) -> List[Dict[str, Any]]: