VeuReu committed
Commit 1eaad0c · verified · 1 Parent(s): 648c0b6

Upload 2 files

Files changed (1): audio_tools.py +17 -7
audio_tools.py CHANGED
@@ -115,7 +115,7 @@ def transcribe_audio_remote(audio_path: str | Path, cfg: Dict[str, Any]) -> Dict
     model_name = (cfg.get("models", {}).get("asr") or "whisper-catalan")
     params = {
         "language": "ca",
-        "model": "faster-whisper-large-v3-ca-3catparla",
+        # remote ASR model is configured server-side; avoid 'model' to not clash with router arg
         "timestamps": True,
         "diarization": False,  # diarization stays local
     }
@@ -252,6 +252,12 @@ class VoiceEmbedder:
         self.model.eval()
 
     def embed(self, wav_path: str) -> List[float]:
+        # ensure we have a torch handle without creating a local var that shadows outer scope
+        try:
+            import torch as _torch  # local alias, avoids scoping issues
+        except Exception:
+            _torch = None  # type: ignore
+
         if HAS_TORCHAUDIO:
             waveform, sr = ta.load(wav_path)
             target_sr = 16000
@@ -262,9 +268,12 @@ class VoiceEmbedder:
             min_samples = int(0.2 * target_sr)
             if waveform.shape[1] < min_samples:
                 pad = min_samples - waveform.shape[1]
-                import torch
-                waveform = torch.cat([waveform, torch.zeros((1, pad))], dim=1)
-            with torch.no_grad():  # type: ignore
+                if _torch is None:
+                    raise RuntimeError("Torch not available for padding")
+                waveform = _torch.cat([waveform, _torch.zeros((1, pad))], dim=1)
+            if _torch is None:
+                raise RuntimeError("Torch not available for inference")
+            with _torch.no_grad():  # type: ignore
                 emb = self.model.encode_batch(waveform).squeeze().cpu().numpy().astype(float)
             return emb.tolist()
         else:
@@ -272,9 +281,10 @@ class VoiceEmbedder:
             min_len = int(0.2 * 16000)
             if len(y) < min_len:
                 y = np.pad(y, (0, min_len - len(y)))
-            import torch
-            w = torch.from_numpy(y).unsqueeze(0).unsqueeze(0)
-            with torch.no_grad():  # type: ignore
+            if _torch is None:
+                raise RuntimeError("Torch not available for inference")
+            w = _torch.from_numpy(y).unsqueeze(0).unsqueeze(0)
+            with _torch.no_grad():  # type: ignore
                 emb = self.model.encode_batch(w).squeeze().cpu().numpy().astype(float)
             return emb.tolist()
 
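Both branches keep the 0.2 s minimum-length floor before calling encode_batch. Illustrative arithmetic, using the same constants as the diff:

import numpy as np

y = np.zeros(800, dtype=np.float32)  # pretend 0.05 s of audio at 16 kHz
min_len = int(0.2 * 16000)           # 3200 samples
if len(y) < min_len:
    y = np.pad(y, (0, min_len - len(y)))
assert len(y) == 3200                # zero-padded up to the floor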
290