update dialect pho
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
Dialect Intelligence Engine
|
| 3 |
Author: Nisa Çayır
|
| 4 |
-
Core: Whisper Large-v3 + Türkçe Bölgesel Fonetik Analiz
|
| 5 |
Focus: Turkish dialects — vowel shifts, markers, prosody, phonetic signatures
|
| 6 |
"""
|
| 7 |
|
|
@@ -23,9 +23,15 @@ import librosa
|
|
| 23 |
import soundfile as sf
|
| 24 |
import torch
|
| 25 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
|
|
|
|
|
|
| 26 |
|
| 27 |
import gradio as gr
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# =========================================
|
| 30 |
# LOGGING SETUP
|
| 31 |
# =========================================
|
|
@@ -59,11 +65,11 @@ logger.info(f"Using device: {DEVICE}, dtype: {DTYPE}")
|
|
| 59 |
# MODEL INITIALIZATION
|
| 60 |
# =========================================
|
| 61 |
try:
|
| 62 |
-
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
)
|
| 68 |
|
| 69 |
model = model.to(DEVICE)
|
|
@@ -224,16 +230,16 @@ def run_asr(audio_data: np.ndarray, sample_rate: int) -> str:
|
|
| 224 |
# Ensure audio is float32 (Whisper expects fp32 input)
|
| 225 |
audio_float = audio_data.astype(np.float32)
|
| 226 |
|
| 227 |
-
|
| 228 |
audio_float,
|
| 229 |
sampling_rate=sample_rate,
|
| 230 |
-
|
| 231 |
)
|
| 232 |
|
| 233 |
# Move to device and cast to target dtype (fp16 on GPU, fp32 on CPU)
|
| 234 |
input_features = inputs.input_features.to(device=DEVICE, dtype=DTYPE)
|
| 235 |
|
| 236 |
-
|
| 237 |
generated_ids = model.generate(
|
| 238 |
input_features,
|
| 239 |
max_length=400,
|
|
@@ -485,25 +491,28 @@ def dialect_similarity(
|
|
| 485 |
# MAIN PROCESSING FUNCTION
|
| 486 |
# =========================================
|
| 487 |
def full_process(
|
| 488 |
-
audio: Optional[Tuple[int, np.ndarray]]
|
| 489 |
-
|
| 490 |
-
) -> Tuple[str, str, str, str, Dict[str, float]]:
|
| 491 |
"""
|
| 492 |
Full processing pipeline: audio → ASR → dialect analysis.
|
| 493 |
|
| 494 |
Args:
|
| 495 |
audio: Audio input (sample_rate, data) or None
|
| 496 |
-
region: Selected reference region
|
| 497 |
|
| 498 |
Returns:
|
| 499 |
-
Tuple of (transcription, reference, predicted_dialect, similarity, detail_scores)
|
| 500 |
"""
|
|
|
|
|
|
|
|
|
|
| 501 |
# ===========================
|
| 502 |
# AUDIO INPUT FIX (HF Spaces - Filepath Mode)
|
| 503 |
# ===========================
|
| 504 |
if audio is None:
|
| 505 |
logger.warning("Audio input is None - HF Spaces audio bug")
|
| 506 |
-
|
|
|
|
|
|
|
| 507 |
|
| 508 |
# Handle filepath (most stable for HF Spaces)
|
| 509 |
if isinstance(audio, str):
|
|
@@ -516,7 +525,9 @@ def full_process(
|
|
| 516 |
audio_data = librosa.to_mono(audio_data)
|
| 517 |
except Exception as e:
|
| 518 |
logger.error(f"Error reading audio file: {e}")
|
| 519 |
-
|
|
|
|
|
|
|
| 520 |
# Fallback: Handle dict format (for compatibility)
|
| 521 |
elif isinstance(audio, dict):
|
| 522 |
sample_rate = audio.get("sampling_rate", 16000)
|
|
@@ -529,12 +540,16 @@ def full_process(
|
|
| 529 |
logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
|
| 530 |
else:
|
| 531 |
logger.error(f"Unknown audio format: {type(audio)}")
|
| 532 |
-
|
|
|
|
|
|
|
| 533 |
|
| 534 |
# Short sanity check
|
| 535 |
if audio_data is None or len(audio_data) == 0:
|
| 536 |
logger.warning("Audio data is empty")
|
| 537 |
-
|
|
|
|
|
|
|
| 538 |
|
| 539 |
try:
|
| 540 |
logger.info(f"Processing audio: sr={sample_rate}, len={len(audio_data)}")
|
|
@@ -545,14 +560,18 @@ def full_process(
|
|
| 545 |
logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
|
| 546 |
except ValueError as e:
|
| 547 |
logger.error(f"Audio processing error: {e}")
|
| 548 |
-
|
|
|
|
|
|
|
| 549 |
|
| 550 |
# Validate duration
|
| 551 |
try:
|
| 552 |
validate_audio(audio_data, sample_rate)
|
| 553 |
except ValueError as e:
|
| 554 |
logger.error(f"Audio validation error: {e}")
|
| 555 |
-
|
|
|
|
|
|
|
| 556 |
|
| 557 |
# Run ASR
|
| 558 |
logger.info("Running ASR...")
|
|
@@ -561,10 +580,12 @@ def full_process(
|
|
| 561 |
logger.info(f"ASR output: {transcription}")
|
| 562 |
except ValueError as e:
|
| 563 |
logger.error(f"ASR error: {e}")
|
| 564 |
-
|
|
|
|
|
|
|
| 565 |
|
| 566 |
-
#
|
| 567 |
-
reference =
|
| 568 |
|
| 569 |
# Dialect analysis
|
| 570 |
logger.info("Running dialect classifier...")
|
|
@@ -576,12 +597,15 @@ def full_process(
|
|
| 576 |
)
|
| 577 |
except Exception as e:
|
| 578 |
logger.error(f"Dialect analysis error: {e}")
|
|
|
|
|
|
|
| 579 |
return (
|
| 580 |
transcription,
|
| 581 |
reference,
|
| 582 |
"Analiz hatası",
|
| 583 |
"0%",
|
| 584 |
-
{}
|
|
|
|
| 585 |
)
|
| 586 |
|
| 587 |
# Get best prediction
|
|
@@ -594,17 +618,23 @@ def full_process(
|
|
| 594 |
|
| 595 |
logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
|
| 596 |
|
|
|
|
|
|
|
|
|
|
| 597 |
return (
|
| 598 |
transcription,
|
| 599 |
reference,
|
| 600 |
best_region,
|
| 601 |
similarity_percent,
|
| 602 |
-
detail_scores
|
|
|
|
| 603 |
)
|
| 604 |
except Exception as e:
|
| 605 |
error_msg = f"Beklenmeyen hata: {e}"
|
| 606 |
logger.error(error_msg, exc_info=True)
|
| 607 |
-
|
|
|
|
|
|
|
| 608 |
|
| 609 |
|
| 610 |
# =========================================
|
|
@@ -940,24 +970,28 @@ def build_ui() -> gr.Blocks:
|
|
| 940 |
"""
|
| 941 |
<div style="text-align:center; margin:80px 0 60px 0; padding: 0 20px;">
|
| 942 |
<h1 style="font-size:4rem; font-weight:800; letter-spacing:-3px; margin-bottom:20px; line-height:1.1; background: linear-gradient(135deg, #1D1D1F 0%, #4A5568 50%, #1D1D1F 100%); background-size: 200% auto; -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; animation: shimmer 4s linear infinite;">
|
| 943 |
-
|
| 944 |
</h1>
|
| 945 |
<p style="color: #6E6E73; font-size:1.25rem; font-weight:400; letter-spacing:-0.3px; opacity:0.95; margin-top:12px;">
|
| 946 |
-
Whisper Large-v3
|
| 947 |
</p>
|
| 948 |
</div>
|
| 949 |
"""
|
| 950 |
)
|
| 951 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 952 |
with gr.Row(equal_height=False):
|
| 953 |
with gr.Column(scale=1, min_width=400):
|
| 954 |
with gr.Group(elem_classes="card"):
|
| 955 |
-
region_dropdown = gr.Dropdown(
|
| 956 |
-
choices=list(DIALECT_PROFILES.keys()),
|
| 957 |
-
value="Marmara",
|
| 958 |
-
label="Referans Bölge"
|
| 959 |
-
)
|
| 960 |
-
|
| 961 |
audio_input = gr.Audio(
|
| 962 |
sources=["microphone", "upload"],
|
| 963 |
type="filepath",
|
|
@@ -982,14 +1016,6 @@ def build_ui() -> gr.Blocks:
|
|
| 982 |
)
|
| 983 |
|
| 984 |
with gr.Row():
|
| 985 |
-
with gr.Column(scale=1):
|
| 986 |
-
with gr.Group(elem_classes="card"):
|
| 987 |
-
reference_output = gr.Textbox(
|
| 988 |
-
label="Referans Cümle",
|
| 989 |
-
interactive=False,
|
| 990 |
-
lines=2
|
| 991 |
-
)
|
| 992 |
-
|
| 993 |
with gr.Column(scale=1):
|
| 994 |
with gr.Group(elem_classes="card"):
|
| 995 |
predicted_dialect = gr.Textbox(
|
|
@@ -1010,16 +1036,22 @@ def build_ui() -> gr.Blocks:
|
|
| 1010 |
detailed_scores = gr.JSON(
|
| 1011 |
label="Bölgesel Skorlar"
|
| 1012 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1013 |
|
| 1014 |
start_button.click(
|
| 1015 |
fn=full_process,
|
| 1016 |
-
inputs=[audio_input
|
| 1017 |
outputs=[
|
| 1018 |
ASR_output,
|
| 1019 |
reference_output,
|
| 1020 |
predicted_dialect,
|
| 1021 |
similarity_percent,
|
| 1022 |
-
detailed_scores
|
|
|
|
| 1023 |
]
|
| 1024 |
)
|
| 1025 |
|
|
@@ -1269,133 +1301,98 @@ def dialect_similarity(
|
|
| 1269 |
|
| 1270 |
|
| 1271 |
# =========================================
|
| 1272 |
-
#
|
| 1273 |
# =========================================
|
| 1274 |
-
def
|
| 1275 |
-
audio: Optional[Tuple[int, np.ndarray]],
|
| 1276 |
-
region: str
|
| 1277 |
-
) -> Tuple[str, str, str, str, Dict[str, float]]:
|
| 1278 |
"""
|
| 1279 |
-
|
| 1280 |
|
| 1281 |
Args:
|
| 1282 |
-
|
| 1283 |
-
region: Selected reference region
|
| 1284 |
|
| 1285 |
Returns:
|
| 1286 |
-
|
| 1287 |
"""
|
| 1288 |
-
# ===========================
|
| 1289 |
-
# AUDIO INPUT FIX (HF Spaces - Filepath Mode)
|
| 1290 |
-
# ===========================
|
| 1291 |
-
if audio is None:
|
| 1292 |
-
logger.warning("Audio input is None - HF Spaces audio bug")
|
| 1293 |
-
return "Ses alınamadı. Lütfen tekrar deneyin.", "", "", "", {}
|
| 1294 |
-
|
| 1295 |
-
# Handle filepath (most stable for HF Spaces)
|
| 1296 |
-
if isinstance(audio, str):
|
| 1297 |
-
# Filepath mode - read with soundfile
|
| 1298 |
-
try:
|
| 1299 |
-
audio_data, sample_rate = sf.read(audio)
|
| 1300 |
-
logger.info(f"Received audio as filepath: {audio}, sr={sample_rate}, len={len(audio_data)}")
|
| 1301 |
-
# Convert to mono if stereo
|
| 1302 |
-
if len(audio_data.shape) > 1:
|
| 1303 |
-
audio_data = librosa.to_mono(audio_data)
|
| 1304 |
-
except Exception as e:
|
| 1305 |
-
logger.error(f"Error reading audio file: {e}")
|
| 1306 |
-
return f"Ses dosyası okunamadı: {e}", "", "", "", {}
|
| 1307 |
-
# Fallback: Handle dict format (for compatibility)
|
| 1308 |
-
elif isinstance(audio, dict):
|
| 1309 |
-
sample_rate = audio.get("sampling_rate", 16000)
|
| 1310 |
-
audio_data = np.array(audio.get("data"), dtype=np.float32)
|
| 1311 |
-
logger.info(f"Received audio as dict: sr={sample_rate}, len={len(audio_data)}")
|
| 1312 |
-
# Fallback: Handle tuple format (for compatibility)
|
| 1313 |
-
elif isinstance(audio, (list, tuple)) and len(audio) == 2:
|
| 1314 |
-
sample_rate, audio_data = audio
|
| 1315 |
-
audio_data = np.array(audio_data, dtype=np.float32)
|
| 1316 |
-
logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
|
| 1317 |
-
else:
|
| 1318 |
-
logger.error(f"Unknown audio format: {type(audio)}")
|
| 1319 |
-
return "Ses formatı tanınamadı.", "", "", "", {}
|
| 1320 |
-
|
| 1321 |
-
# Short sanity check
|
| 1322 |
-
if audio_data is None or len(audio_data) == 0:
|
| 1323 |
-
logger.warning("Audio data is empty")
|
| 1324 |
-
return "Ses içeriği boş görünüyor.", "", "", "", {}
|
| 1325 |
-
|
| 1326 |
try:
|
| 1327 |
-
|
|
|
|
|
|
|
|
|
|
| 1328 |
|
| 1329 |
-
#
|
| 1330 |
-
|
| 1331 |
-
audio_data, sample_rate = process_audio(audio_data, sample_rate)
|
| 1332 |
-
logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
|
| 1333 |
-
except ValueError as e:
|
| 1334 |
-
logger.error(f"Audio processing error: {e}")
|
| 1335 |
-
return str(e), "", "", "", {}
|
| 1336 |
-
|
| 1337 |
-
# Validate duration
|
| 1338 |
-
try:
|
| 1339 |
-
validate_audio(audio_data, sample_rate)
|
| 1340 |
-
except ValueError as e:
|
| 1341 |
-
logger.error(f"Audio validation error: {e}")
|
| 1342 |
-
return str(e), "", "", "", {}
|
| 1343 |
-
|
| 1344 |
-
# Run ASR
|
| 1345 |
-
logger.info("Running ASR...")
|
| 1346 |
-
try:
|
| 1347 |
-
transcription = run_asr(audio_data, sample_rate)
|
| 1348 |
-
logger.info(f"ASR output: {transcription}")
|
| 1349 |
-
except ValueError as e:
|
| 1350 |
-
logger.error(f"ASR error: {e}")
|
| 1351 |
-
return str(e), "", "", "", {}
|
| 1352 |
|
| 1353 |
-
#
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
-
|
| 1357 |
-
|
| 1358 |
-
|
| 1359 |
-
|
| 1360 |
-
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
| 1369 |
-
|
| 1370 |
-
"0
|
| 1371 |
-
|
| 1372 |
-
|
| 1373 |
-
|
| 1374 |
-
|
| 1375 |
-
|
| 1376 |
-
|
| 1377 |
-
|
| 1378 |
-
|
| 1379 |
-
|
| 1380 |
-
similarity_percent = "0%"
|
| 1381 |
-
|
| 1382 |
-
logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
|
| 1383 |
|
| 1384 |
-
|
| 1385 |
-
|
| 1386 |
-
|
| 1387 |
-
|
| 1388 |
-
|
| 1389 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1390 |
)
|
|
|
|
|
|
|
| 1391 |
except Exception as e:
|
| 1392 |
-
|
| 1393 |
-
|
| 1394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1395 |
|
| 1396 |
|
| 1397 |
# =========================================
|
| 1398 |
-
# UI —
|
| 1399 |
# =========================================
|
| 1400 |
CSS = """
|
| 1401 |
* {
|
|
@@ -1559,4 +1556,6 @@ button.primary:active {
|
|
| 1559 |
::-webkit-scrollbar-thumb:hover {
|
| 1560 |
background: rgba(0, 0, 0, 0.3);
|
| 1561 |
}
|
| 1562 |
-
"""
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
Dialect Intelligence Engine
|
| 3 |
Author: Nisa Çayır
|
| 4 |
+
Core: Meta Omnilingual ASR + Whisper Large-v3 + Türkçe Bölgesel Fonetik Analiz
|
| 5 |
Focus: Turkish dialects — vowel shifts, markers, prosody, phonetic signatures
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 23 |
import soundfile as sf
|
| 24 |
import torch
|
| 25 |
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
| 26 |
+
import plotly.graph_objects as go
|
| 27 |
+
import plotly.express as px
|
| 28 |
|
| 29 |
import gradio as gr
|
| 30 |
|
| 31 |
+
# Import region data
|
| 32 |
+
from regions_geojson import TURKEY_REGIONS_GEOJSON
|
| 33 |
+
from region_assets import REGION_COLORS
|
| 34 |
+
|
| 35 |
# =========================================
|
| 36 |
# LOGGING SETUP
|
| 37 |
# =========================================
|
|
|
|
| 65 |
# MODEL INITIALIZATION
|
| 66 |
# =========================================
|
| 67 |
try:
|
| 68 |
+
processor = AutoProcessor.from_pretrained(MODEL_ID)
|
| 69 |
|
| 70 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
| 71 |
+
MODEL_ID,
|
| 72 |
+
torch_dtype=DTYPE
|
| 73 |
)
|
| 74 |
|
| 75 |
model = model.to(DEVICE)
|
|
|
|
| 230 |
# Ensure audio is float32 (Whisper expects fp32 input)
|
| 231 |
audio_float = audio_data.astype(np.float32)
|
| 232 |
|
| 233 |
+
inputs = processor(
|
| 234 |
audio_float,
|
| 235 |
sampling_rate=sample_rate,
|
| 236 |
+
return_tensors="pt"
|
| 237 |
)
|
| 238 |
|
| 239 |
# Move to device and cast to target dtype (fp16 on GPU, fp32 on CPU)
|
| 240 |
input_features = inputs.input_features.to(device=DEVICE, dtype=DTYPE)
|
| 241 |
|
| 242 |
+
with torch.no_grad():
|
| 243 |
generated_ids = model.generate(
|
| 244 |
input_features,
|
| 245 |
max_length=400,
|
|
|
|
| 491 |
# MAIN PROCESSING FUNCTION
|
| 492 |
# =========================================
|
| 493 |
def full_process(
|
| 494 |
+
audio: Optional[Tuple[int, np.ndarray]]
|
| 495 |
+
) -> Tuple[str, str, str, str, Dict[str, float], go.Figure]:
|
|
|
|
| 496 |
"""
|
| 497 |
Full processing pipeline: audio → ASR → dialect analysis.
|
| 498 |
|
| 499 |
Args:
|
| 500 |
audio: Audio input (sample_rate, data) or None
|
|
|
|
| 501 |
|
| 502 |
Returns:
|
| 503 |
+
Tuple of (transcription, reference, predicted_dialect, similarity, detail_scores, heatmap_figure)
|
| 504 |
"""
|
| 505 |
+
# Default reference sentence (always show Marmara reference)
|
| 506 |
+
default_reference = TEST_SENTENCES.get("Marmara", "")
|
| 507 |
+
|
| 508 |
# ===========================
|
| 509 |
# AUDIO INPUT FIX (HF Spaces - Filepath Mode)
|
| 510 |
# ===========================
|
| 511 |
if audio is None:
|
| 512 |
logger.warning("Audio input is None - HF Spaces audio bug")
|
| 513 |
+
empty_fig = go.Figure()
|
| 514 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 515 |
+
return "Ses alınamadı. Lütfen tekrar deneyin.", default_reference, "", "", {}, empty_fig
|
| 516 |
|
| 517 |
# Handle filepath (most stable for HF Spaces)
|
| 518 |
if isinstance(audio, str):
|
|
|
|
| 525 |
audio_data = librosa.to_mono(audio_data)
|
| 526 |
except Exception as e:
|
| 527 |
logger.error(f"Error reading audio file: {e}")
|
| 528 |
+
empty_fig = go.Figure()
|
| 529 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 530 |
+
return f"Ses dosyası okunamadı: {e}", default_reference, "", "", {}, empty_fig
|
| 531 |
# Fallback: Handle dict format (for compatibility)
|
| 532 |
elif isinstance(audio, dict):
|
| 533 |
sample_rate = audio.get("sampling_rate", 16000)
|
|
|
|
| 540 |
logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
|
| 541 |
else:
|
| 542 |
logger.error(f"Unknown audio format: {type(audio)}")
|
| 543 |
+
empty_fig = go.Figure()
|
| 544 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 545 |
+
return "Ses formatı tanınamadı.", default_reference, "", "", {}, empty_fig
|
| 546 |
|
| 547 |
# Short sanity check
|
| 548 |
if audio_data is None or len(audio_data) == 0:
|
| 549 |
logger.warning("Audio data is empty")
|
| 550 |
+
empty_fig = go.Figure()
|
| 551 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 552 |
+
return "Ses içeriği boş görünüyor.", default_reference, "", "", {}, empty_fig
|
| 553 |
|
| 554 |
try:
|
| 555 |
logger.info(f"Processing audio: sr={sample_rate}, len={len(audio_data)}")
|
|
|
|
| 560 |
logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
|
| 561 |
except ValueError as e:
|
| 562 |
logger.error(f"Audio processing error: {e}")
|
| 563 |
+
empty_fig = go.Figure()
|
| 564 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 565 |
+
return str(e), default_reference, "", "", {}, empty_fig
|
| 566 |
|
| 567 |
# Validate duration
|
| 568 |
try:
|
| 569 |
validate_audio(audio_data, sample_rate)
|
| 570 |
except ValueError as e:
|
| 571 |
logger.error(f"Audio validation error: {e}")
|
| 572 |
+
empty_fig = go.Figure()
|
| 573 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 574 |
+
return str(e), default_reference, "", "", {}, empty_fig
|
| 575 |
|
| 576 |
# Run ASR
|
| 577 |
logger.info("Running ASR...")
|
|
|
|
| 580 |
logger.info(f"ASR output: {transcription}")
|
| 581 |
except ValueError as e:
|
| 582 |
logger.error(f"ASR error: {e}")
|
| 583 |
+
empty_fig = go.Figure()
|
| 584 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 585 |
+
return str(e), default_reference, "", "", {}, empty_fig
|
| 586 |
|
| 587 |
+
# Always use default reference sentence
|
| 588 |
+
reference = default_reference
|
| 589 |
|
| 590 |
# Dialect analysis
|
| 591 |
logger.info("Running dialect classifier...")
|
|
|
|
| 597 |
)
|
| 598 |
except Exception as e:
|
| 599 |
logger.error(f"Dialect analysis error: {e}")
|
| 600 |
+
empty_fig = go.Figure()
|
| 601 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 602 |
return (
|
| 603 |
transcription,
|
| 604 |
reference,
|
| 605 |
"Analiz hatası",
|
| 606 |
"0%",
|
| 607 |
+
{},
|
| 608 |
+
empty_fig
|
| 609 |
)
|
| 610 |
|
| 611 |
# Get best prediction
|
|
|
|
| 618 |
|
| 619 |
logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
|
| 620 |
|
| 621 |
+
# Create heatmap
|
| 622 |
+
heatmap_fig = plot_region_heatmap(detail_scores)
|
| 623 |
+
|
| 624 |
return (
|
| 625 |
transcription,
|
| 626 |
reference,
|
| 627 |
best_region,
|
| 628 |
similarity_percent,
|
| 629 |
+
detail_scores,
|
| 630 |
+
heatmap_fig
|
| 631 |
)
|
| 632 |
except Exception as e:
|
| 633 |
error_msg = f"Beklenmeyen hata: {e}"
|
| 634 |
logger.error(error_msg, exc_info=True)
|
| 635 |
+
empty_fig = go.Figure()
|
| 636 |
+
empty_fig.update_layout(title="Harita yüklenemedi", height=600)
|
| 637 |
+
return error_msg, default_reference, "", "", {}, empty_fig
|
| 638 |
|
| 639 |
|
| 640 |
# =========================================
|
|
|
|
| 970 |
"""
|
| 971 |
<div style="text-align:center; margin:80px 0 60px 0; padding: 0 20px;">
|
| 972 |
<h1 style="font-size:4rem; font-weight:800; letter-spacing:-3px; margin-bottom:20px; line-height:1.1; background: linear-gradient(135deg, #1D1D1F 0%, #4A5568 50%, #1D1D1F 100%); background-size: 200% auto; -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; animation: shimmer 4s linear infinite;">
|
| 973 |
+
<span style="background: none; -webkit-text-fill-color: initial; color: #1D1D1F;">🇹🇷</span> Dialect Intelligence Engine
|
| 974 |
</h1>
|
| 975 |
<p style="color: #6E6E73; font-size:1.25rem; font-weight:400; letter-spacing:-0.3px; opacity:0.95; margin-top:12px;">
|
| 976 |
+
Powered by Meta Omnilingual ASR & Whisper Large-v3
|
| 977 |
</p>
|
| 978 |
</div>
|
| 979 |
"""
|
| 980 |
)
|
| 981 |
|
| 982 |
+
# Reference sentence (always visible, at top)
|
| 983 |
+
default_ref = TEST_SENTENCES.get("Marmara", "")
|
| 984 |
+
with gr.Group(elem_classes="card"):
|
| 985 |
+
reference_output = gr.Textbox(
|
| 986 |
+
label="Referans Cümle",
|
| 987 |
+
value=default_ref,
|
| 988 |
+
interactive=False,
|
| 989 |
+
lines=2
|
| 990 |
+
)
|
| 991 |
+
|
| 992 |
with gr.Row(equal_height=False):
|
| 993 |
with gr.Column(scale=1, min_width=400):
|
| 994 |
with gr.Group(elem_classes="card"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
audio_input = gr.Audio(
|
| 996 |
sources=["microphone", "upload"],
|
| 997 |
type="filepath",
|
|
|
|
| 1016 |
)
|
| 1017 |
|
| 1018 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1019 |
with gr.Column(scale=1):
|
| 1020 |
with gr.Group(elem_classes="card"):
|
| 1021 |
predicted_dialect = gr.Textbox(
|
|
|
|
| 1036 |
detailed_scores = gr.JSON(
|
| 1037 |
label="Bölgesel Skorlar"
|
| 1038 |
)
|
| 1039 |
+
|
| 1040 |
+
with gr.Group(elem_classes="card"):
|
| 1041 |
+
region_map = gr.Plot(
|
| 1042 |
+
label="Bölgesel Harita"
|
| 1043 |
+
)
|
| 1044 |
|
| 1045 |
start_button.click(
|
| 1046 |
fn=full_process,
|
| 1047 |
+
inputs=[audio_input],
|
| 1048 |
outputs=[
|
| 1049 |
ASR_output,
|
| 1050 |
reference_output,
|
| 1051 |
predicted_dialect,
|
| 1052 |
similarity_percent,
|
| 1053 |
+
detailed_scores,
|
| 1054 |
+
region_map
|
| 1055 |
]
|
| 1056 |
)
|
| 1057 |
|
|
|
|
| 1301 |
|
| 1302 |
|
| 1303 |
# =========================================
|
| 1304 |
+
# VISUALIZATION
|
| 1305 |
# =========================================
|
| 1306 |
+
def plot_region_heatmap(scores: Dict[str, float]) -> go.Figure:
    """
    Create a Plotly map of Turkey with regions shaded by dialect similarity.

    Every feature in TURKEY_REGIONS_GEOJSON is drawn as a filled Scattergeo
    outline. The fill uses the region's base color from REGION_COLORS, with an
    opacity between 0.3 and 1.0 derived from the min-max normalized score.

    Args:
        scores: Dictionary of region names to similarity scores. Regions that
            are absent from the dictionary are treated as score 0.0.

    Returns:
        Plotly figure object. If anything raises while building the map, the
        error is logged and an empty figure titled "Harita yüklenemedi" is
        returned instead.
    """
    try:
        # Min-max bounds of the supplied scores; fall back to the 0..1 range
        # when no scores were given so the normalization below stays defined.
        if scores:
            max_score = max(scores.values())
            min_score = min(scores.values())
        else:
            max_score, min_score = 1.0, 0.0
        # Avoid dividing by zero when every score is identical.
        score_range = max_score - min_score if max_score > min_score else 1.0

        # Create figure
        fig = go.Figure()

        # Add each region as a filled outline trace
        for feature in TURKEY_REGIONS_GEOJSON["features"]:
            region_name = feature["properties"]["name"]
            score = scores.get(region_name, 0.0)

            # Normalize score to 0-1. A region missing from `scores` defaults
            # to 0.0, which can lie below min_score; clamp so the alpha channel
            # computed below never leaves the valid [0.3, 1.0] band.
            normalized = (score - min_score) / score_range
            normalized = min(1.0, max(0.0, normalized))

            # Get base color for region
            # NOTE(review): assumes colors are "#RRGGBB" hex strings - confirm
            # against region_assets.REGION_COLORS.
            base_color = REGION_COLORS.get(region_name, "#CCCCCC")

            # Convert hex to rgb and add opacity based on score
            rgb = tuple(int(base_color[j:j+2], 16) for j in (1, 3, 5))
            rgba = f"rgba({rgb[0]}, {rgb[1]}, {rgb[2]}, {0.3 + normalized * 0.7})"

            # NOTE(review): only the first coordinate ring is drawn, which
            # presumably means every feature is a simple Polygon - verify
            # against regions_geojson (a MultiPolygon would need other handling).
            ring = feature["geometry"]["coordinates"][0]

            fig.add_trace(go.Scattergeo(
                lon=[coord[0] for coord in ring],
                lat=[coord[1] for coord in ring],
                mode='lines',
                fill='toself',
                fillcolor=rgba,
                line=dict(color=base_color, width=2),
                name=region_name,
                text=f"{region_name}<br>Benzerlik: {score*100:.1f}%",
                hovertemplate='<b>%{text}</b><extra></extra>',
                showlegend=False
            ))

        # Update layout
        fig.update_layout(
            title={
                'text': 'Türkiye Bölgesel Şive Benzerlik Haritası',
                'x': 0.5,
                'xanchor': 'center',
                'font': {'size': 20, 'family': 'Arial, sans-serif'}
            },
            geo=dict(
                projection_type='mercator',
                showland=True,
                landcolor='rgb(243, 243, 243)',
                showocean=True,
                oceancolor='rgb(230, 240, 255)',
                showlakes=True,
                lakecolor='rgb(230, 240, 255)',
                showcountries=True,
                countrycolor='rgb(200, 200, 200)',
                lonaxis=dict(range=[25, 45]),
                lataxis=dict(range=[35, 43]),
                bgcolor='rgba(0,0,0,0)'
            ),
            height=600,
            margin=dict(l=0, r=0, t=50, b=0),
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)',
            font=dict(family="Arial, sans-serif", size=12)
        )

        return fig
    except Exception as e:
        # Best-effort rendering: the UI still needs a figure to display, so
        # log the failure (with traceback) and hand back a placeholder.
        logger.error(f"Error creating heatmap: {e}", exc_info=True)
        # Return empty figure on error
        fig = go.Figure()
        fig.update_layout(
            title="Harita yüklenemedi",
            height=600
        )
        return fig
|
| 1392 |
|
| 1393 |
|
| 1394 |
# =========================================
|
| 1395 |
+
# UI — Ultra Modern Apple Glassmorphism Design
|
| 1396 |
# =========================================
|
| 1397 |
CSS = """
|
| 1398 |
* {
|
|
|
|
| 1556 |
::-webkit-scrollbar-thumb:hover {
|
| 1557 |
background: rgba(0, 0, 0, 0.3);
|
| 1558 |
}
|
| 1559 |
+
"""
|
| 1560 |
+
|
| 1561 |
+
|