Spaces:

nisacayir
/

dialect-map-turkiye

Paused

App Files Files Community

nisacayir committed on Nov 13

Commit

2dbcd48

verified ·

1 Parent(s): 7739564

update dialect pho

Browse files

Files changed (1) hide show

app.py +154 -155

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 Dialect Intelligence Engine
 Author: Nisa Çayır
-Core: Whisper Large-v3 + Türkçe Bölgesel Fonetik Analiz
 Focus: Turkish dialects — vowel shifts, markers, prosody, phonetic signatures
 """
@@ -23,9 +23,15 @@ import librosa
 import soundfile as sf
 import torch
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 import gradio as gr
 # =========================================
 # LOGGING SETUP
 # =========================================
@@ -59,11 +65,11 @@ logger.info(f"Using device: {DEVICE}, dtype: {DTYPE}")
 # MODEL INITIALIZATION
 # =========================================
 try:
-    processor = AutoProcessor.from_pretrained(MODEL_ID)
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        MODEL_ID,
-        torch_dtype=DTYPE
     )
     model = model.to(DEVICE)
@@ -224,16 +230,16 @@ def run_asr(audio_data: np.ndarray, sample_rate: int) -> str:
         # Ensure audio is float32 (Whisper expects fp32 input)
         audio_float = audio_data.astype(np.float32)
-        inputs = processor(
             audio_float,
             sampling_rate=sample_rate,
-            return_tensors="pt"
         )
         # Move to device and cast to target dtype (fp16 on GPU, fp32 on CPU)
         input_features = inputs.input_features.to(device=DEVICE, dtype=DTYPE)
-        with torch.no_grad():
             generated_ids = model.generate(
                 input_features,
                 max_length=400,
@@ -485,25 +491,28 @@ def dialect_similarity(
 # MAIN PROCESSING FUNCTION
 # =========================================
 def full_process(
-    audio: Optional[Tuple[int, np.ndarray]],
-    region: str
-) -> Tuple[str, str, str, str, Dict[str, float]]:
     """
     Full processing pipeline: audio → ASR → dialect analysis.
     Args:
         audio: Audio input (sample_rate, data) or None
-        region: Selected reference region
     Returns:
-        Tuple of (transcription, reference, predicted_dialect, similarity, detail_scores)
     """
     # ===========================
     # AUDIO INPUT FIX (HF Spaces - Filepath Mode)
     # ===========================
     if audio is None:
         logger.warning("Audio input is None - HF Spaces audio bug")
-        return "Ses alınamadı. Lütfen tekrar deneyin.", "", "", "", {}
     # Handle filepath (most stable for HF Spaces)
     if isinstance(audio, str):
@@ -516,7 +525,9 @@ def full_process(
                 audio_data = librosa.to_mono(audio_data)
         except Exception as e:
             logger.error(f"Error reading audio file: {e}")
-            return f"Ses dosyası okunamadı: {e}", "", "", "", {}
     # Fallback: Handle dict format (for compatibility)
     elif isinstance(audio, dict):
         sample_rate = audio.get("sampling_rate", 16000)
@@ -529,12 +540,16 @@ def full_process(
         logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
     else:
         logger.error(f"Unknown audio format: {type(audio)}")
-        return "Ses formatı tanınamadı.", "", "", "", {}
     # Short sanity check
     if audio_data is None or len(audio_data) == 0:
         logger.warning("Audio data is empty")
-        return "Ses içeriği boş görünüyor.", "", "", "", {}
     try:
         logger.info(f"Processing audio: sr={sample_rate}, len={len(audio_data)}")
@@ -545,14 +560,18 @@ def full_process(
             logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
         except ValueError as e:
             logger.error(f"Audio processing error: {e}")
-            return str(e), "", "", "", {}
         # Validate duration
         try:
             validate_audio(audio_data, sample_rate)
         except ValueError as e:
             logger.error(f"Audio validation error: {e}")
-            return str(e), "", "", "", {}
         # Run ASR
         logger.info("Running ASR...")
@@ -561,10 +580,12 @@ def full_process(
             logger.info(f"ASR output: {transcription}")
         except ValueError as e:
             logger.error(f"ASR error: {e}")
-            return str(e), "", "", "", {}
-        # Get reference sentence
-        reference = TEST_SENTENCES.get(region, "")
         # Dialect analysis
         logger.info("Running dialect classifier...")
@@ -576,12 +597,15 @@ def full_process(
             )
         except Exception as e:
             logger.error(f"Dialect analysis error: {e}")
             return (
                 transcription,
                 reference,
                 "Analiz hatası",
                 "0%",
-                {}
             )
         # Get best prediction
@@ -594,17 +618,23 @@ def full_process(
         logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
         return (
             transcription,
             reference,
             best_region,
             similarity_percent,
-            detail_scores
         )
     except Exception as e:
         error_msg = f"Beklenmeyen hata: {e}"
         logger.error(error_msg, exc_info=True)
-        return error_msg, "", "", "", {}
 # =========================================
@@ -940,24 +970,28 @@ def build_ui() -> gr.Blocks:
             """
             <div style="text-align:center; margin:80px 0 60px 0; padding: 0 20px;">
                 <h1 style="font-size:4rem; font-weight:800; letter-spacing:-3px; margin-bottom:20px; line-height:1.1; background: linear-gradient(135deg, #1D1D1F 0%, #4A5568 50%, #1D1D1F 100%); background-size: 200% auto; -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; animation: shimmer 4s linear infinite;">
-                    🇹🇷 Dialect Intelligence Engine
                 </h1>
                 <p style="color: #6E6E73; font-size:1.25rem; font-weight:400; letter-spacing:-0.3px; opacity:0.95; margin-top:12px;">
-                    Whisper Large-v3 ile Türkçe Şive Analizi
                 </p>
             </div>
             """
         )
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=400):
                 with gr.Group(elem_classes="card"):
-                    region_dropdown = gr.Dropdown(
-                        choices=list(DIALECT_PROFILES.keys()),
-                        value="Marmara",
-                        label="Referans Bölge"
-                    )
                     audio_input = gr.Audio(
                         sources=["microphone", "upload"],
                         type="filepath",
@@ -982,14 +1016,6 @@ def build_ui() -> gr.Blocks:
                     )
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        with gr.Group(elem_classes="card"):
-                            reference_output = gr.Textbox(
-                                label="Referans Cümle",
-                                interactive=False,
-                                lines=2
-                            )
                     with gr.Column(scale=1):
                         with gr.Group(elem_classes="card"):
                             predicted_dialect = gr.Textbox(
@@ -1010,16 +1036,22 @@ def build_ui() -> gr.Blocks:
                     detailed_scores = gr.JSON(
                         label="Bölgesel Skorlar"
                     )
         start_button.click(
             fn=full_process,
-            inputs=[audio_input, region_dropdown],
             outputs=[
                 ASR_output,
                 reference_output,
                 predicted_dialect,
                 similarity_percent,
-                detailed_scores
             ]
         )
@@ -1269,133 +1301,98 @@ def dialect_similarity(
 # =========================================
-# MAIN PROCESSING FUNCTION
 # =========================================
-def full_process(
-    audio: Optional[Tuple[int, np.ndarray]],
-    region: str
-) -> Tuple[str, str, str, str, Dict[str, float]]:
     """
-    Full processing pipeline: audio → ASR → dialect analysis.
     Args:
-        audio: Audio input (sample_rate, data) or None
-        region: Selected reference region
     Returns:
-        Tuple of (transcription, reference, predicted_dialect, similarity, detail_scores)
     """
-    # ===========================
-    # AUDIO INPUT FIX (HF Spaces - Filepath Mode)
-    # ===========================
-    if audio is None:
-        logger.warning("Audio input is None - HF Spaces audio bug")
-        return "Ses alınamadı. Lütfen tekrar deneyin.", "", "", "", {}
-    # Handle filepath (most stable for HF Spaces)
-    if isinstance(audio, str):
-        # Filepath mode - read with soundfile
-        try:
-            audio_data, sample_rate = sf.read(audio)
-            logger.info(f"Received audio as filepath: {audio}, sr={sample_rate}, len={len(audio_data)}")
-            # Convert to mono if stereo
-            if len(audio_data.shape) > 1:
-                audio_data = librosa.to_mono(audio_data)
-        except Exception as e:
-            logger.error(f"Error reading audio file: {e}")
-            return f"Ses dosyası okunamadı: {e}", "", "", "", {}
-    # Fallback: Handle dict format (for compatibility)
-    elif isinstance(audio, dict):
-        sample_rate = audio.get("sampling_rate", 16000)
-        audio_data = np.array(audio.get("data"), dtype=np.float32)
-        logger.info(f"Received audio as dict: sr={sample_rate}, len={len(audio_data)}")
-    # Fallback: Handle tuple format (for compatibility)
-    elif isinstance(audio, (list, tuple)) and len(audio) == 2:
-        sample_rate, audio_data = audio
-        audio_data = np.array(audio_data, dtype=np.float32)
-        logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
-    else:
-        logger.error(f"Unknown audio format: {type(audio)}")
-        return "Ses formatı tanınamadı.", "", "", "", {}
-    # Short sanity check
-    if audio_data is None or len(audio_data) == 0:
-        logger.warning("Audio data is empty")
-        return "Ses içeriği boş görünüyor.", "", "", "", {}
     try:
-        logger.info(f"Processing audio: sr={sample_rate}, len={len(audio_data)}")
-        # Process audio
-        try:
-            audio_data, sample_rate = process_audio(audio_data, sample_rate)
-            logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
-        except ValueError as e:
-            logger.error(f"Audio processing error: {e}")
-            return str(e), "", "", "", {}
-        # Validate duration
-        try:
-            validate_audio(audio_data, sample_rate)
-        except ValueError as e:
-            logger.error(f"Audio validation error: {e}")
-            return str(e), "", "", "", {}
-        # Run ASR
-        logger.info("Running ASR...")
-        try:
-            transcription = run_asr(audio_data, sample_rate)
-            logger.info(f"ASR output: {transcription}")
-        except ValueError as e:
-            logger.error(f"ASR error: {e}")
-            return str(e), "", "", "", {}
-        # Get reference sentence
-        reference = TEST_SENTENCES.get(region, "")
-        # Dialect analysis
-        logger.info("Running dialect classifier...")
-        try:
-            detail_scores, sorted_predictions = dialect_similarity(
-                transcription,
-                audio_data,
-                sample_rate
-            )
-        except Exception as e:
-            logger.error(f"Dialect analysis error: {e}")
-            return (
-                transcription,
-                reference,
-                "Analiz hatası",
-                "0%",
-                {}
-            )
-        # Get best prediction
-        if sorted_predictions:
-            best_region, best_score = sorted_predictions[0]
-            similarity_percent = f"{best_score * 100:.1f}%"
-        else:
-            best_region = "Bilinmiyor"
-            similarity_percent = "0%"
-        logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
-        return (
-            transcription,
-            reference,
-            best_region,
-            similarity_percent,
-            detail_scores
         )
     except Exception as e:
-        error_msg = f"Beklenmeyen hata: {e}"
-        logger.error(error_msg, exc_info=True)
-        return error_msg, "", "", "", {}
 # =========================================
-# UI — Apple Minimal White + Smooth Glass Design
 # =========================================
 CSS = """
 * {
@@ -1559,4 +1556,6 @@ button.primary:active {
 ::-webkit-scrollbar-thumb:hover {
     background: rgba(0, 0, 0, 0.3);
 }
-"""

 """
 Dialect Intelligence Engine
 Author: Nisa Çayır
+Core: Meta Omnilingual ASR + Whisper Large-v3 + Türkçe Bölgesel Fonetik Analiz
 Focus: Turkish dialects — vowel shifts, markers, prosody, phonetic signatures
 """
 import soundfile as sf
 import torch
 from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
+import plotly.graph_objects as go
+import plotly.express as px
 import gradio as gr
+# Import region data
+from regions_geojson import TURKEY_REGIONS_GEOJSON
+from region_assets import REGION_COLORS
 # =========================================
 # LOGGING SETUP
 # =========================================
 # MODEL INITIALIZATION
 # =========================================
 try:
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    MODEL_ID,
+    torch_dtype=DTYPE
     )
     model = model.to(DEVICE)
         # Ensure audio is float32 (Whisper expects fp32 input)
         audio_float = audio_data.astype(np.float32)
+    inputs = processor(
             audio_float,
             sampling_rate=sample_rate,
+        return_tensors="pt"
         )
         # Move to device and cast to target dtype (fp16 on GPU, fp32 on CPU)
         input_features = inputs.input_features.to(device=DEVICE, dtype=DTYPE)
+    with torch.no_grad():
             generated_ids = model.generate(
                 input_features,
                 max_length=400,
 # MAIN PROCESSING FUNCTION
 # =========================================
 def full_process(
+    audio: Optional[Tuple[int, np.ndarray]]
+) -> Tuple[str, str, str, str, Dict[str, float], go.Figure]:
     """
     Full processing pipeline: audio → ASR → dialect analysis.
     Args:
         audio: Audio input (sample_rate, data) or None
     Returns:
+        Tuple of (transcription, reference, predicted_dialect, similarity, detail_scores, heatmap_figure)
     """
+    # Default reference sentence (always show Marmara reference)
+    default_reference = TEST_SENTENCES.get("Marmara", "")
     # ===========================
     # AUDIO INPUT FIX (HF Spaces - Filepath Mode)
     # ===========================
     if audio is None:
         logger.warning("Audio input is None - HF Spaces audio bug")
+        empty_fig = go.Figure()
+        empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+        return "Ses alınamadı. Lütfen tekrar deneyin.", default_reference, "", "", {}, empty_fig
     # Handle filepath (most stable for HF Spaces)
     if isinstance(audio, str):
                 audio_data = librosa.to_mono(audio_data)
         except Exception as e:
             logger.error(f"Error reading audio file: {e}")
+            empty_fig = go.Figure()
+            empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+            return f"Ses dosyası okunamadı: {e}", default_reference, "", "", {}, empty_fig
     # Fallback: Handle dict format (for compatibility)
     elif isinstance(audio, dict):
         sample_rate = audio.get("sampling_rate", 16000)
         logger.info(f"Received audio as tuple: sr={sample_rate}, len={len(audio_data)}")
     else:
         logger.error(f"Unknown audio format: {type(audio)}")
+        empty_fig = go.Figure()
+        empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+        return "Ses formatı tanınamadı.", default_reference, "", "", {}, empty_fig
     # Short sanity check
     if audio_data is None or len(audio_data) == 0:
         logger.warning("Audio data is empty")
+        empty_fig = go.Figure()
+        empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+        return "Ses içeriği boş görünüyor.", default_reference, "", "", {}, empty_fig
     try:
         logger.info(f"Processing audio: sr={sample_rate}, len={len(audio_data)}")
             logger.info(f"Audio processed: sr={sample_rate}, shape={audio_data.shape}")
         except ValueError as e:
             logger.error(f"Audio processing error: {e}")
+            empty_fig = go.Figure()
+            empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+            return str(e), default_reference, "", "", {}, empty_fig
         # Validate duration
         try:
             validate_audio(audio_data, sample_rate)
         except ValueError as e:
             logger.error(f"Audio validation error: {e}")
+            empty_fig = go.Figure()
+            empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+            return str(e), default_reference, "", "", {}, empty_fig
         # Run ASR
         logger.info("Running ASR...")
             logger.info(f"ASR output: {transcription}")
         except ValueError as e:
             logger.error(f"ASR error: {e}")
+            empty_fig = go.Figure()
+            empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+            return str(e), default_reference, "", "", {}, empty_fig
+        # Always use default reference sentence
+        reference = default_reference
         # Dialect analysis
         logger.info("Running dialect classifier...")
             )
         except Exception as e:
             logger.error(f"Dialect analysis error: {e}")
+            empty_fig = go.Figure()
+            empty_fig.update_layout(title="Harita yüklenemedi", height=600)
             return (
                 transcription,
                 reference,
                 "Analiz hatası",
                 "0%",
+                {},
+                empty_fig
             )
         # Get best prediction
         logger.info(f"Analysis complete: {best_region} ({similarity_percent})")
+        # Create heatmap
+        heatmap_fig = plot_region_heatmap(detail_scores)
         return (
             transcription,
             reference,
             best_region,
             similarity_percent,
+            detail_scores,
+            heatmap_fig
         )
     except Exception as e:
         error_msg = f"Beklenmeyen hata: {e}"
         logger.error(error_msg, exc_info=True)
+        empty_fig = go.Figure()
+        empty_fig.update_layout(title="Harita yüklenemedi", height=600)
+        return error_msg, default_reference, "", "", {}, empty_fig
 # =========================================
             """
             <div style="text-align:center; margin:80px 0 60px 0; padding: 0 20px;">
                 <h1 style="font-size:4rem; font-weight:800; letter-spacing:-3px; margin-bottom:20px; line-height:1.1; background: linear-gradient(135deg, #1D1D1F 0%, #4A5568 50%, #1D1D1F 100%); background-size: 200% auto; -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; animation: shimmer 4s linear infinite;">
+                    <span style="background: none; -webkit-text-fill-color: initial; color: #1D1D1F;">🇹🇷</span> Dialect Intelligence Engine
                 </h1>
                 <p style="color: #6E6E73; font-size:1.25rem; font-weight:400; letter-spacing:-0.3px; opacity:0.95; margin-top:12px;">
+                    Powered by Meta Omnilingual ASR & Whisper Large-v3
                 </p>
             </div>
             """
         )
+        # Reference sentence (always visible, at top)
+        default_ref = TEST_SENTENCES.get("Marmara", "")
+        with gr.Group(elem_classes="card"):
+            reference_output = gr.Textbox(
+                label="Referans Cümle",
+                value=default_ref,
+                interactive=False,
+                lines=2
+            )
         with gr.Row(equal_height=False):
             with gr.Column(scale=1, min_width=400):
                 with gr.Group(elem_classes="card"):
                     audio_input = gr.Audio(
                         sources=["microphone", "upload"],
                         type="filepath",
                     )
                 with gr.Row():
                     with gr.Column(scale=1):
                         with gr.Group(elem_classes="card"):
                             predicted_dialect = gr.Textbox(
                     detailed_scores = gr.JSON(
                         label="Bölgesel Skorlar"
                     )
+                with gr.Group(elem_classes="card"):
+                    region_map = gr.Plot(
+                        label="Bölgesel Harita"
+                    )
         start_button.click(
             fn=full_process,
+            inputs=[audio_input],
             outputs=[
                 ASR_output,
                 reference_output,
                 predicted_dialect,
                 similarity_percent,
+                detailed_scores,
+                region_map
             ]
         )
 # =========================================
+# VISUALIZATION
 # =========================================
def plot_region_heatmap(scores: Dict[str, float]) -> go.Figure:
    """
    Create a Plotly map of Turkey shaded by regional dialect scores.

    Every feature in TURKEY_REGIONS_GEOJSON is drawn as a filled outline in
    the region's base colour (looked up in REGION_COLORS, grey "#CCCCCC" when
    missing). The fill opacity runs from 0.3 to 1.0 according to the region's
    score, min-max normalized over the values present in ``scores``.

    Args:
        scores: Dictionary of region names to similarity scores. A region
            that is missing from the dictionary is drawn with a score of 0.0.

    Returns:
        Plotly figure object. If building the map fails for any reason, the
        error is logged and an empty figure titled "Harita yüklenemedi" is
        returned instead of raising.
    """
    try:
        # Min-max bounds of the supplied scores; fall back to a unit range
        # when there are no scores or all scores are equal, so the division
        # below can never be by zero.
        max_score = max(scores.values()) if scores else 1.0
        min_score = min(scores.values()) if scores else 0.0
        score_range = max_score - min_score if max_score > min_score else 1.0

        fig = go.Figure()

        for feature in TURKEY_REGIONS_GEOJSON["features"]:
            region_name = feature["properties"]["name"]
            score = scores.get(region_name, 0.0)

            # Clamp to [0, 1]: a region absent from `scores` defaults to 0.0,
            # which can lie below min_score and would otherwise push the
            # alpha channel negative, producing an invalid rgba() colour.
            normalized = (score - min_score) / score_range
            normalized = min(1.0, max(0.0, normalized))

            # "#RRGGBB" -> integer channels, then opacity scaled by score.
            base_color = REGION_COLORS.get(region_name, "#CCCCCC")
            red, green, blue = (
                int(base_color[j:j + 2], 16) for j in (1, 3, 5)
            )
            alpha = 0.3 + normalized * 0.7
            rgba = f"rgba({red}, {green}, {blue}, {alpha:.3f})"

            # First coordinate list of the geometry, read as (lon, lat) pairs.
            # NOTE(review): presumably the exterior ring of a Polygon;
            # MultiPolygon features would need different handling - confirm
            # against regions_geojson.
            ring = feature["geometry"]["coordinates"][0]

            fig.add_trace(go.Scattergeo(
                lon=[coord[0] for coord in ring],
                lat=[coord[1] for coord in ring],
                mode='lines',
                fill='toself',
                fillcolor=rgba,
                line=dict(color=base_color, width=2),
                name=region_name,
                text=f"{region_name}<br>Benzerlik: {score*100:.1f}%",
                hovertemplate='<b>%{text}</b><extra></extra>',
                showlegend=False
            ))

        # Frame the map on Turkey and keep backgrounds transparent so the
        # figure blends with the surrounding UI card.
        fig.update_layout(
            title={
                'text': 'Türkiye Bölgesel Şive Benzerlik Haritası',
                'x': 0.5,
                'xanchor': 'center',
                'font': {'size': 20, 'family': 'Arial, sans-serif'}
            },
            geo=dict(
                projection_type='mercator',
                showland=True,
                landcolor='rgb(243, 243, 243)',
                showocean=True,
                oceancolor='rgb(230, 240, 255)',
                showlakes=True,
                lakecolor='rgb(230, 240, 255)',
                showcountries=True,
                countrycolor='rgb(200, 200, 200)',
                lonaxis=dict(range=[25, 45]),
                lataxis=dict(range=[35, 43]),
                bgcolor='rgba(0,0,0,0)'
            ),
            height=600,
            margin=dict(l=0, r=0, t=50, b=0),
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)',
            font=dict(family="Arial, sans-serif", size=12)
        )
        return fig
    except Exception as e:
        # Best-effort visualisation: never let a map failure break the
        # analysis pipeline; log with traceback and return a placeholder.
        logger.error(f"Error creating heatmap: {e}", exc_info=True)
        fig = go.Figure()
        fig.update_layout(
            title="Harita yüklenemedi",
            height=600
        )
        return fig
 # =========================================
+# UI — Ultra Modern Apple Glassmorphism Design
 # =========================================
 CSS = """
 * {
 ::-webkit-scrollbar-thumb:hover {
     background: rgba(0, 0, 0, 0.3);
 }
+"""