import gradio as gr
import torch
import sys
import os
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

# First-run bootstrap: a missing ai4bharat/IndicLID.py is treated as
# "IndicLID not installed yet" and triggers the local setup script.
# NOTE(review): presumably setup_indiclid.py creates that file - confirm.
if not os.path.exists("ai4bharat/IndicLID.py"):
    print("🚀 Setting up IndicLID for the first time...")
    # exec stays at module level on purpose so the script runs in this
    # module's globals. Path.read_text closes the file (a bare open() leaks
    # the handle) and pins UTF-8 instead of the platform default encoding;
    # compile() attaches the real filename to any traceback.
    # SECURITY: exec runs arbitrary code - only use with trusted local scripts.
    exec(compile(Path("setup_indiclid.py").read_text(encoding="utf-8"), "setup_indiclid.py", "exec"))

# Run the torch safe-globals script before any model is loaded (best effort)
try:
    exec(compile(Path("torch_safe_globals.py").read_text(encoding="utf-8"), "torch_safe_globals.py", "exec"))
    print("✅ Torch safe globals loaded")
except Exception as exc:
    # Still best effort, but report the cause; a bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit.
    print(f"⚠️ Could not load torch safe globals: {exc}")

# Add current directory to Python path so the local ai4bharat package is importable
sys.path.insert(0, os.getcwd())

# Import required libraries
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from IndicTransToolkit.processor import IndicProcessor

# Import IndicLID - mandatory, because the whole pipeline depends on
# automatic language detection. Startup is aborted when it is missing.
try:
    from ai4bharat.IndicLID import IndicLID
except ImportError as e:
    print(f"❌ IndicLID import failed: {e}")
    INDICLID_AVAILABLE = False
    # Raise a specific exception type (still an Exception subclass) and chain
    # the original error so the traceback shows the root cause explicitly.
    raise ImportError("IndicLID is required for automatic language detection!") from e
else:
    # Success path kept out of the try body so only the import is guarded
    INDICLID_AVAILABLE = True
    print("✅ IndicLID imported successfully - Automatic language detection enabled")

# Select the compute device: CUDA when torch reports one, otherwise CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"🔧 Using device: {device}")

# Lookup table: lowercase language name (as used for the IndicLID result)
# -> IndicTrans2 language tag in "<lang>_<Script>" form
LID_TO_TRANS2_MAPPING = dict(
    hindi="hin_Deva",
    bengali="ben_Beng",
    gujarati="guj_Gujr",
    kannada="kan_Knda",
    malayalam="mal_Mlym",
    marathi="mar_Deva",
    nepali="npi_Deva",
    odia="ory_Orya",
    punjabi="pan_Guru",
    tamil="tam_Taml",
    telugu="tel_Telu",
    urdu="urd_Arab",
    assamese="asm_Beng",
    kashmiri="kas_Arab",
    sindhi="snd_Arab",
    sanskrit="san_Deva",
    english="eng_Latn",
)

# Shared model handles; every one stays None until load_models() fills it in
lid_model = translation_model = tokenizer = ip = None

def load_models():
    """Load IndicLID (detection) and IndicTrans2 (translation) into module globals.

    The function is idempotent: components that are already loaded are kept
    and only the missing ones are loaded. This matters because it is
    registered on ``demo.load``, which Gradio fires on every browser page
    load, so an unguarded version reloads both models for each visitor.

    Returns:
        str: A status message for the UI - a success summary, or an error
        description when loading failed. Exceptions are not propagated.
    """
    global lid_model, translation_model, tokenizer, ip

    try:
        # Step 1: IndicLID for automatic language detection (skip if present)
        if lid_model is None:
            print("🔍 Loading IndicLID for automatic language detection...")
            lid_model = IndicLID(input_threshold=0.5, roman_lid_threshold=0.6)
            print("✅ IndicLID loaded successfully - Ready for automatic language detection!")

        # Step 2: IndicTrans2 tokenizer, model and pre/post-processor
        if translation_model is None or tokenizer is None or ip is None:
            print("🔄 Loading IndicTrans2 for translation...")
            model_name = "ai4bharat/indictrans2-indic-en-1B"
            # Half precision only when CUDA is available; float32 otherwise
            model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

            new_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            new_model = AutoModelForSeq2SeqLM.from_pretrained(
                model_name,
                trust_remote_code=True,
                torch_dtype=model_dtype,
            ).to(device)
            new_processor = IndicProcessor(inference=True)

            # Publish all three together so a failure part-way through never
            # leaves a half-initialised translation stack in the globals.
            tokenizer, translation_model, ip = new_tokenizer, new_model, new_processor
            print("✅ IndicTrans2 loaded successfully - Ready for translation!")

        return "✅ Both models loaded successfully!\n🔍 IndicLID: Automatic language detection\n🔄 IndicTrans2: Translation to English"

    except Exception as e:
        # Top-level boundary for the UI: report the failure as status text
        error_msg = f"❌ Error loading models: {str(e)}"
        print(error_msg)
        return error_msg

def _translate_to_english(text, src_lang_code):
    """Translate ``text`` from ``src_lang_code`` into English with IndicTrans2.

    Uses the module-level ``ip``, ``tokenizer``, ``translation_model`` and
    ``device``; the caller must ensure they are loaded.

    Returns:
        str: The post-processed English translation.
    """
    target_lang_code = "eng_Latn"
    print(f"🔄 Translating from {src_lang_code} to {target_lang_code}...")

    # Preprocess for IndicTrans2
    batch = ip.preprocess_batch(
        [text],
        src_lang=src_lang_code,
        tgt_lang=target_lang_code,
    )

    # Tokenize and move the tensors to the model's device
    inputs = tokenizer(
        batch,
        truncation=True,
        padding="longest",
        return_tensors="pt",
        return_attention_mask=True,
    ).to(device)

    # Generate translation (inference only, so no gradient tracking)
    with torch.no_grad():
        generated_tokens = translation_model.generate(
            **inputs,
            use_cache=True,
            min_length=0,
            max_length=256,
            num_beams=5,
            num_return_sequences=1,
        )

    # Decode and postprocess
    decoded = tokenizer.batch_decode(
        generated_tokens,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
    return ip.postprocess_batch(decoded, lang=target_lang_code)[0]


def automatic_detect_and_translate(input_text):
    """Detect the language of ``input_text`` with IndicLID and translate it to English.

    Args:
        input_text: Text entered by the user; ``None`` or blank input is
            answered with a prompt message instead of an error.

    Returns:
        tuple: ``(translation_or_message, detected_language, confidence)``.
        On any failure or early exit the language is ``""`` and the
        confidence is ``0.0``; the first element then carries the message.
        English input is returned unchanged as its own translation.
    """
    # Explicit None checks: readiness should not depend on object truthiness
    if any(part is None for part in (lid_model, translation_model, tokenizer, ip)):
        return "❌ Models not loaded. Please wait for initialization.", "", 0.0

    # Guard None as well as empty/whitespace-only input
    if not input_text or not input_text.strip():
        return "Please enter text for automatic detection and translation.", "", 0.0

    try:
        # STEP 1: automatic language detection using IndicLID
        print(f"🔍 Detecting language for: {input_text[:50]}...")
        lid_result = lid_model.batch_predict([input_text])

        # NOTE(review): assumes each result is a dict with a 'langinfo' entry -
        # confirm against the IndicLID version installed by the setup script.
        lang_info = lid_result[0]['langinfo']
        detected_lang = lang_info['text_lang']
        confidence = lang_info['text_lang_score']

        print(f"✅ IndicLID detected: {detected_lang} (confidence: {confidence:.3f})")

        # Normalise once so the English check and the mapping lookup agree;
        # previously only the English check was case-insensitive.
        lang_key = detected_lang.strip().lower()

        # STEP 2: translation using IndicTrans2 (if not English)
        if lang_key == 'english':
            translation = input_text
            print("ℹ️ Text is already in English, no translation needed")
        elif lang_key in LID_TO_TRANS2_MAPPING:
            translation = _translate_to_english(input_text, LID_TO_TRANS2_MAPPING[lang_key])
            print(f"✅ Translation completed: {translation}")
        else:
            translation = f"❌ Language '{detected_lang}' not supported for translation"
            print(f"⚠️ {translation}")

        return translation, detected_lang.title(), confidence

    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the event
        error_msg = f"❌ Error in detection/translation pipeline: {str(e)}"
        print(error_msg)
        return error_msg, "", 0.0

def create_interface():
    """Assemble the Gradio Blocks UI for the detect-then-translate pipeline.

    Static texts and example sentences are defined first; the layout is then
    built in display order, and the events are wired at the end.

    Returns:
        The constructed ``gr.Blocks`` app (not launched).
    """
    intro_markdown = """
        # 🔍➡️🔄 Automatic Language Detection + Translation
        
        **Complete Pipeline: IndicLID → IndicTrans2**
        
        1. **🔍 IndicLID**: Automatically detects your input language
        2. **🔄 IndicTrans2**: Translates to English based on detected language
        
        **No manual language selection needed!** Just paste your text and get automatic detection + translation.
        """

    languages_markdown = """
        ### 🌐 Supported Languages for Auto-Detection:
        **IndicLID can automatically detect:** Hindi, Bengali, Tamil, Telugu, Gujarati, Kannada, Malayalam, Marathi, 
        Punjabi, Urdu, Odia, Assamese, Nepali, Kashmiri, Sindhi, Sanskrit, and English.
        
        ### ✨ How it works:
        1. You paste text in **any** supported Indian language
        2. **IndicLID** automatically identifies the language (no manual selection!)
        3. **IndicTrans2** translates it to English based on the detected language
        """

    # One sample sentence per language, shown as clickable examples
    sample_sentences = [
        ["मैं आज बाजार जा रहा हूं।"],  # Hindi
        ["আমি আজ বাজারে যাচ্ছি।"],         # Bengali
        ["நான் இன்று சந்தைக்கு போகிறேன்।"],  # Tamil
        ["ನಾನು ಇಂದು ಮಾರುಕಟ್ಟೆಗೆ ಹೋಗುತ್ತಿದ್ದೇನೆ।"], # Kannada
        ["હું આજે બજારમાં જાઉં છું।"],        # Gujarati
        ["मी आज बाजारात जात आहे।"],         # Marathi
        ["میں آج بازار جا رہا ہوں۔"],        # Urdu
        ["ਮੈਂ ਅੱਜ ਬਾਜ਼ਾਰ ਜਾ ਰਿਹਾ ਹਾਂ।"],      # Punjabi
        ["నేను ఈరోజు మార్కెట్‌కి వెళ్తున్నాను।"], # Telugu
        ["ഞാൻ ഇന്ന് മാർക്കറ്റിൽ പോകുന്നു।"]   # Malayalam
    ]

    with gr.Blocks(
        title="IndicLID → IndicTrans2 Pipeline",
        theme=gr.themes.Soft()
    ) as app:

        gr.Markdown(intro_markdown)

        # Read-only box that receives the model-loading status message
        status_box = gr.Textbox(
            value="🚀 Loading IndicLID and IndicTrans2 models...",
            label="🔧 Pipeline Status",
            interactive=False,
            lines=3
        )

        with gr.Row():
            # Left column: user input and the trigger button
            with gr.Column(scale=1):
                source_box = gr.Textbox(
                    label="📝 Input Text (Any Indian Language)",
                    placeholder="Enter text in Hindi, Bengali, Tamil, Telugu, Gujarati, Kannada, Malayalam, Marathi, Punjabi, Urdu, etc...\n\nIndicLID will automatically detect the language!",
                    lines=6,
                    max_lines=10
                )

                run_button = gr.Button(
                    "🔍➡️🔄 Auto-Detect & Translate",
                    variant="primary",
                    size="lg"
                )

            # Right column: translation plus detection details
            with gr.Column(scale=1):
                english_box = gr.Textbox(
                    label="🇬🇧 English Translation",
                    lines=6,
                    max_lines=10,
                    interactive=False,
                    placeholder="Automatic translation will appear here..."
                )

                with gr.Row():
                    language_box = gr.Textbox(
                        label="🌐 Auto-Detected Language",
                        interactive=False,
                        scale=2,
                        placeholder="Language will be detected automatically"
                    )
                    score_box = gr.Number(
                        label="📊 Detection Confidence",
                        interactive=False,
                        scale=1,
                        precision=3
                    )

        # Clickable examples that populate the input box
        gr.Markdown("### 📖 Try These Examples (Automatic Detection!):")
        gr.Examples(
            examples=sample_sentences,
            inputs=[source_box],
            label="Click any example to test automatic detection!"
        )

        # Supported-language overview and a short how-it-works section
        gr.Markdown(languages_markdown)

        # The button click and the textbox submit event run the same pipeline
        pipeline_event = {
            "fn": automatic_detect_and_translate,
            "inputs": [source_box],
            "outputs": [english_box, language_box, score_box],
        }
        for register_event in (run_button.click, source_box.submit):
            register_event(**pipeline_event)

        # Run load_models when the page loads and show its message in the status box
        app.load(load_models, outputs=[status_box])

    return app

if __name__ == "__main__":
    # Startup banner
    for banner_line in (
        "🚀 Starting IndicLID → IndicTrans2 Automatic Pipeline",
        "🔍 IndicLID will handle automatic language detection",
        "🔄 IndicTrans2 will handle translation to English",
    ):
        print(banner_line)

    # Binds to all network interfaces on port 7860 and also requests a
    # Gradio share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }

    demo = create_interface()
    demo.launch(**launch_options)