import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# 1. Spanish -> English translation: load the checkpoint's tokenizer and
#    seq2seq model explicitly instead of letting pipeline() resolve them.
model_name = "Helsinki-NLP/opus-mt-es-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# 2. Wrap the already-loaded model and tokenizer in a translation pipeline.
translator_pipe = pipeline(
    task="translation",
    model=model,
    tokenizer=tokenizer,
)

# 3. Toxicity classifier; here the checkpoint is resolved by name and
#    pipeline() loads its model and tokenizer itself.
toxicity_pipe = pipeline(
    task="text-classification",
    model="SkolkovoInstitute/roberta_toxicity_classifier",
)

def spanish_toxicity_check(text):
    """Translate Spanish text to English and score the translation for toxicity.

    The text is run through ``translator_pipe`` and the resulting English
    text is classified by ``toxicity_pipe``.

    Args:
        text: Spanish text to analyse. ``None`` or a blank string is accepted
            and short-circuits before any model is called.

    Returns:
        A dict mapping each label returned by the toxicity pipeline to its
        score, in the shape ``gr.Label`` accepts. An empty dict is returned
        when there is no text to analyse.
    """
    # Nothing to translate (empty textbox / cleared UI): skip both models
    # instead of scoring whatever the translator produces for empty input.
    if text is None or not text.strip():
        return {}

    # Step 1: Translate.
    # max_length only caps the length of the *generated* translation; it is
    # truncation=True that clips an over-long input to the tokenizer's limit,
    # so very long lyrics are analysed on their leading part rather than
    # being fed to the model at a length it does not support.
    translated = translator_pipe(text, max_length=512, truncation=True)
    english_text = translated[0]['translation_text']

    # Step 2: Classify. Tokenizer kwargs are forwarded by the pipeline, so the
    # English text is likewise clipped to at most 512 tokens.
    results = toxicity_pipe(english_text, truncation=True, max_length=512)

    # Step 3: Format output for gr.Label ({label: confidence}).
    return {item['label']: item['score'] for item in results}

# 4. Gradio UI: a single Spanish text box in, a label widget out.
lyrics_input = gr.Textbox(
    placeholder="Escribe aquí...",
    label="Lyrics en Español",
)
toxicity_output = gr.Label(label="Nivel de Toxicidad")

demo = gr.Interface(
    title="Análisis de Toxicidad de Canciones",
    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
    fn=spanish_toxicity_check,
    inputs=lyrics_input,
    outputs=toxicity_output,
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()