"""Gradio demo: translate Spanish lyrics to English and score their toxicity.

The text is first translated with a Helsinki-NLP MarianMT model and the
English translation is then scored by a RoBERTa toxicity classifier.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Upper bound (in tokens) applied to the inputs of both pipelines and to the
# generated translation.
MAX_TOKENS = 512

# 1. Load the translation model and tokenizer explicitly.
model_name = "Helsinki-NLP/opus-mt-es-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# 2. Build the translation pipeline from the explicit model/tokenizer pair.
translator_pipe = pipeline("translation", model=model, tokenizer=tokenizer)

# 3. Toxicity classifier, resolved by model id through the generic task.
toxicity_pipe = pipeline(
    "text-classification",
    model="SkolkovoInstitute/roberta_toxicity_classifier",
)


def spanish_toxicity_check(text: str) -> dict[str, float]:
    """Translate Spanish *text* to English and return per-label toxicity scores.

    Args:
        text: Spanish input (e.g. song lyrics) typed into the textbox.

    Returns:
        A mapping of classifier label to score, in the shape ``gr.Label``
        accepts. Empty or whitespace-only input yields an empty mapping
        without invoking either model.
    """
    # Nothing to analyse: skip both (expensive) model calls.
    if not text or not text.strip():
        return {}

    # Step 1: translate. ``max_length`` bounds the generated output, while
    # ``truncation=True`` keeps over-long inputs within the tokenizer limit
    # instead of failing inside the model.
    translation = translator_pipe(
        text,
        max_length=MAX_TOKENS,
        truncation=True,
    )[0]["translation_text"]

    # Step 2: classify. ``top_k=None`` asks for the score of every label, not
    # just the best one, so the Label widget can show the full distribution.
    results = toxicity_pipe(
        translation,
        top_k=None,
        truncation=True,
        max_length=MAX_TOKENS,
    )

    # Depending on the transformers version the scores for a single input may
    # come back wrapped in an outer list; unwrap it so both shapes work.
    if results and isinstance(results[0], list):
        results = results[0]

    # Step 3: format the output for gr.Label.
    return {item["label"]: float(item["score"]) for item in results}


# 4. Interface
demo = gr.Interface(
    fn=spanish_toxicity_check,
    inputs=gr.Textbox(label="Lyrics en Español", placeholder="Escribe aquí..."),
    outputs=gr.Label(label="Nivel de Toxicidad"),
    title="Análisis de Toxicidad de Canciones",
    description="Traducción automática (Helsinki-NLP) + Clasificación (RoBERTa)",
)

if __name__ == "__main__":
    demo.launch()