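"""Gradio Space that translates Latin to English with the
raphael-lesmana/mamba2_370_latin3 fine-tuned checkpoint."""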
import re

import gradio as gr
import spaces
import torch  # kept for the optional quantization config below
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
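# Optional 4-bit NF4 quantization (requires bitsandbytes): uncomment this block
# and pass quantization_config=nf4_config to from_pretrained below to enable it.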
# nf4_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
model_name = "raphael-lesmana/mamba2_370_latin3"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    add_bos_token=True,   # prepend BOS so inputs match the training format
    add_eos_token=False,  # leave the sequence open-ended for generation
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=nf4_config,
)
model.to("cuda")
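model.eval()  # inference only: disable dropout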
@spaces.GPU
def generate_response(prompt):
    # Wrap the input in the "Latin: ... / English:" template the model completes.
    prompt = f"Latin: {prompt}\nEnglish: "
    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encoded_input.to("cuda")
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=100,
        min_new_tokens=1,
        do_sample=False,  # greedy decoding for deterministic translations
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded_output = tokenizer.batch_decode(generated_ids)
    # Drop the echoed prompt, leftover template text, and special tokens.
    output = decoded_output[0].replace(prompt, "")
    output = re.sub("English: ", "", output)
    output = re.sub(r"<\|endoftext\|>", "", output)  # raw strings avoid invalid-escape warnings
    output = re.sub(r"\s+", " ", output)
    return output.strip()
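# Minimal Gradio UI: a Latin text box in, an English text box out.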
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Latin Input", max_length=400),
    outputs=gr.Textbox(label="English Output"),
    submit_btn="Translate",
    flagging_mode="never",
)
demo.launch()