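# app.py: Hugging Face Space serving a Latin-to-English translator built on the
# fine-tuned Mamba2 model raphael-lesmana/mamba2_370_latin3, with a Gradio UI.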
import re

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
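
# Optional 4-bit NF4 quantization config (left over from experimentation).
# Uncomment this block and the quantization_config argument below to load the
# model in 4-bit via bitsandbytes instead of full precision.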
# nf4_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )

model_name = "raphael-lesmana/mamba2_370_latin3"
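
# Prepend BOS so generation starts from a clean context, but append no EOS:
# the prompt is a prefix that the model should keep continuing.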
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    add_bos_token=True,
    add_eos_token=False,
)
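
# Load the model in full precision and move it to the GPU. Combining
# device_map="auto" (accelerate dispatch) with a later .to("cuda") can fail,
# since dispatched models cannot be moved again.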
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=nf4_config,
)
model.to('cuda')
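
# Translate a single Latin sentence to English with greedy decoding.
# @spaces.GPU assumes this Space runs on ZeroGPU hardware (suggested by the
# `spaces` import): it requests a GPU for the duration of each call and is a
# no-op in other environments.
@spaces.GPU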
def generate_response(prompt):
    prompt = f"Latin: {prompt}\nEnglish: "
    encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    model_inputs = encoded_input.to('cuda')
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=100,
        min_new_tokens=1,
        do_sample=False,  # greedy decoding keeps translations deterministic
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded_output = tokenizer.batch_decode(generated_ids)
    # Strip the echoed prompt, the "English: " marker, special tokens, and runs of whitespace.
    output = decoded_output[0].replace(prompt, "")
    output = re.sub("English: ", "", output)
    output = re.sub(r"<\|endoftext\|>", "", output)  # raw strings avoid invalid-escape warnings
    output = re.sub(r"\s+", " ", output)
    return output.strip()
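
# Quick smoke test before wiring up the UI (hypothetical example sentence):
#   print(generate_response("Gallia est omnis divisa in partes tres."))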

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Latin Input", max_length=400),
    outputs=gr.Textbox(label="English Output"),
    submit_btn="Translate",
    flagging_mode="never",
)

demo.launch()
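
# On a Space, launch() serves the app automatically; run locally with
# `python app.py` and open the printed URL (http://127.0.0.1:7860 by default).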