---
tags:
- autotrain
- text-generation-inference
- text-generation
- peft
library_name: transformers
base_model: abhishek/llama-2-7b-hf-small-shards
widget:
- messages:
  - role: user
    content: What is your favorite condiment?
license: other
---

# Model Trained Using AutoTrain

This model was trained using AutoTrain. For more information, please visit [AutoTrain](https://hf.co/docs/autotrain).

# Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from peft import PeftModel, PeftConfig
import torch
import bitsandbytes as bnb
import time

model_name = "Punthon/llama2-sdgs"

# Load the PEFT configuration
peft_config = PeftConfig.from_pretrained(model_name)

# Load the tokenizer from the base model
# Use the tokenizer associated with the base model or your fine-tuned model if needed
tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)

# Load the base model with 8-bit precision
base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,
    load_in_8bit=True,  # Load in 8-bit precision
    device_map="auto"
)

# Resize the base model embeddings to match the tokenizer
base_model.resize_token_embeddings(len(tokenizer))

# Load your fine-tuned model
model = PeftModel.from_pretrained(base_model, model_name)

# Define the instruction and input text
instruction = "Identify the Sustainable Development Goals (SDGs) relevant to the passage below. Provide only the SDG numbers and the reason for their relevance. Do not repeat the passage."
input_text = "Thailand is considered a leader in tiger conservation in Southeast Asia. Most recently at the 'Sustainable Finance for Tiger Landscapes Conservation' conference in Bhutan, Thailand has been declared as the “Champion for Tiger Conservation in Southeast Asia.”"

prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
"""

# Define generation configuration
generation_config = GenerationConfig(
    do_sample=True,
    top_k=30,
    temperature=0.7,
    max_new_tokens=200,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id
)

# Tokenize input
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate outputs
st_time = time.time()
outputs = model.generate(**inputs, generation_config=generation_config)

# Decode and print response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"Response time: {time.time() - st_time} seconds")
print(response)
```
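
Note that `generate` returns the prompt tokens followed by the newly generated tokens, so the decoded `response` echoes the full prompt. A minimal sketch of trimming the output to just the model's answer, assuming the prompt template above (the `### Response:` marker comes from that template):

```python
# Keep only the text after the "### Response:" marker from the prompt template.
answer = response.split("### Response:")[-1].strip()
print(answer)
```

Depending on your `transformers` version, passing `load_in_8bit=True` directly to `from_pretrained` may emit a deprecation warning; in that case, pass `quantization_config=BitsAndBytesConfig(load_in_8bit=True)` instead.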