---
base_model: microsoft/phi-2
library_name: peft
model_name: fol-parser-phi2-lora
tags:
- base_model:adapter:microsoft/phi-2
- lora
- sft
- transformers
- trl
pipeline_tag: text-generation
---
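LoRA adapter for `microsoft/phi-2`, fine-tuned with TRL's SFT trainer to parse a natural-language context and question into first-order logic: an `[FOL]` block of premises followed by a `[CONCLUSION_FOL]` block.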
# Code:
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "microsoft/phi-2"
ADAPTER_MODEL = "MinaGabriel/fol-parser-phi2-lora-adapter"

# tokenizer: phi-2 ships without a pad token, so reuse EOS
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# base model in fp16, placed automatically on the available devices
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)
base_model.config.pad_token_id = tokenizer.pad_token_id
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

# attach the LoRA adapter
model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_MODEL,
    device_map="auto",
)
model.eval()

def generate(context: str, question: str, max_new_tokens: int = 300) -> str:
    prompt = (
        "<SYS>\nYou are a precise logic parser. Output [FOL] then [CONCLUSION_FOL].\n</SYS>\n"
        "<USER>\n"
        f"[CONTEXT]\n{context}\n\n"
        f"[QUESTION]\n{question}\n\n"
        "Produce the two blocks exactly as specified.\n"
        "</USER>\n"
        "<ASSISTANT>\n"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,  # greedy decoding (temperature is ignored when sampling is off)
            eos_token_id=tokenizer.eos_token_id,  # explicit
            pad_token_id=tokenizer.pad_token_id,  # explicit
        )
    full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # keep only the assistant turn
    return full_text.split("<ASSISTANT>\n")[-1].strip()
```
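If you plan to serve the model, you can optionally fold the adapter into the base weights with PEFT's `merge_and_unload()`. A minimal sketch (the output directory name is illustrative):

```python
# optional: merge the LoRA weights into the base model for deployment
merged = model.merge_and_unload()  # plain transformers model, adapter baked in
merged.save_pretrained("fol-parser-phi2-merged")     # directory name is illustrative
tokenizer.save_pretrained("fol-parser-phi2-merged")
```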
# Usage:
```python
print(
    generate(
        context="Cats are animal. dogs are animal. human are not animal. animal are awesome",
        question="dogs awesome?",
    )
)
```
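This prints both blocks as one string (the raw text is shown under Output below). If you need them as structured data instead, here is a sketch; the `parse_blocks` helper and its regex are not part of the adapter, just one way to post-process the text:

```python
import re

def parse_blocks(generated: str) -> dict:
    """Split the generated text into [FOL] premises and the [CONCLUSION_FOL] formula."""
    match = re.search(
        r"\[FOL\]\s*(?P<fol>.*?)\s*\[CONCLUSION_FOL\]\s*(?P<conclusion>.*)",
        generated,
        flags=re.DOTALL,
    )
    if match is None:
        raise ValueError("expected [FOL] and [CONCLUSION_FOL] blocks in the output")
    return {
        "premises": match.group("fol").strip().splitlines(),
        # drop the closing </ASSISTANT> tag if the model emitted one
        "conclusion": match.group("conclusion").split("</ASSISTANT>")[0].strip(),
    }
```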
# Output:
```text
[FOL]
cat(animal)
dog(animal)
¬human(animal)
∀x (animal(x) → awesome(x))
[CONCLUSION_FOL]
awesome(dog)
</ASSISTANT>
```
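A conclusion like this can be machine-checked with NLTK's resolution prover once the formulas are rewritten in NLTK's plain-ASCII syntax (`all x.`, `->`). A minimal sketch with the premises hand-normalized: note the raw output writes `dog(animal)`, which is read here as `animal(dog)` so the entailment actually holds.

```python
from nltk.sem import Expression
from nltk.inference import ResolutionProver

read = Expression.fromstring

# hand-translated from the [FOL] block above
premises = [
    read("animal(dog)"),                      # normalized from dog(animal)
    read("all x.(animal(x) -> awesome(x))"),  # ∀x (animal(x) → awesome(x))
]
goal = read("awesome(dog)")                   # [CONCLUSION_FOL]

print(ResolutionProver().prove(goal, premises))  # True
```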