Update README.md
Browse files
README.md
CHANGED
|
@@ -4,16 +4,14 @@ base_model:
|
|
| 4 |
library_name: transformers
|
| 5 |
license: mit
|
| 6 |
---
|
| 7 |
-
# A Text-to-Triple Model Trained on WikiOfGraph dataset
|
| 8 |
|
| 9 |
Base Model: Flan-T5-Large
|
| 10 |
|
| 11 |
-
**Example Input:**
|
| 12 |
-
\
|
| 13 |
"William Gerald Standridge (November 27, 1953 – April 12, 2014) was an American stock car racing driver. He was a competitor in the NASCAR Winston Cup Series and Busch Series."
|
| 14 |
|
| 15 |
-
**Output:**
|
| 16 |
-
\
|
| 17 |
(S> William gerald standridge| P> Nationality| O> American),
|
| 18 |
\
|
| 19 |
(S> William gerald standridge| P> Occupation| O> Stock car racing driver),
|
|
@@ -26,7 +24,61 @@ Base Model: Flan-T5-Large
|
|
| 26 |
\
|
| 27 |
(S> William gerald standridge| P> Death date| O> April 12, 2014)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
Daehee Kim et al., "Ontology-Free General-Domain Knowledge Graph-to-Text Generation Dataset Synthesis using Large Language Model", 2024.
|
|
|
|
| 4 |
library_name: transformers
|
| 5 |
license: mit
|
| 6 |
---
|
| 7 |
+
# **A Text-to-Triple Model Trained on the WikiOfGraph Dataset**
|
| 8 |
|
| 9 |
Base Model: Flan-T5-Large
|
| 10 |
|
| 11 |
+
## **Example Input:**
|
|
|
|
| 12 |
"William Gerald Standridge (November 27, 1953 – April 12, 2014) was an American stock car racing driver. He was a competitor in the NASCAR Winston Cup Series and Busch Series."
|
| 13 |
|
| 14 |
+
## **Output:**
|
|
|
|
| 15 |
(S> William gerald standridge| P> Nationality| O> American),
|
| 16 |
\
|
| 17 |
(S> William gerald standridge| P> Occupation| O> Stock car racing driver),
|
|
|
|
| 24 |
\
|
| 25 |
(S> William gerald standridge| P> Death date| O> April 12, 2014)
|
| 26 |
|
| 27 |
+
## **How to Run?**
|
| 28 |
+
```python
|
| 29 |
+
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
| 30 |
+
import torch
|
| 31 |
|
| 32 |
+
def generate_triples(input_text: str, model_path: str = "pat-jj/text2triple-flan-t5") -> str:
    """Generate knowledge-graph triples for a passage of text.

    Args:
        input_text: The passage to convert into triples. Inputs longer than
            512 tokens are truncated.
        model_path: Hugging Face Hub id or local directory of the fine-tuned
            T5 checkpoint.

    Returns:
        The decoded model output as one string with special tokens removed,
        e.g. "(S> ...| P> ...| O> ...), ...".
    """
    # NOTE: the tokenizer and model are loaded on every call. When processing
    # many passages, hoist these two loads out of the calling loop.
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16,  # Use bfloat16 for efficiency
    )

    # Only one sequence is encoded, so no padding is requested: padding to
    # max_length would make the encoder process 512 positions for every
    # input, however short. The attention mask is still returned.
    inputs = tokenizer(
        input_text,
        max_length=512,
        truncation=True,
        return_tensors="pt",
    )

    # Move inputs to the same device as the model.
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=512,
            num_beams=4,  # Use beam search
            early_stopping=True,
            # Exponent used to length-normalize beam scores; below the 1.0
            # default, so long outputs are favored less than by default.
            length_penalty=0.6,
            use_cache=True,  # Use KV cache for faster generation
        )

    # Decode the best beam and return the generated triples.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
## Example usage
|
| 72 |
+
```python
|
| 73 |
+
# Sample two-line passage to run through the model.
input_text = (
    "Albert Einstein was born in Ulm, Germany in 1879. He developed the theory of relativity and won the Nobel Prize in Physics in 1921.\n"
    "Einstein worked as a professor at Princeton University until his death in 1955."
)

# Run the extraction helper and show its decoded output.
generated_triples = generate_triples(input_text)
print("Generated triples:", generated_triples)
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
## **Paper on the WikiOfGraph Dataset**
|
| 84 |
Daehee Kim et al., "Ontology-Free General-Domain Knowledge Graph-to-Text Generation Dataset Synthesis using Large Language Model", 2024.
|