openai/gsm8k
Benchmark • Updated • 17.6k • 956k • 1.33k
A small language model fine-tuned for mathematical reasoning on GSM8K.
| Metric | Score |
|---|---|
| Accuracy (Pass@1) | 5.0% |
| Pass@k | 15.0% |
| Majority Voting | 5.0% |
| Consistency | 52.5% |
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository that provides both the model weights and the tokenizer.
model_id = "2796gauravc/qwen2.5-0.5b-math"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

question = "Janet has 10 apples. She gives 3 to her friend. How many does she have left?"
# ChatML-style prompt: a single user turn followed by an open assistant turn
# that the model is expected to complete.
prompt = f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=256)

# For a causal LM, generate() returns the prompt tokens followed by the
# continuation. Drop the prompt and the special chat markers so that only the
# model's answer is printed.
prompt_length = inputs["input_ids"].shape[1]
answer_ids = outputs[0][prompt_length:]
print(tokenizer.decode(answer_ids, skip_special_tokens=True))
@misc{qwen2.5_0.5b_math,
author = {Gaurav Chaudhary},
title = {qwen2.5-0.5b-math: Small Model Math Reasoning},
year = {2026},
publisher = {HuggingFace},
url = {https://huggingface.co/2796gauravc/qwen2.5-0.5b-math}
}