YAML Metadata
Warning:
empty or missing yaml metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
temp
inference
"""Sample-generation smoke test for the Gemma3-4B Kazakh MorphBPE-75k checkpoint.

Loads the fine-tuned checkpoint, generates up to 50 tokens continuing a short
Kazakh prompt with nucleus sampling, and prints the decoded continuation.
"""
import os

# BUG FIX: the variable was misspelled "CUDA_VISIBLE_DEVIES", so the intended
# restriction to GPU 0 never took effect.  It must also be set BEFORE torch
# initializes CUDA, hence before the torch import below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint path (was duplicated inline).
CHECKPOINT = "/scratch/vladimir_albrekht/projects/smollm/danial_workspace/danial/scripts/models/gemma3-4B-kaz-MORPHBPE-75k/final_checkpoint"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    device_map="cuda",
)

prompt = "Сәлем"  # "Hello" in Kazakh
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# Number of prompt tokens — used below to slice off the echoed prompt.
input_len = model_inputs["input_ids"].shape[-1]

with torch.inference_mode():
    generation = model.generate(
        input_ids=model_inputs["input_ids"],
        attention_mask=model_inputs["attention_mask"],
        max_new_tokens=50,
        do_sample=True,
        top_p=0.9,               # nucleus sampling
        repetition_penalty=1.2,  # discourage degenerate repetition
        # Gemma tokenizers may lack an explicit pad token; reuse EOS so
        # generate() does not warn / misbehave on padding.
        pad_token_id=tokenizer.eos_token_id,
    )

# Keep only the newly generated tokens (drop the prompt prefix), then decode.
generation = generation[0][input_len:]
decoded = tokenizer.decode(generation, skip_special_tokens=True)
print(decoded)
- Downloads last month
- 12
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support