MohamedRashad/arabic-english-code-switching
Viewer • Updated • 12.5k • 646 • 34
How to use oddadmix/MasriSwitch-Gemma3n-Transcriber-v1 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline
pipe = pipeline("automatic-speech-recognition", model="oddadmix/MasriSwitch-Gemma3n-Transcriber-v1")

# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText
processor = AutoProcessor.from_pretrained("oddadmix/MasriSwitch-Gemma3n-Transcriber-v1")
model = AutoModelForImageTextToText.from_pretrained("oddadmix/MasriSwitch-Gemma3n-Transcriber-v1")

How to use oddadmix/MasriSwitch-Gemma3n-Transcriber-v1 with Unsloth Studio:
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for oddadmix/MasriSwitch-Gemma3n-Transcriber-v1 to start chatting
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for oddadmix/MasriSwitch-Gemma3n-Transcriber-v1 to start chatting
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for oddadmix/MasriSwitch-Gemma3n-Transcriber-v1 to start chatting
pip install unsloth
from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
model_name="oddadmix/MasriSwitch-Gemma3n-Transcriber-v1",
max_seq_length=2048,
)

MasriSwitch-Gemma3n-Transcriber is an automatic speech transcription model specialized for Egyptian Arabic with strong English code-switching capabilities.
This model is one of the very few publicly available systems explicitly optimized for:
The model is trained using:
MasriSwitch-Gemma3n-Transcriber is built on the Gemma3n conditional generation architecture and fine-tuned to understand natural Egyptian speech patterns, including mixed Arabic/English utterances commonly used in daily life, workplaces, and online content.
It is suitable for:
Use this model for:
import torch
from transformers import AutoProcessor, Gemma3nForConditionalGeneration
# Hugging Face Hub repository ID used as the default checkpoint by
# load_model_and_processor().
# NOTE(review): the other snippets on this page load
# "oddadmix/MasriSwitch-Gemma3n-Transcriber-v1" -- confirm that this ID is the
# intended one and still resolves.
MODEL_ID = "oddadmix/egyptian-code-switching-b4-g2-merged"
def load_model_and_processor(model_id=MODEL_ID, device=None):
    """Load the Gemma3n transcription model and its processor.

    Args:
        model_id: Hub repository ID or local path passed to ``from_pretrained``.
        device: Target device as a string (``"cuda"``, ``"cuda:0"``, ``"cpu"``,
            ...) or ``torch.device``. When ``None``, ``"cuda"`` is used if CUDA
            is available, otherwise ``"cpu"``.

    Returns:
        A ``(model, processor, device)`` tuple. ``model`` is in eval mode and
        ``device`` is the resolved device value (the argument as given, or the
        auto-detected string).
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Loading model {model_id} to device {device}...")

    # Normalise through torch.device so that "cuda:0" or a torch.device
    # instance are recognised as CUDA targets, not only the bare "cuda" string.
    target = torch.device(device)
    on_cuda = target.type == "cuda"
    # Only let accelerate pick the placement when no specific GPU was requested.
    auto_placement = on_cuda and target.index is None

    model = Gemma3nForConditionalGeneration.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16 if on_cuda else None,
        device_map="auto" if auto_placement else None,
    ).eval()

    if auto_placement:
        # device_map="auto" is expected to have put weights on the GPU already;
        # move explicitly only if nothing ended up there.
        if not any(p.device.type == "cuda" for p in model.parameters()):
            model.to("cuda")
    else:
        # Honour the explicitly requested device (a no-op for an on-CPU model
        # when the target is "cpu").
        model.to(target)

    processor = AutoProcessor.from_pretrained(model_id)
    return model, processor, device
def transcribe_file(model, processor, audio_path, max_new_tokens=128):
    """Transcribe a single audio file with a chat-style speech model.

    Builds a two-turn conversation (system instruction plus a user turn that
    carries the audio and a transcription request), runs generation with
    sampling disabled, and decodes only the newly generated tokens.

    Args:
        model: Model exposing ``parameters()`` and ``generate()``.
        processor: Processor exposing ``apply_chat_template()`` and ``decode()``.
        audio_path: Location of the audio to transcribe; must be non-empty.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded transcription with special tokens skipped.

    Raises:
        ValueError: If ``audio_path`` is empty or ``None``.
    """
    if not audio_path:
        raise ValueError("audio_path must point to an audio file")

    system_turn = {
        "role": "system",
        "content": [
            {"type": "text", "text": "You are an assistant that transcribes speech accurately."},
        ],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "audio", "url": audio_path},
            {"type": "text", "text": "Please transcribe this audio."},
        ],
    }

    encoded = processor.apply_chat_template(
        [system_turn, user_turn],
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )

    # Put every input tensor on the device of the model's first parameter.
    target_device = next(model.parameters()).device
    model_inputs = {}
    for name, tensor in encoded.items():
        model_inputs[name] = tensor.to(target_device)

    # The generated sequence starts with the prompt; remember its length so
    # the prompt can be sliced off before decoding.
    prompt_length = model_inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        output_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
        )

    new_token_ids = output_ids[0][prompt_length:]
    return processor.decode(new_token_ids, skip_special_tokens=True)
if __name__ == "__main__":
    # Example entry point: load the default model and transcribe one file.
    # The path below is a placeholder; point it at a real audio file.
    audio_path = "path/to/audio.wav"
    model, processor, device = load_model_and_processor()
    transcription = transcribe_file(
        model=model,
        processor=processor,
        audio_path=audio_path,
        max_new_tokens=256,
    )
    print("Transcription:", transcription)