# NOTE(review): this file reached review with tag-like text stripped from many
# string literals — anything between '<' and '>' is missing, including what
# were almost certainly "<think>"/"</think>" stream markers and <div>/<details>/
# <style> HTML markup. Each suspect spot is flagged with a NOTE(review) comment
# below; the code tokens themselves are left exactly as received.
from __future__ import annotations

import logging
import os
from functools import lru_cache
from threading import Thread
from typing import Generator

import gradio as gr
import regex
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def convert_latex_brackets_to_dollars(text: str) -> str:
    """Convert LaTeX bracket/paren sections into dollar-based math mode."""

    # \[ ... \]  ->  $$ ... $$  (display math). The third-party `regex` module
    # is used for its "(?r)" reverse-scan flag (stdlib `re` does not support it).
    def replace_display_latex(match):
        return f"\n $$ {match.group(1).strip()} $$ \n"

    text = regex.sub(r"(?r)\\\[\s*([^\[\]]+?)\s*\\\]", replace_display_latex, text)

    # \( ... \)  ->  $ ... $  (inline math).
    def replace_paren_latex(match):
        return f" $ {match.group(1).strip()} $ "

    text = regex.sub(r"(?r)\\\(\s*(.+?)\s*\\\)", replace_paren_latex, text)
    return text


# Model id is overridable via the MODEL_NAME environment variable.
MODEL_NAME = os.getenv("MODEL_NAME", "Intel/hebrew-math-tutor-v1")


@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    """Load and return the (model, tokenizer) pair for MODEL_NAME.

    lru_cache(maxsize=1) makes this a one-shot loader: repeat calls return the
    same objects without reloading weights.
    """
    logger.info(f"Loading model: {MODEL_NAME}")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        dtype=torch.bfloat16,
        device_map="auto",
    )
    logger.info("Model loaded successfully")
    return model, tokenizer


# Module-level side effect: weights are loaded eagerly at import time.
model, tokenizer = load_model_and_tokenizer()

DEFAULT_LANG = "he"

# Per-language UI strings. Keys must stay in sync between "he" and "en":
# every key read by the UI-building/toggling code exists in both dicts.
labels = {
    "he": {
        # NOTE(review): HTML wrapper markup (likely <div>/<h1> tags) was
        # stripped from this literal in the reviewed copy — confirm against
        # the original file before shipping.
        "title": 'מתמטיבוט 🧮',
        "intro": (
            # NOTE(review): leading/trailing HTML wrappers stripped here too;
            # the first and last fragments originally carried markup.
            '\n\n'
            "ברוכים הבאים לדמו! 💡 כאן תוכלו להתרשם **ממודל השפה החדש** שלנו; מודל בגודל 4 מיליארד פרמטרים שאומן לענות על שאלות מתמטיות בעברית, על המחשב שלכם, ללא חיבור לרשת.\n\n"
            "קישור למודל, פרטים נוספים, יצירת קשר ותנאי שימוש:\n\n"
            "https://huggingface.co/Intel/hebrew-math-tutor-v1\n\n"
            "-----\n\n"
            ""
        ),
        "select_label": "בחרו שאלה מוכנה או צרו שאלה חדשה:",
        "new_question": "שאלה חדשה...",
        "text_label": "שאלה:",
        "placeholder": "הזינו את השאלה כאן...",
        "send": "שלח",
        "reset": "נקה",
        "toggle_to": "English 🇬🇧",
        # First entry doubles as the "no preset selected" sentinel; compared
        # against in sync_question_text / used as dropdown reset value.
        "predefined": [
            "שאלה חדשה...",
            " מהו סכום הסדרה הבאה: 1 + 1/2 + 1/4 + 1/8 + ...",
            "פתח את הביטוי: (a-b)^4",
            "פתרו את המשוואה הבאה: sin(2x) = 0.5",
        ],
        "summary_text": "לחץ כדי לראות את תהליך החשיבה",
        "thinking_prefix": "🤔 חושב",
        "thinking_done": "🤔 *תהליך החשיבה הושלם, מכין תשובה...*",
        "final_label": "📝 תשובה סופית:",
        "answer_label": "תשובה:",
    },
    "en": {
        # NOTE(review): HTML wrapper markup stripped from this literal as well.
        "title": "MathBot 🧮",
        "intro": (
            """
Welcome to the demo! 💡 Here you can try our **new language model** — a 4-billion-parameter model trained to answer math questions in Hebrew while maintaining its English capabilities. It runs locally on your machine without requiring an internet connection.

For the model page and more details see:

https://huggingface.co/Intel/hebrew-math-tutor-v1

-----
"""
        ),
        "select_label": "Choose a prepared question or create a new one:",
        "new_question": "New question...",
        "text_label": "Question:",
        "placeholder": "Type your question here...",
        "send": "Send",
        "reset": "Clear",
        "toggle_to": "עברית 🇮🇱",
        "predefined": [
            "New question...",
            "What is the sum of the series: 1 + 1/2 + 1/4 + 1/8 + ...",
            "Expand the expression: (a-b)^4",
            "Solve the equation: sin(2x) = 0.5",
        ],
        "summary_text": "Click to view the thinking process",
        "thinking_prefix": "🤔 Thinking",
        "thinking_done": "🤔 *Thinking complete, preparing answer...*",
        "final_label": "📝 Final answer:",
        "answer_label": "Answer:",
    },
}


def dir_and_alignment(lang: str) -> tuple[str, str]:
    """Return (text direction, text alignment) for the given UI language."""
    if lang == "he":
        return "rtl", "right"
    return "ltr", "left"


# Collapsible "thinking" section template; .format()-ed with dir / align /
# summary / content in build_assistant_markdown.
# NOTE(review): the <details>/<summary> markup between these fragments was
# stripped in the reviewed copy — the {dir} and {align} placeholders it
# consumed are missing. Recover from the original file.
_details_template = (
    ''
    "🤔 {summary}"
    '{content}'
    ""
)


def wrap_text_with_direction(text: str, lang: str, emphasized: bool = False) -> str:
    """Wrap *text* in a direction/alignment-aware block (bold if *emphasized*)."""
    direction, align = dir_and_alignment(lang)
    weight = "font-weight: 600;" if emphasized else ""
    # NOTE(review): the surrounding markup (which presumably interpolated
    # direction, align and weight) was stripped from this f-string — as
    # received, those three locals go unused. Confirm against the original.
    return f'{text}'


def build_system_prompt(lang: str) -> str:
    """Return the system prompt; the Hebrew variant forces Hebrew-only answers."""
    if lang == "he":
        return (
            "You are a helpful AI assistant specialized in mathematics and problem-solving "
            "who can answer math questions with the correct answer. Answer shortly, not more than 500 "
            "tokens, but outline the process step by step. Answer ONLY in Hebrew!"
        )
    return (
        "You are a helpful AI assistant specialized in mathematics and problem-solving who can answer "
        "math questions with the correct answer. Answer shortly, not more than 500 tokens, but outline "
        "the process step by step."
    )


def thinking_indicator(lang: str, progress_token_count: int) -> str:
    """Animated 'thinking…' placeholder; dot count cycles with stream length."""
    direction, align = dir_and_alignment(lang)
    border_side = "right" if direction == "rtl" else "left"
    # 1..5 dots, cycling; "or 1" avoids an empty dot string at multiples of 6.
    dots = "." * (progress_token_count % 6 or 1)
    prefix = labels[lang]["thinking_prefix"]
    # NOTE(review): the markup in these literals (which presumably interpolated
    # direction/align/border_side) was stripped in the reviewed copy — as
    # received those locals go unused. Recover from the original file.
    return (
        f''
        f''
        f'{prefix}{dots}'
        ""
    )


def build_assistant_markdown(
    lang: str,
    final_answer: str,
    thinking_text: str | None,
) -> str:
    """Assemble the assistant message: optional thinking section + final answer.

    *thinking_text* of None/empty skips the collapsible details block.
    """
    direction, align = dir_and_alignment(lang)
    localized = labels[lang]
    parts = []
    if thinking_text:
        details = _details_template.format(
            dir=direction,
            align=align,
            summary=localized["summary_text"],
            content=thinking_text,
        )
        parts.append(details)
        parts.append(wrap_text_with_direction(localized["thinking_done"], lang))
    parts.append(wrap_text_with_direction(localized["final_label"], lang, emphasized=True))
    # Normalize \[..\]/\(..\) LaTeX to $-delimiters so gr.Markdown renders it.
    converted_answer = convert_latex_brackets_to_dollars(final_answer.strip())
    parts.append(wrap_text_with_direction(converted_answer or "…", lang))
    return "\n\n".join(parts)


@spaces.GPU
def handle_user_message(
    user_input: str,
    lang: str,
) -> Generator[tuple, None, None]:
    """Stream a model answer for *user_input*.

    Yields (answer_markdown, question_box_value, dropdown_value) triples —
    matching the three Gradio output components wired in build_demo.
    """
    lang = lang or DEFAULT_LANG
    localized = labels[lang]
    prompt = (user_input or "").strip()
    if not prompt:
        # Empty input: just reset the three outputs and stop.
        yield (
            "",
            "",
            localized["new_question"],
        )
        return

    dropdown_reset = localized["new_question"]
    # Clear previous answer and question box immediately.
    yield "", "", dropdown_reset

    system_prompt = build_system_prompt(lang)

    # Apply chat template with just user message
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    input_text = tokenizer.apply_chat_template(
        chat_messages,
        tokenize=True,
        add_generation_prompt=True,
        enable_thinking=True,
    )
    inputs = torch.tensor([input_text]).to(model.device)

    thinking_buffer = ""
    thinking_text: str | None = None
    final_answer = ""
    response_fallback = ""  # full raw stream, used if no answer section is found
    in_thinking = True

    try:
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            inputs=inputs,
            streamer=streamer,
            max_new_tokens=2400,
            temperature=0.6,
            top_p=0.95,
            top_k=20,
            do_sample=True,
        )
        # generate() runs on a worker thread; the streamer feeds this loop.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        for delta in streamer:
            if not delta:
                continue
            response_fallback += delta
            # NOTE(review): the commented-out check below and the two literals
            # flagged further down appear stripped — they were almost certainly
            # "<think>" / "</think>" markers. As received, '"" in delta' is
            # always True, so in_thinking would flip off on the first chunk.
            # Recover the literals from the original file.
            # if "" in delta:
            #     in_thinking = True
            if in_thinking:
                thinking_buffer += delta
                if "" in delta:  # NOTE(review): likely '"</think>" in delta'
                    in_thinking = False
                    # NOTE(review): likely .replace("<think>", "").replace("</think>", "")
                    thinking_text = (
                        thinking_buffer.replace("", "").replace("", "").strip()
                    )
                current_answer = thinking_indicator(lang, len(thinking_buffer))
            else:
                final_answer += delta
                current_answer = build_assistant_markdown(
                    lang=lang,
                    final_answer=final_answer,
                    thinking_text=thinking_text,
                )
            yield current_answer, "", dropdown_reset

        thread.join()
    except Exception as exc:
        # Surface the error in the answer area; keep the user's prompt in the box.
        error_html = wrap_text_with_direction(f"⚠️ Error generating response: {exc}", lang)
        yield error_html, prompt, dropdown_reset
        return

    # If the stream never left the thinking phase, fall back to the raw text.
    if not final_answer:
        final_answer = response_fallback

    final_markdown = build_assistant_markdown(
        lang=lang, final_answer=final_answer, thinking_text=thinking_text
    )
    yield final_markdown, "", dropdown_reset


def clear_answer(lang: str):
    """Reset answer area, question box, and preset dropdown to defaults."""
    localized = labels[lang]
    return (
        "",
        "",
        localized["new_question"],
    )


def sync_question_text(selected_option: str, lang: str):
    """Copy a chosen preset into the textbox ('' for the new-question sentinel)."""
    localized = labels[lang]
    if selected_option == localized["new_question"]:
        return ""
    return selected_option


def toggle_language_and_rebuild(lang: str):
    """Toggle language and return new values for all UI components"""
    new_lang = "en" if lang == "he" else "he"
    localized = labels[new_lang]
    # direction, align = dir_and_alignment(new_lang)
    dropdown_class = "rtl" if new_lang == "he" else "ltr"
    textbox_class = "rtl" if new_lang == "he" else "ltr"
    # Order must match the `outputs=` list wired to lang_button.click below.
    return (
        new_lang,
        localized["title"],
        localized["intro"],
        wrap_text_with_direction(localized["select_label"], new_lang, emphasized=True),
        gr.update(
            choices=localized["predefined"],
            value=localized["new_question"],
            elem_classes=[dropdown_class],
        ),
        wrap_text_with_direction(localized["text_label"], new_lang, emphasized=True),
        gr.update(
            placeholder=localized["placeholder"], value="", elem_classes=[textbox_class]
        ),
        localized["send"],
        localized["reset"],
        wrap_text_with_direction(localized["answer_label"], new_lang, emphasized=True),
        localized["toggle_to"],
    )


# App-wide CSS: RTL/LTR helpers keyed off elem_classes, plus answer-box chrome.
CUSTOM_CSS = """
body { font-family: 'Rubik', 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; }
details > summary { cursor: pointer; }
.gradio-container .prose p { margin-bottom: 0.5rem; }
[dir="rtl"] { direction: rtl; text-align: right; }
[dir="ltr"] { direction: ltr; text-align: left; }
/* Alignment helpers for inputs and dropdowns using elem_classes */
.rtl textarea, .rtl .gr-dropdown { direction: rtl !important; text-align: right !important; }
.ltr textarea, .ltr .gr-dropdown { direction: ltr !important; text-align: left !important; }
.rtl input, .rtl select, .rtl .gr-dropdown, .rtl .gradio-dropdown { direction: rtl !important; text-align: right !important; }
.ltr input, .ltr select, .ltr .gr-dropdown, .ltr .gradio-dropdown { direction: ltr !important; text-align: left !important; }
/* Visual container for the answer area */
.answer-box { background: #ffffff !important; border: 1px solid rgba(0,0,0,0.08); border-radius: 10px; padding: 12px 16px; box-shadow: none; margin-top: 8px; }
.answer-box .prose { margin: 0; }
"""


def build_demo() -> gr.Blocks:
    """Build and return the Gradio Blocks UI (default language: Hebrew)."""
    localized = labels[DEFAULT_LANG]

    with gr.Blocks(title="Hebrew Math Tutor") as demo:
        # Inject custom CSS
        # NOTE(review): this f-string appears stripped — almost certainly
        # f"<style>{CUSTOM_CSS}</style>"; as received it injects nothing and
        # CUSTOM_CSS goes unused. Confirm against the original file.
        gr.HTML(f"")

        lang_state = gr.State(DEFAULT_LANG)

        title_md = gr.Markdown(localized["title"])
        intro_md = gr.Markdown(localized["intro"])

        select_label_md = gr.Markdown(
            wrap_text_with_direction(
                localized["select_label"], DEFAULT_LANG, emphasized=True
            )
        )
        preset_dropdown = gr.Dropdown(
            label="",
            choices=localized["predefined"],
            value=localized["new_question"],
            interactive=True,
            show_label=False,
            elem_classes=("rtl" if DEFAULT_LANG == "he" else "ltr"),
        )

        question_label_md = gr.Markdown(
            wrap_text_with_direction(
                localized["text_label"], DEFAULT_LANG, emphasized=True
            )
        )
        question_box = gr.Textbox(
            label="",
            placeholder=localized["placeholder"],
            lines=5,
            show_label=False,
            elem_classes=("rtl" if DEFAULT_LANG == "he" else "ltr"),
        )

        with gr.Row():
            reset_button = gr.Button(localized["reset"], variant="secondary")
            send_button = gr.Button(localized["send"], variant="primary")
            lang_button = gr.Button(localized["toggle_to"], variant="secondary")

        answer_label_md = gr.Markdown(
            wrap_text_with_direction(
                localized["answer_label"], DEFAULT_LANG, emphasized=True
            )
        )
        with gr.Group(elem_classes="answer-box"):
            answer_box = gr.Markdown(
                label="",
                show_label=False,
                latex_delimiters=[
                    {"left": "$$", "right": "$$", "display": True},
                    {"left": "$", "right": "$", "display": False},
                ],
            )

        preset_dropdown.change(
            fn=sync_question_text,
            inputs=[preset_dropdown, lang_state],
            outputs=question_box,
        )
        reset_button.click(
            fn=clear_answer,
            inputs=[lang_state],
            outputs=[answer_box, question_box, preset_dropdown],
        )
        send_button.click(
            fn=handle_user_message,
            inputs=[question_box, lang_state],
            outputs=[answer_box, question_box, preset_dropdown],
        )
        question_box.submit(
            fn=handle_user_message,
            inputs=[question_box, lang_state],
            outputs=[answer_box, question_box, preset_dropdown],
        )

        # Language toggle - updates state and UI labels
        lang_button.click(
            fn=toggle_language_and_rebuild,
            inputs=[lang_state],
            outputs=[
                lang_state,
                title_md,
                intro_md,
                select_label_md,
                preset_dropdown,
                question_label_md,
                question_box,
                send_button,
                reset_button,
                answer_label_md,
                lang_button,
            ],
        )

    return demo


demo = build_demo()

if __name__ == "__main__":
    demo.queue().launch()