import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")

print("Initializing Moondream2 (1.6B)...")

# --- CONFIGURATION ---
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"  # Pinning version for stability

# Load Model (CPU Friendly)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

print("Model loaded!")


def chat_engine(message, history, image):
    # 1. Check inputs
    if image is None:
        return "Please upload an image first! I need to see something to talk about it."
    if message.strip() == "":
        return "Please ask a question about the image."

    try:
        # 2. Encode Image
        enc_image = model.encode_image(image)

        # 3. Generate Answer
        # Moondream has a specific method for answering questions
        answer = model.answer_question(enc_image, message, tokenizer)
        return answer
    except Exception as e:
        return f"Error: {str(e)}"


# --- UI ---
# Custom CSS for a clean look
custom_css = """
.gradio-container { background-color: #f9fafb; }
footer { display: none !important; }
"""

with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="Moondream Vision") as app:
    gr.Markdown(
        """
        # 🌙 Moondream2 Vision
        **The Tiny Giant (1.6B Params)**

        Upload an image and ask questions. Runs fast on CPU!
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Image(type="pil", label="Input Image", height=300)
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=350, type="messages", label="Conversation")
            msg_input = gr.Textbox(
                placeholder="What's happening in this picture?", label="Your Question"
            )
            with gr.Row():
                clear_btn = gr.ClearButton([msg_input, chatbot, img_input])
                submit_btn = gr.Button("Submit", variant="primary")

    # Handling the chat logic
    def respond(message, chat_history, image):
        if image is None:
            chat_history.append({"role": "assistant", "content": "Please upload an image first."})
            return "", chat_history

        # Add user message
        chat_history.append({"role": "user", "content": message})

        # Generate Bot Response
        bot_response = chat_engine(message, chat_history, image)

        # Add bot response
        chat_history.append({"role": "assistant", "content": bot_response})
        return "", chat_history

    msg_input.submit(respond, [msg_input, chatbot, img_input], [msg_input, chatbot])
    submit_btn.click(respond, [msg_input, chatbot, img_input], [msg_input, chatbot])

if __name__ == "__main__":
    app.launch()