Spaces:
Running
on
Zero
Running
on
Zero
| import spaces | |
| import gradio as gr | |
| import os | |
| import sys | |
| import subprocess | |
| import numpy as np | |
| from paligemma2 import PaliGemma2Handler, MODELS as PALIGEMMA_MODELS | |
| from gemma import GemmaHandler, MODELS as GEMMA_MODELS | |
# Module-level handler singletons: one per model family, created once at
# import time and shared by every request callback defined below.
paligemma_handler, gemma_handler = PaliGemma2Handler(), GemmaHandler()
def process_image_paligemma(model_name, image, progress=gr.Progress()):
    """Run the PaliGemma2 handler on one image.

    Thin callback that forwards its arguments, in order, to
    ``paligemma_handler.process_image`` and hands back the result untouched.

    Args:
        model_name: Name of the PaliGemma2 model chosen in the UI.
        image: The input image supplied by the UI.
        progress: Gradio progress tracker, forwarded to the handler.

    Returns:
        Whatever ``paligemma_handler.process_image`` returns. The UI binds it
        to a textbox and a gallery, so presumably a (text, images) pair --
        confirm against the handler.
    """
    result = paligemma_handler.process_image(model_name, image, progress)
    return result
def process_image_gemma(model_name, image, progress=gr.Progress()):
    """Run the Gemma handler on one image.

    Thin callback that forwards its arguments, in order, to
    ``gemma_handler.process_image`` and hands back the result untouched.

    Args:
        model_name: Name of the Gemma model chosen in the UI.
        image: The input image supplied by the UI.
        progress: Gradio progress tracker, forwarded to the handler.

    Returns:
        Whatever ``gemma_handler.process_image`` returns. The UI binds it to
        a textbox and a gallery, so presumably a (text, images) pair --
        confirm against the handler.
    """
    result = gemma_handler.process_image(model_name, image, progress)
    return result
def process_pdf_paligemma(pdf_path, model_name, progress=gr.Progress()):
    """Run the PaliGemma2 handler on a PDF file.

    Thin callback that forwards its arguments, in order, to
    ``paligemma_handler.process_pdf`` and hands back the result untouched.
    Note the argument order: the PDF comes first, then the model name.

    Args:
        pdf_path: The uploaded PDF as supplied by the UI file component.
        model_name: Name of the PaliGemma2 model chosen in the UI.
        progress: Gradio progress tracker, forwarded to the handler.

    Returns:
        Whatever ``paligemma_handler.process_pdf`` returns. The UI binds it
        to a textbox and a gallery, so presumably a (text, images) pair --
        confirm against the handler.
    """
    result = paligemma_handler.process_pdf(pdf_path, model_name, progress)
    return result
def process_pdf_gemma(pdf_path, model_name, progress=gr.Progress()):
    """Run the Gemma handler on a PDF file.

    Thin callback that forwards its arguments, in order, to
    ``gemma_handler.process_pdf`` and hands back the result untouched.
    Note the argument order: the PDF comes first, then the model name.

    Args:
        pdf_path: The uploaded PDF as supplied by the UI file component.
        model_name: Name of the Gemma model chosen in the UI.
        progress: Gradio progress tracker, forwarded to the handler.

    Returns:
        Whatever ``gemma_handler.process_pdf`` returns. The UI binds it to a
        textbox and a gallery, so presumably a (text, images) pair --
        confirm against the handler.
    """
    result = gemma_handler.process_pdf(pdf_path, model_name, progress)
    return result
# Sample inputs offered in the UI. Each entry is a two-item list:
# [image file name, human-readable description].
examples = [
    [file_name, description]
    for file_name, description in (
        ("type_1_sl.png", "Typed Dhivehi text sample 1"),
        ("type_2_sl.png", "Typed Dhivehi text sample 2"),
        ("hw_1_sl.png", "Handwritten Dhivehi text sample 1"),
        ("hw_2_sl.jpg", "Handwritten Dhivehi text sample 2"),
        ("hw_3_sl.png", "Handwritten Dhivehi text sample 3"),
        ("hw_4_sl.png", "Handwritten Dhivehi text sample 4"),
        ("ml.png", "Multi-line Dhivehi text sample"),
    )
]

# Page stylesheet handed to gr.Blocks.
#   .textbox1 -- larger text, Dhivehi font stack, looser line spacing.
#   .textbox2 -- hides the textarea entirely.
css = """
.textbox1 textarea {
font-size: 18px !important;
font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
line-height: 1.8 !important;
}
.textbox2 textarea {
display: none;
}
"""
def _build_output_tabs():
    """Create the "Extracted Text" / "Detected Text Regions" result tabs.

    Must be called inside an active Gradio layout context.

    Returns:
        A ``(textbox, gallery)`` tuple: the right-to-left textbox that shows
        the extracted text and the gallery that shows detected regions.
    """
    with gr.Tabs():
        with gr.Tab("Extracted Text"):
            text_output = gr.Textbox(
                lines=5,
                label="Extracted Text",
                show_copy_button=True,
                rtl=True,
                # Styled by the ".textbox1 textarea" rule in the page CSS.
                elem_classes="textbox1",
            )
        with gr.Tab("Detected Text Regions"):
            regions_output = gr.Gallery(
                label="Detected Text Regions",
                show_label=True,
                columns=2,
            )
    return text_output, regions_output


def _build_model_tab(tab_title, dropdown_label, model_names):
    """Build one model-family tab with its image and PDF sub-tabs.

    Must be called inside an active ``gr.Tabs`` context. Only components are
    created here; click handlers are attached separately so that event
    registration order does not depend on layout order.

    Args:
        tab_title: Label of the top-level tab.
        dropdown_label: Label of the model selection dropdown.
        model_names: List of selectable model names; the first one is
            preselected. An empty list leaves the dropdown without a value.

    Returns:
        A dict with the keys ``model``, ``image_input``, ``image_button``,
        ``image_outputs``, ``pdf_input``, ``pdf_button`` and ``pdf_outputs``.
        The two ``*_outputs`` entries are ``(textbox, gallery)`` tuples.
    """
    with gr.Tab(tab_title):
        model_dropdown = gr.Dropdown(
            choices=model_names,
            value=model_names[0] if model_names else None,
            label=dropdown_label,
        )
        with gr.Tabs():
            with gr.Tab("Image Input"):
                with gr.Row():
                    with gr.Column(scale=2):
                        image_input = gr.Image(type="pil", label="Input Image")
                        image_button = gr.Button("Extract Text")
                        gr.Examples(
                            # One value per bound input: keep the file name,
                            # drop the description.
                            examples=[[path] for path, _ in examples],
                            inputs=[image_input],
                            label="Example Images",
                            examples_per_page=8,
                        )
                    with gr.Column(scale=3):
                        image_outputs = _build_output_tabs()
            with gr.Tab("PDF Input"):
                with gr.Row():
                    with gr.Column(scale=2):
                        pdf_input = gr.File(
                            label="Input PDF",
                            file_types=[".pdf"],
                        )
                        pdf_button = gr.Button("Extract Text from PDF")
                        gr.Examples(
                            # Single-element rows to match the single input
                            # component (same shape as the image examples).
                            examples=[["example.pdf"]],
                            inputs=[pdf_input],
                            label="Example PDFs",
                            examples_per_page=8,
                        )
                    with gr.Column(scale=3):
                        pdf_outputs = _build_output_tabs()
    return {
        "model": model_dropdown,
        "image_input": image_input,
        "image_button": image_button,
        "image_outputs": image_outputs,
        "pdf_input": pdf_input,
        "pdf_button": pdf_button,
        "pdf_outputs": pdf_outputs,
    }


def _wire_model_tab(ui, image_fn, pdf_fn):
    """Attach the image and PDF click handlers for one model-family tab.

    Args:
        ui: Component dict returned by ``_build_model_tab``.
        image_fn: Callback invoked as ``image_fn(model_name, image)``.
        pdf_fn: Callback invoked as ``pdf_fn(pdf_path, model_name)``.
    """
    ui["image_button"].click(
        fn=image_fn,
        inputs=[ui["model"], ui["image_input"]],
        outputs=list(ui["image_outputs"]),
    )
    ui["pdf_button"].click(
        fn=pdf_fn,
        # The PDF callbacks take the file first and the model name second.
        inputs=[ui["pdf_input"], ui["model"]],
        outputs=list(ui["pdf_outputs"]),
    )


with gr.Blocks(title="Dhivehi Image to Text", css=css) as demo:
    gr.Markdown("# Dhivehi Image to Text")
    gr.Markdown("Dhivehi Image to Text experimental finetunes")

    with gr.Tabs():
        paligemma_ui = _build_model_tab(
            "PaliGemma2",
            "Select PaliGemma2 Model",
            list(PALIGEMMA_MODELS),
        )
        gemma_ui = _build_model_tab(
            "Gemma",
            "Select Gemma Model",
            list(GEMMA_MODELS),
        )

    # Handlers are attached only after every component exists:
    # PaliGemma2 (image, PDF) first, then Gemma (image, PDF).
    _wire_model_tab(paligemma_ui, process_image_paligemma, process_pdf_paligemma)
    _wire_model_tab(gemma_ui, process_image_gemma, process_pdf_gemma)
# Function to install requirements
def install_requirements(requirements_path='requirements.txt'):
    """Install the packages listed in a pip requirements file.

    Runs ``<sys.executable> -m pip install -r <requirements_path>
    --no-cache-dir`` as a child process and reports progress on stdout.
    Failures are reported by return value, never by raising.

    Args:
        requirements_path: Path of the requirements file. Defaults to
            ``'requirements.txt'``, resolved against the current working
            directory.

    Returns:
        True if pip exited with status 0. False if the file does not exist,
        pip exited with a non-zero status, or pip could not be run at all.
    """
    # Check that the requirements file exists before spawning pip
    if not os.path.exists(requirements_path):
        print(f"Error: {requirements_path} not found")
        return False

    pip_command = [
        sys.executable,  # same interpreter that is running this script
        "-m",
        "pip",
        "install",
        "-r",
        requirements_path,
        "--no-cache-dir",  # avoid memory issues from pip's cache
    ]

    print("Installing requirements...")
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        # pip ran but exited with a non-zero status.
        print(f"Error installing requirements: {e}")
        return False
    except Exception as e:
        # Deliberately broad: installation is best-effort and the caller
        # decides what to do with a False result.
        print(f"Unexpected error: {e}")
        return False

    print("Successfully installed all requirements")
    return True
# Launch the app
if __name__ == "__main__":
    # Requirements are installed first; the UI is launched only if that
    # succeeds.
    if not install_requirements():
        print("Failed to install some requirements")
        # Exit non-zero so a shell or process supervisor can tell that
        # startup failed instead of seeing a clean exit.
        sys.exit(1)

    print("All requirements installed successfully")

    # These names are not used below. NOTE(review): presumably imported so
    # that a broken install fails here rather than on the first request --
    # confirm before removing.
    from transformers import PaliGemmaForConditionalGeneration, AutoProcessor
    from peft import PeftModel, PeftConfig

    # No model is preloaded at startup. To serve on all interfaces with a
    # fixed port instead, use
    # demo.launch(server_name="0.0.0.0", server_port=7812).
    demo.launch()