# dhivehi-ocr / app.py
# (file-viewer page residue captured with this file: alakxender's picture,
#  "g", 97bb8f1, raw, history blame, 12 kB)
import spaces
import gradio as gr
import os
import sys
import subprocess
import numpy as np
from paligemma2 import PaliGemma2Handler, MODELS as PALIGEMMA_MODELS
from gemma import GemmaHandler, MODELS as GEMMA_MODELS
# Initialize model handlers
# One instance of each handler is created at import time; the process_*
# wrapper functions below forward every request to these two objects.
paligemma_handler = PaliGemma2Handler()
gemma_handler = GemmaHandler()
@spaces.GPU
def process_image_paligemma(model_name, image, progress=gr.Progress()):
    """Forward one image to the PaliGemma2 handler and return its result.

    Args:
        model_name: Model selection, passed through to the handler.
        image: Image to process, passed through to the handler.
        progress: Gradio progress tracker, passed through to the handler.

    Returns:
        Whatever ``paligemma_handler.process_image`` returns, unchanged.
    """
    result = paligemma_handler.process_image(model_name, image, progress)
    return result
@spaces.GPU
def process_image_gemma(model_name, image, progress=gr.Progress()):
    """Forward one image to the Gemma handler and return its result.

    Args:
        model_name: Model selection, passed through to the handler.
        image: Image to process, passed through to the handler.
        progress: Gradio progress tracker, passed through to the handler.

    Returns:
        Whatever ``gemma_handler.process_image`` returns, unchanged.
    """
    result = gemma_handler.process_image(model_name, image, progress)
    return result
@spaces.GPU
def process_pdf_paligemma(pdf_path, model_name, progress=gr.Progress()):
    """Forward a PDF to the PaliGemma2 handler and return its result.

    Note the argument order: the PDF comes first and the model name second,
    which is the reverse of the image wrappers.

    Args:
        pdf_path: PDF input, passed through to the handler.
        model_name: Model selection, passed through to the handler.
        progress: Gradio progress tracker, passed through to the handler.

    Returns:
        Whatever ``paligemma_handler.process_pdf`` returns, unchanged.
    """
    result = paligemma_handler.process_pdf(pdf_path, model_name, progress)
    return result
@spaces.GPU
def process_pdf_gemma(pdf_path, model_name, progress=gr.Progress()):
    """Forward a PDF to the Gemma handler and return its result.

    Note the argument order: the PDF comes first and the model name second,
    which is the reverse of the image wrappers.

    Args:
        pdf_path: PDF input, passed through to the handler.
        model_name: Model selection, passed through to the handler.
        progress: Gradio progress tracker, passed through to the handler.

    Returns:
        Whatever ``gemma_handler.process_pdf`` returns, unchanged.
    """
    result = gemma_handler.process_pdf(pdf_path, model_name, progress)
    return result
# Sample inputs offered in the UI. Each row is [image file, description];
# the UI code uses only the file name from each row.
examples = [
    # Typed text
    ["type_1_sl.png", "Typed Dhivehi text sample 1"],
    ["type_2_sl.png", "Typed Dhivehi text sample 2"],
    # Handwritten text
    ["hw_1_sl.png", "Handwritten Dhivehi text sample 1"],
    ["hw_2_sl.jpg", "Handwritten Dhivehi text sample 2"],
    ["hw_3_sl.png", "Handwritten Dhivehi text sample 3"],
    ["hw_4_sl.png", "Handwritten Dhivehi text sample 4"],
    # Multi-line text
    ["ml.png", "Multi-line Dhivehi text sample"],
]
# Custom stylesheet handed to gr.Blocks(css=...) below.
#   .textbox1 - sets font size, font stack and line height on the textarea of
#               components created with elem_classes="textbox1" (the
#               "Extracted Text" boxes).
#   .textbox2 - hides the textarea completely; no component visible in this
#               file uses that class.
css = """
.textbox1 textarea {
font-size: 18px !important;
font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
line-height: 1.8 !important;
}
.textbox2 textarea {
display: none;
}
"""
# UI definition. Layout: one top-level tab per model family (PaliGemma2,
# Gemma); each holds a model dropdown plus "Image Input" and "PDF Input"
# sub-tabs with an input column on the left and an output column (text and
# gallery) on the right. Button click handlers are wired at the end, still
# inside the Blocks context.
with gr.Blocks(title="Dhivehi Image to Text", css=css) as demo:
    gr.Markdown("# Dhivehi Image to Text")
    gr.Markdown("Dhivehi Image to Text experimental finetunes")

    with gr.Tabs():
        # ------------------------------------------------------------------
        # PaliGemma2 tab
        # ------------------------------------------------------------------
        with gr.Tab("PaliGemma2"):
            paligemma_model_names = list(PALIGEMMA_MODELS.keys())
            model_dropdown_paligemma = gr.Dropdown(
                choices=paligemma_model_names,
                value=paligemma_model_names[0],  # first model is preselected
                label="Select PaliGemma2 Model",
            )

            with gr.Tabs():
                with gr.Tab("Image Input"):
                    with gr.Row():
                        # Left column: image upload, submit button, examples.
                        with gr.Column(scale=2):
                            image_input_paligemma = gr.Image(
                                type="pil",
                                label="Input Image",
                            )
                            image_submit_btn_paligemma = gr.Button("Extract Text")

                            # Only the file name of each example row is used.
                            gr.Examples(
                                examples=[[image_file] for image_file, _caption in examples],
                                inputs=[image_input_paligemma],
                                label="Example Images",
                                examples_per_page=8,
                            )

                        # Right column: extracted text and detected regions.
                        with gr.Column(scale=3):
                            with gr.Tabs():
                                with gr.Tab("Extracted Text"):
                                    image_text_output_paligemma = gr.Textbox(
                                        label="Extracted Text",
                                        lines=5,
                                        show_copy_button=True,
                                        rtl=True,
                                        elem_classes="textbox1",
                                    )

                                with gr.Tab("Detected Text Regions"):
                                    image_bbox_output_paligemma = gr.Gallery(
                                        label="Detected Text Regions",
                                        show_label=True,
                                        columns=2,
                                    )

                with gr.Tab("PDF Input"):
                    with gr.Row():
                        # Left column: PDF upload, submit button, examples.
                        with gr.Column(scale=2):
                            pdf_input_paligemma = gr.File(
                                label="Input PDF",
                                file_types=[".pdf"],
                            )
                            pdf_submit_btn_paligemma = gr.Button("Extract Text from PDF")

                            # NOTE(review): each row carries a label next to
                            # the path while only one input component is
                            # listed - confirm the extra column is ignored.
                            gr.Examples(
                                examples=[
                                    ["example.pdf", "Example 1"],
                                ],
                                inputs=[pdf_input_paligemma],
                                label="Example PDFs",
                                examples_per_page=8,
                            )

                        # Right column: extracted text and detected regions.
                        with gr.Column(scale=3):
                            with gr.Tabs():
                                with gr.Tab("Extracted Text"):
                                    pdf_text_output_paligemma = gr.Textbox(
                                        label="Extracted Text",
                                        lines=5,
                                        show_copy_button=True,
                                        rtl=True,
                                        elem_classes="textbox1",
                                    )

                                with gr.Tab("Detected Text Regions"):
                                    pdf_bbox_output_paligemma = gr.Gallery(
                                        label="Detected Text Regions",
                                        show_label=True,
                                        columns=2,
                                    )

        # ------------------------------------------------------------------
        # Gemma tab (same layout as the PaliGemma2 tab)
        # ------------------------------------------------------------------
        with gr.Tab("Gemma"):
            gemma_model_names = list(GEMMA_MODELS.keys())
            model_dropdown_gemma = gr.Dropdown(
                choices=gemma_model_names,
                value=gemma_model_names[0],  # first model is preselected
                label="Select Gemma Model",
            )

            with gr.Tabs():
                with gr.Tab("Image Input"):
                    with gr.Row():
                        # Left column: image upload, submit button, examples.
                        with gr.Column(scale=2):
                            image_input_gemma = gr.Image(
                                type="pil",
                                label="Input Image",
                            )
                            image_submit_btn_gemma = gr.Button("Extract Text")

                            # Only the file name of each example row is used.
                            gr.Examples(
                                examples=[[image_file] for image_file, _caption in examples],
                                inputs=[image_input_gemma],
                                label="Example Images",
                                examples_per_page=8,
                            )

                        # Right column: extracted text and detected regions.
                        with gr.Column(scale=3):
                            with gr.Tabs():
                                with gr.Tab("Extracted Text"):
                                    image_text_output_gemma = gr.Textbox(
                                        label="Extracted Text",
                                        lines=5,
                                        show_copy_button=True,
                                        rtl=True,
                                        elem_classes="textbox1",
                                    )

                                with gr.Tab("Detected Text Regions"):
                                    image_bbox_output_gemma = gr.Gallery(
                                        label="Detected Text Regions",
                                        show_label=True,
                                        columns=2,
                                    )

                with gr.Tab("PDF Input"):
                    with gr.Row():
                        # Left column: PDF upload, submit button, examples.
                        with gr.Column(scale=2):
                            pdf_input_gemma = gr.File(
                                label="Input PDF",
                                file_types=[".pdf"],
                            )
                            pdf_submit_btn_gemma = gr.Button("Extract Text from PDF")

                            # NOTE(review): each row carries a label next to
                            # the path while only one input component is
                            # listed - confirm the extra column is ignored.
                            gr.Examples(
                                examples=[
                                    ["example.pdf", "Example 1"],
                                ],
                                inputs=[pdf_input_gemma],
                                label="Example PDFs",
                                examples_per_page=8,
                            )

                        # Right column: extracted text and detected regions.
                        with gr.Column(scale=3):
                            with gr.Tabs():
                                with gr.Tab("Extracted Text"):
                                    pdf_text_output_gemma = gr.Textbox(
                                        label="Extracted Text",
                                        lines=5,
                                        show_copy_button=True,
                                        rtl=True,
                                        elem_classes="textbox1",
                                    )

                                with gr.Tab("Detected Text Regions"):
                                    pdf_bbox_output_gemma = gr.Gallery(
                                        label="Detected Text Regions",
                                        show_label=True,
                                        columns=2,
                                    )

    # ----------------------------------------------------------------------
    # Event wiring. The image handlers receive (model, image); the PDF
    # handlers receive (pdf, model), matching the wrapper signatures. Each
    # handler feeds two outputs: the text box and the gallery.
    # ----------------------------------------------------------------------

    # PaliGemma2 event handlers
    image_submit_btn_paligemma.click(
        fn=process_image_paligemma,
        inputs=[model_dropdown_paligemma, image_input_paligemma],
        outputs=[image_text_output_paligemma, image_bbox_output_paligemma],
    )
    pdf_submit_btn_paligemma.click(
        fn=process_pdf_paligemma,
        inputs=[pdf_input_paligemma, model_dropdown_paligemma],
        outputs=[pdf_text_output_paligemma, pdf_bbox_output_paligemma],
    )

    # Gemma event handlers
    image_submit_btn_gemma.click(
        fn=process_image_gemma,
        inputs=[model_dropdown_gemma, image_input_gemma],
        outputs=[image_text_output_gemma, image_bbox_output_gemma],
    )
    pdf_submit_btn_gemma.click(
        fn=process_pdf_gemma,
        inputs=[pdf_input_gemma, model_dropdown_gemma],
        outputs=[pdf_text_output_gemma, pdf_bbox_output_gemma],
    )
# Function to install requirements
def install_requirements(requirements_path='requirements.txt'):
    """Install the packages listed in a pip requirements file.

    Runs ``<sys.executable> -m pip install -r <requirements_path>
    --no-cache-dir`` and reports the outcome on stdout. Failures are
    reported through the return value, never raised.

    Args:
        requirements_path: Path of the requirements file. Defaults to
            ``requirements.txt``, resolved against the current working
            directory.

    Returns:
        True if pip exited successfully. False if ``requirements_path`` is
        missing or not a regular file, or if the installation failed.
    """
    # isfile() rather than exists(): a directory with this name cannot be
    # installed from, so reject it here instead of launching pip to fail.
    if not os.path.isfile(requirements_path):
        print(f"Error: {requirements_path} not found")
        return False

    pip_command = [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        requirements_path,
        # Using --no-cache-dir to avoid memory issues
        "--no-cache-dir",
    ]

    try:
        print("Installing requirements...")
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        # pip ran but exited with a non-zero status.
        print(f"Error installing requirements: {e}")
        return False
    except Exception as e:
        # Deliberate best-effort boundary: anything else (e.g. the
        # interpreter could not be spawned) is reported, not propagated.
        print(f"Unexpected error: {e}")
        return False
    else:
        print("Successfully installed all requirements")
        return True
# Launch the app
if __name__ == "__main__":
    # Requirements are installed first; the UI is served only if that worked.
    success = install_requirements()
    if not success:
        print("Failed to install some requirements")
    else:
        print("All requirements installed successfully")
        # Imported only after a successful install; the imported names are
        # not used in this block.
        from transformers import PaliGemmaForConditionalGeneration, AutoProcessor
        from peft import PeftModel, PeftConfig
        # Disabled alternatives from the original script: preloading the
        # first PaliGemma2 model via paligemma_handler.load_model(...), and
        # launching with server_name="0.0.0.0", server_port=7812.
        demo.launch()