Spaces:

alakxender
/

dhivehi-ocr

Running on Zero

dhivehi-ocr / app.py

97bb8f1 6 months ago

12 kB

	import spaces
	import gradio as gr
	import os
	import sys
	import subprocess
	import numpy as np
	from paligemma2 import PaliGemma2Handler, MODELS as PALIGEMMA_MODELS
	from gemma import GemmaHandler, MODELS as GEMMA_MODELS

	# Initialize model handlers.
	# One handler per model family is created once, at import time, and the
	# module-level instances are shared by the process_* wrappers below, which
	# call process_image / process_pdf on them.
	paligemma_handler = PaliGemma2Handler()
	gemma_handler = GemmaHandler()

	@spaces.GPU
	def process_image_paligemma(model_name, image, progress=gr.Progress()):
	    """Run the selected PaliGemma2 model on a single image.

	    Thin wrapper that forwards to ``paligemma_handler.process_image``; the
	    ``@spaces.GPU`` decorator marks the call as GPU work for the Space.

	    Args:
	        model_name: Model chosen in the PaliGemma2 dropdown.
	        image: Image supplied by the ``gr.Image(type="pil")`` input.
	        progress: Gradio progress tracker, passed through to the handler.

	    Returns:
	        Whatever the handler returns. The UI binds it to a text box and a
	        gallery, so presumably a (text, images) pair -- confirm in handler.
	    """
	    result = paligemma_handler.process_image(model_name, image, progress)
	    return result

	@spaces.GPU
	def process_image_gemma(model_name, image, progress=gr.Progress()):
	    """Run the selected Gemma model on a single image.

	    Thin wrapper that forwards to ``gemma_handler.process_image``; the
	    ``@spaces.GPU`` decorator marks the call as GPU work for the Space.

	    Args:
	        model_name: Model chosen in the Gemma dropdown.
	        image: Image supplied by the ``gr.Image(type="pil")`` input.
	        progress: Gradio progress tracker, passed through to the handler.

	    Returns:
	        Whatever the handler returns. The UI binds it to a text box and a
	        gallery, so presumably a (text, images) pair -- confirm in handler.
	    """
	    result = gemma_handler.process_image(model_name, image, progress)
	    return result

	@spaces.GPU
	def process_pdf_paligemma(pdf_path, model_name, progress=gr.Progress()):
	    """Run the selected PaliGemma2 model on an uploaded PDF.

	    Thin wrapper that forwards to ``paligemma_handler.process_pdf``; the
	    ``@spaces.GPU`` decorator marks the call as GPU work for the Space.

	    Note the argument order: the PDF comes first and the model name second,
	    the reverse of the image wrappers.

	    Args:
	        pdf_path: Value of the ``gr.File`` upload component (presumably a
	            file path -- confirm against the handler).
	        model_name: Model chosen in the PaliGemma2 dropdown.
	        progress: Gradio progress tracker, passed through to the handler.

	    Returns:
	        Whatever the handler returns. The UI binds it to a text box and a
	        gallery, so presumably a (text, images) pair -- confirm in handler.
	    """
	    result = paligemma_handler.process_pdf(pdf_path, model_name, progress)
	    return result

	@spaces.GPU
	def process_pdf_gemma(pdf_path, model_name, progress=gr.Progress()):
	    """Run the selected Gemma model on an uploaded PDF.

	    Thin wrapper that forwards to ``gemma_handler.process_pdf``; the
	    ``@spaces.GPU`` decorator marks the call as GPU work for the Space.

	    Note the argument order: the PDF comes first and the model name second,
	    the reverse of the image wrappers.

	    Args:
	        pdf_path: Value of the ``gr.File`` upload component (presumably a
	            file path -- confirm against the handler).
	        model_name: Model chosen in the Gemma dropdown.
	        progress: Gradio progress tracker, passed through to the handler.

	    Returns:
	        Whatever the handler returns. The UI binds it to a text box and a
	        gallery, so presumably a (text, images) pair -- confirm in handler.
	    """
	    result = gemma_handler.process_pdf(pdf_path, model_name, progress)
	    return result

	# Bundled sample images as [file name, description] rows. The example
	# pickers in the UI use only the file name; the description documents what
	# each sample contains.
	examples = [
	    ["type_1_sl.png", "Typed Dhivehi text sample 1"],
	    ["type_2_sl.png", "Typed Dhivehi text sample 2"],
	    ["hw_1_sl.png", "Handwritten Dhivehi text sample 1"],
	    ["hw_2_sl.jpg", "Handwritten Dhivehi text sample 2"],
	    ["hw_3_sl.png", "Handwritten Dhivehi text sample 3"],
	    ["hw_4_sl.png", "Handwritten Dhivehi text sample 4"],
	    ["ml.png", "Multi-line Dhivehi text sample"],
	]

	# Custom stylesheet handed to gr.Blocks(css=...).
	# .textbox1 is attached to the "Extracted Text" boxes through
	# elem_classes="textbox1": larger text, extra line height, and the listed
	# font families (presumably Thaana/Dhivehi fonts -- confirm they are
	# actually available to the browser).
	# .textbox2 hides its textarea; no component in the visible code uses it.
	css = """
	.textbox1 textarea {
	font-size: 18px !important;
	font-family: 'MV_Faseyha', 'Faruma', 'A_Faruma' !important;
	line-height: 1.8 !important;
	}
	.textbox2 textarea {
	display: none;
	}
	"""

	def _build_output_tabs():
	    """Create the "Extracted Text" / "Detected Text Regions" result tabs.

	    Must be called inside an active Gradio layout context.

	    Returns:
	        ``(textbox, gallery)`` so the caller can use them as event outputs.
	    """
	    with gr.Tabs():
	        with gr.Tab("Extracted Text"):
	            text_output = gr.Textbox(
	                lines=5,
	                label="Extracted Text",
	                show_copy_button=True,
	                rtl=True,
	                elem_classes="textbox1",
	            )

	        with gr.Tab("Detected Text Regions"):
	            bbox_output = gr.Gallery(
	                label="Detected Text Regions",
	                show_label=True,
	                columns=2,
	            )
	    return text_output, bbox_output


	def _build_model_tab(tab_label, model_names, image_fn, pdf_fn):
	    """Build one model-family tab (image + PDF input) and wire its buttons.

	    Must be called inside an active ``gr.Blocks`` context, because it both
	    creates components and registers click handlers.

	    Args:
	        tab_label: Tab title, also used in the dropdown label
	            ("Select <tab_label> Model").
	        model_names: Selectable model names; the first one is preselected.
	        image_fn: Callback invoked as ``image_fn(model_name, image)``.
	        pdf_fn: Callback invoked as ``pdf_fn(pdf_file, model_name)``.
	    """
	    with gr.Tab(tab_label):
	        model_dropdown = gr.Dropdown(
	            choices=model_names,
	            # Tolerate an empty model list instead of raising IndexError.
	            value=model_names[0] if model_names else None,
	            label=f"Select {tab_label} Model",
	        )

	        with gr.Tabs():
	            with gr.Tab("Image Input"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        image_input = gr.Image(type="pil", label="Input Image")
	                        image_submit_btn = gr.Button("Extract Text")

	                        # One value per input component: keep the file
	                        # name, drop the description.
	                        gr.Examples(
	                            examples=[[img] for img, _ in examples],
	                            inputs=[image_input],
	                            label="Example Images",
	                            examples_per_page=8,
	                        )

	                    with gr.Column(scale=3):
	                        image_text_output, image_bbox_output = _build_output_tabs()

	            with gr.Tab("PDF Input"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        pdf_input = gr.File(
	                            label="Input PDF",
	                            file_types=[".pdf"],
	                        )
	                        pdf_submit_btn = gr.Button("Extract Text from PDF")

	                        # Each row holds exactly one value because there
	                        # is exactly one input component.
	                        gr.Examples(
	                            examples=[["example.pdf"]],
	                            inputs=[pdf_input],
	                            label="Example PDFs",
	                            examples_per_page=8,
	                        )

	                    with gr.Column(scale=3):
	                        pdf_text_output, pdf_bbox_output = _build_output_tabs()

	    # Input order must match each callback's positional parameters:
	    # images take (model, image), PDFs take (pdf, model).
	    image_submit_btn.click(
	        fn=image_fn,
	        inputs=[model_dropdown, image_input],
	        outputs=[image_text_output, image_bbox_output],
	    )

	    pdf_submit_btn.click(
	        fn=pdf_fn,
	        inputs=[pdf_input, model_dropdown],
	        outputs=[pdf_text_output, pdf_bbox_output],
	    )


	# Top-level UI: a title, a tagline, and one tab per model family. Both tabs
	# share the same layout, so they are produced by the same builder.
	with gr.Blocks(title="Dhivehi Image to Text", css=css) as demo:
	    gr.Markdown("# Dhivehi Image to Text")
	    gr.Markdown("Dhivehi Image to Text experimental finetunes")

	    with gr.Tabs():
	        _build_model_tab(
	            "PaliGemma2",
	            list(PALIGEMMA_MODELS),
	            process_image_paligemma,
	            process_pdf_paligemma,
	        )
	        _build_model_tab(
	            "Gemma",
	            list(GEMMA_MODELS),
	            process_image_gemma,
	            process_pdf_gemma,
	        )

	def install_requirements(requirements_path='requirements.txt'):
	    """Install the packages listed in a pip requirements file.

	    Runs ``<current interpreter> -m pip install -r <path> --no-cache-dir``
	    and reports progress and failures on stdout.

	    Args:
	        requirements_path: Path to the requirements file. A relative path
	            is resolved against the current working directory.

	    Returns:
	        True if pip exited with status 0. False if the file does not exist,
	        pip exited with a non-zero status, or launching pip failed with any
	        other ``Exception``; those errors are printed, not raised.
	    """
	    # isfile rather than exists: a directory with this name cannot be
	    # installed from, so report it as missing instead of handing it to pip.
	    if not os.path.isfile(requirements_path):
	        print(f"Error: {requirements_path} not found")
	        return False

	    pip_command = [
	        sys.executable,  # use the interpreter that is running this app
	        "-m",
	        "pip",
	        "install",
	        "-r",
	        requirements_path,
	        "--no-cache-dir",  # skip pip's cache to avoid memory issues
	    ]

	    print("Installing requirements...")
	    try:
	        subprocess.check_call(pip_command)
	    except subprocess.CalledProcessError as e:
	        print(f"Error installing requirements: {e}")
	        return False
	    except Exception as e:
	        # Deliberately broad: this is a startup boundary and the caller
	        # relies on a boolean result rather than an exception.
	        print(f"Unexpected error: {e}")
	        return False

	    print("Successfully installed all requirements")
	    return True

	# Script entry point: install dependencies, then start the Gradio server.
	if __name__ == "__main__":
	    if not install_requirements():
	        print("Failed to install some requirements")
	    else:
	        print("All requirements installed successfully")

	        # Imported only after the install step has succeeded. The names are
	        # not used below.
	        from transformers import PaliGemmaForConditionalGeneration, AutoProcessor
	        from peft import PeftModel, PeftConfig

	        # Eager loading of the default PaliGemma2 model is disabled:
	        #paligemma_handler.load_model(list(PALIGEMMA_MODELS.keys())[0])

	        # Alternative launch with an explicit host and port:
	        #demo.launch(server_name="0.0.0.0", server_port=7812)
	        demo.launch()