Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,7 +60,7 @@ def stitch_images_in_grid(images: List[Image.Image], num_columns: int, page_orde
|
|
| 60 |
|
| 61 |
return grid_image
|
| 62 |
|
| 63 |
-
def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop_left, crop_right, hide_annotations, page_order, progress=gr.Progress()):
|
| 64 |
pdf_input_source = None
|
| 65 |
is_bytes = False
|
| 66 |
source_name = "document"
|
|
@@ -114,26 +114,43 @@ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop
|
|
| 114 |
else:
|
| 115 |
cropped_images = images
|
| 116 |
|
| 117 |
-
progress(0.7, desc=f"
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
progress(0.9, desc="Saving final image...")
|
| 129 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_") as tmp_file:
|
| 130 |
-
stitched_image.save(tmp_file.name, "PNG")
|
| 131 |
-
output_path = tmp_file.name
|
| 132 |
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
progress(1, desc="Done!")
|
| 135 |
|
| 136 |
-
return
|
| 137 |
|
| 138 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 139 |
gr.Markdown(
|
|
@@ -153,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 153 |
|
| 154 |
dpi_slider = gr.Slider(minimum=100, maximum=600, step=5, value=200, label="Image Resolution (DPI)")
|
| 155 |
columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
|
|
|
|
| 156 |
|
| 157 |
with gr.Accordion("Advanced Options", open=False):
|
| 158 |
hide_annotations_toggle = gr.Checkbox(value=True, label="Hide PDF Annotations (Links/Highlights)", info="Turn this on to remove the colored boxes that can appear around links and references.")
|
|
@@ -168,8 +186,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 168 |
|
| 169 |
with gr.Column(scale=2):
|
| 170 |
gr.Markdown("## Output")
|
| 171 |
-
output_image_preview = gr.
|
| 172 |
-
output_image_download = gr.File(label="Download Stitched Image", interactive=False)
|
| 173 |
|
| 174 |
submit_btn.click(
|
| 175 |
fn=process_pdf,
|
|
@@ -177,7 +195,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 177 |
pdf_file_input,
|
| 178 |
pdf_url_input,
|
| 179 |
dpi_slider,
|
| 180 |
-
columns_slider,
|
|
|
|
| 181 |
crop_top,
|
| 182 |
crop_bottom,
|
| 183 |
crop_left,
|
|
|
|
| 60 |
|
| 61 |
return grid_image
|
| 62 |
|
| 63 |
+
def process_pdf(pdf_file, pdf_url, dpi, num_columns, num_images, crop_top, crop_bottom, crop_left, crop_right, hide_annotations, page_order, progress=gr.Progress()):
|
| 64 |
pdf_input_source = None
|
| 65 |
is_bytes = False
|
| 66 |
source_name = "document"
|
|
|
|
| 114 |
else:
|
| 115 |
cropped_images = images
|
| 116 |
|
| 117 |
+
progress(0.7, desc=f"Splitting {len(cropped_images)} pages into {num_images} image(s)...")
|
| 118 |
+
|
| 119 |
+
total_pages = len(cropped_images)
|
| 120 |
+
effective_num_images = min(num_images, total_pages)
|
| 121 |
|
| 122 |
+
chunk_size = math.ceil(total_pages / effective_num_images)
|
| 123 |
+
image_chunks = [cropped_images[i:i + chunk_size] for i in range(0, total_pages, chunk_size)]
|
| 124 |
+
|
| 125 |
+
output_paths = []
|
| 126 |
+
num_chunks = len(image_chunks)
|
| 127 |
+
|
| 128 |
+
for i, chunk in enumerate(image_chunks):
|
| 129 |
+
progress(0.75 + (0.2 * (i / num_chunks)), desc=f"Stitching image {i+1} of {num_chunks}...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
if not chunk:
|
| 132 |
+
continue
|
| 133 |
+
|
| 134 |
+
if num_columns > 1:
|
| 135 |
+
stitched_image = stitch_images_in_grid(chunk, num_columns, page_order)
|
| 136 |
+
else:
|
| 137 |
+
stitched_image = stitch_images_vertically(chunk)
|
| 138 |
+
|
| 139 |
+
if stitched_image is None:
|
| 140 |
+
logger.warning(f"Image stitching failed for chunk {i+1}.")
|
| 141 |
+
continue
|
| 142 |
+
|
| 143 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_{i+1}_") as tmp_file:
|
| 144 |
+
stitched_image.save(tmp_file.name, "PNG")
|
| 145 |
+
output_paths.append(tmp_file.name)
|
| 146 |
+
|
| 147 |
+
if not output_paths:
|
| 148 |
+
raise gr.Error("Image stitching failed for all pages.")
|
| 149 |
+
|
| 150 |
+
logger.info(f"Final images saved to temporary paths: {output_paths}")
|
| 151 |
progress(1, desc="Done!")
|
| 152 |
|
| 153 |
+
return output_paths, output_paths
|
| 154 |
|
| 155 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 156 |
gr.Markdown(
|
|
|
|
| 170 |
|
| 171 |
dpi_slider = gr.Slider(minimum=100, maximum=600, step=5, value=200, label="Image Resolution (DPI)")
|
| 172 |
columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
|
| 173 |
+
num_images_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Output Images", info="Splits the PDF pages into multiple output images.")
|
| 174 |
|
| 175 |
with gr.Accordion("Advanced Options", open=False):
|
| 176 |
hide_annotations_toggle = gr.Checkbox(value=True, label="Hide PDF Annotations (Links/Highlights)", info="Turn this on to remove the colored boxes that can appear around links and references.")
|
|
|
|
| 186 |
|
| 187 |
with gr.Column(scale=2):
|
| 188 |
gr.Markdown("## Output")
|
| 189 |
+
output_image_preview = gr.Gallery(label="Stitched Image(s) Preview", interactive=False, height=600)
|
| 190 |
+
output_image_download = gr.File(label="Download Stitched Image(s)", interactive=False, file_count="multiple")
|
| 191 |
|
| 192 |
submit_btn.click(
|
| 193 |
fn=process_pdf,
|
|
|
|
| 195 |
pdf_file_input,
|
| 196 |
pdf_url_input,
|
| 197 |
dpi_slider,
|
| 198 |
+
columns_slider,
|
| 199 |
+
num_images_slider,
|
| 200 |
crop_top,
|
| 201 |
crop_bottom,
|
| 202 |
crop_left,
|