broadfield-dev committed on
Commit
f524940
·
verified ·
1 Parent(s): 434e44f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -20
app.py CHANGED
@@ -60,7 +60,7 @@ def stitch_images_in_grid(images: List[Image.Image], num_columns: int, page_orde
60
 
61
  return grid_image
62
 
63
- def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop_left, crop_right, hide_annotations, page_order, progress=gr.Progress()):
64
  pdf_input_source = None
65
  is_bytes = False
66
  source_name = "document"
@@ -114,26 +114,43 @@ def process_pdf(pdf_file, pdf_url, dpi, num_columns, crop_top, crop_bottom, crop
114
  else:
115
  cropped_images = images
116
 
117
- progress(0.7, desc=f"Stitching {len(cropped_images)} images together...")
 
 
 
118
 
119
- if num_columns > 1:
120
- stitched_image = stitch_images_in_grid(cropped_images, num_columns, page_order)
121
- else:
122
- stitched_image = stitch_images_vertically(cropped_images)
123
-
124
- if stitched_image is None:
125
- raise gr.Error("Image stitching failed.")
126
- logger.info("Image stitching complete.")
127
-
128
- progress(0.9, desc="Saving final image...")
129
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_") as tmp_file:
130
- stitched_image.save(tmp_file.name, "PNG")
131
- output_path = tmp_file.name
132
 
133
- logger.info(f"Final image saved to temporary path: {output_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  progress(1, desc="Done!")
135
 
136
- return output_path, output_path
137
 
138
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
139
  gr.Markdown(
@@ -153,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
153
 
154
  dpi_slider = gr.Slider(minimum=100, maximum=600, step=5, value=200, label="Image Resolution (DPI)")
155
  columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
 
156
 
157
  with gr.Accordion("Advanced Options", open=False):
158
  hide_annotations_toggle = gr.Checkbox(value=True, label="Hide PDF Annotations (Links/Highlights)", info="Turn this on to remove the colored boxes that can appear around links and references.")
@@ -168,8 +186,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
168
 
169
  with gr.Column(scale=2):
170
  gr.Markdown("## Output")
171
- output_image_preview = gr.Image(label="Stitched Image Preview", type="filepath", interactive=False, height=600)
172
- output_image_download = gr.File(label="Download Stitched Image", interactive=False)
173
 
174
  submit_btn.click(
175
  fn=process_pdf,
@@ -177,7 +195,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
177
  pdf_file_input,
178
  pdf_url_input,
179
  dpi_slider,
180
- columns_slider,
 
181
  crop_top,
182
  crop_bottom,
183
  crop_left,
 
60
 
61
  return grid_image
62
 
63
+ def process_pdf(pdf_file, pdf_url, dpi, num_columns, num_images, crop_top, crop_bottom, crop_left, crop_right, hide_annotations, page_order, progress=gr.Progress()):
64
  pdf_input_source = None
65
  is_bytes = False
66
  source_name = "document"
 
114
  else:
115
  cropped_images = images
116
 
117
+ progress(0.7, desc=f"Splitting {len(cropped_images)} pages into {num_images} image(s)...")
118
+
119
+ total_pages = len(cropped_images)
120
+ effective_num_images = min(num_images, total_pages)
121
 
122
+ chunk_size = math.ceil(total_pages / effective_num_images)
123
+ image_chunks = [cropped_images[i:i + chunk_size] for i in range(0, total_pages, chunk_size)]
124
+
125
+ output_paths = []
126
+ num_chunks = len(image_chunks)
127
+
128
+ for i, chunk in enumerate(image_chunks):
129
+ progress(0.75 + (0.2 * (i / num_chunks)), desc=f"Stitching image {i+1} of {num_chunks}...")
 
 
 
 
 
130
 
131
+ if not chunk:
132
+ continue
133
+
134
+ if num_columns > 1:
135
+ stitched_image = stitch_images_in_grid(chunk, num_columns, page_order)
136
+ else:
137
+ stitched_image = stitch_images_vertically(chunk)
138
+
139
+ if stitched_image is None:
140
+ logger.warning(f"Image stitching failed for chunk {i+1}.")
141
+ continue
142
+
143
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".png", prefix=f"{source_name}_stitched_{i+1}_") as tmp_file:
144
+ stitched_image.save(tmp_file.name, "PNG")
145
+ output_paths.append(tmp_file.name)
146
+
147
+ if not output_paths:
148
+ raise gr.Error("Image stitching failed for all pages.")
149
+
150
+ logger.info(f"Final images saved to temporary paths: {output_paths}")
151
  progress(1, desc="Done!")
152
 
153
+ return output_paths, output_paths
154
 
155
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
156
  gr.Markdown(
 
170
 
171
  dpi_slider = gr.Slider(minimum=100, maximum=600, step=5, value=200, label="Image Resolution (DPI)")
172
  columns_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Columns")
173
+ num_images_slider = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Number of Output Images", info="Splits the PDF pages into multiple output images.")
174
 
175
  with gr.Accordion("Advanced Options", open=False):
176
  hide_annotations_toggle = gr.Checkbox(value=True, label="Hide PDF Annotations (Links/Highlights)", info="Turn this on to remove the colored boxes that can appear around links and references.")
 
186
 
187
  with gr.Column(scale=2):
188
  gr.Markdown("## Output")
189
+ output_image_preview = gr.Gallery(label="Stitched Image(s) Preview", interactive=False, height=600)
190
+ output_image_download = gr.File(label="Download Stitched Image(s)", interactive=False, file_count="multiple")
191
 
192
  submit_btn.click(
193
  fn=process_pdf,
 
195
  pdf_file_input,
196
  pdf_url_input,
197
  dpi_slider,
198
+ columns_slider,
199
+ num_images_slider,
200
  crop_top,
201
  crop_bottom,
202
  crop_left,