Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ from docling.datamodel.base_models import InputFormat
|
|
| 13 |
# --- START OF OCR CONFIGURATION ---
|
| 14 |
# Configure DocLing converter with Tesseract OCR enabled
|
| 15 |
pdf_options = PdfPipelineOptions(
|
| 16 |
-
do_ocr=True,
|
| 17 |
ocr_model="tesseract",
|
| 18 |
ocr_languages=[
|
| 19 |
"eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
|
|
@@ -165,7 +165,7 @@ def reset_form():
|
|
| 165 |
# Gradio Interface
|
| 166 |
with gr.Blocks(title="LLM-Ready Document Converter") as app:
|
| 167 |
|
| 168 |
-
gr.Markdown("# 📄 Document Converter
|
| 169 |
gr.Markdown("**HOWTO** : Upload a document and get 4 output files: Docling JSON, TXT, Markdown, and HTML")
|
| 170 |
gr.Markdown("**EXPLANATION** : This app transforms various document formats (like TXT, standard and scanned PDFs, DOCX, PPT, CSV, XLS, XLSX) into structured, machine-readable outputs optimized for Large Language Models (LLMs). It extracts and converts content into clean formats such as DocLing JSON (for document structure), plain text, Markdown, and HTML making it easier for AI models to process, analyze, or generate responses from complex documents without losing key details like layout or formatting. Essentially, it's a bridge between raw files and AI-ready data.")
|
| 171 |
|
|
@@ -209,7 +209,5 @@ with gr.Blocks(title="LLM-Ready Document Converter") as app:
|
|
| 209 |
|
| 210 |
if __name__ == "__main__":
|
| 211 |
app.launch(
|
| 212 |
-
# server_name="0.0.0.0",
|
| 213 |
-
# server_port=7860,
|
| 214 |
share=True
|
| 215 |
)
|
|
|
|
| 13 |
# --- START OF OCR CONFIGURATION ---
|
| 14 |
# Configure DocLing converter with Tesseract OCR settings (OCR is currently disabled via do_ocr=False)
|
| 15 |
pdf_options = PdfPipelineOptions(
|
| 16 |
+
do_ocr=False,
|
| 17 |
ocr_model="tesseract",
|
| 18 |
ocr_languages=[
|
| 19 |
"eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
|
|
|
|
| 165 |
# Gradio Interface
|
| 166 |
with gr.Blocks(title="LLM-Ready Document Converter") as app:
|
| 167 |
|
| 168 |
+
gr.Markdown("# 📄 LLM-Ready Document Converter")
|
| 169 |
gr.Markdown("**HOWTO** : Upload a document and get 4 output files: Docling JSON, TXT, Markdown, and HTML")
|
| 170 |
gr.Markdown("**EXPLANATION** : This app transforms various document formats (like TXT, standard and scanned PDFs, DOCX, PPT, CSV, XLS, XLSX) into structured, machine-readable outputs optimized for Large Language Models (LLMs). It extracts and converts content into clean formats such as DocLing JSON (for document structure), plain text, Markdown, and HTML making it easier for AI models to process, analyze, or generate responses from complex documents without losing key details like layout or formatting. Essentially, it's a bridge between raw files and AI-ready data.")
|
| 171 |
|
|
|
|
| 209 |
|
| 210 |
if __name__ == "__main__":
|
| 211 |
app.launch(
|
|
|
|
|
|
|
| 212 |
share=True
|
| 213 |
)
|