pierreguillou committed on
Commit
18f46da
·
verified ·
1 Parent(s): 89c6707

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -13,7 +13,7 @@ from docling.datamodel.base_models import InputFormat
13
  # --- START OF OCR CONFIGURATION ---
14
  # Configure DocLing converter with Tesseract OCR enabled
15
  pdf_options = PdfPipelineOptions(
16
- do_ocr=True,
17
  ocr_model="tesseract",
18
  ocr_languages=[
19
  "eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
@@ -165,7 +165,7 @@ def reset_form():
165
  # Gradio Interface
166
  with gr.Blocks(title="LLM-Ready Document Converter") as app:
167
 
168
- gr.Markdown("# 📄 Document Converter to LLM-ready")
169
  gr.Markdown("**HOWTO** : Upload a document and get 4 output files: Docling JSON, TXT, Markdown, and HTML")
170
  gr.Markdown("**EXPLANATION** : This app transforms various document formats (like TXT, standard and scanned PDFs, DOCX, PPT, CSV, XLS, XLSX) into structured, machine-readable outputs optimized for Large Language Models (LLMs). It extracts and converts content into clean formats such as DocLing JSON (for document structure), plain text, Markdown, and HTML making it easier for AI models to process, analyze, or generate responses from complex documents without losing key details like layout or formatting. Essentially, it's a bridge between raw files and AI-ready data.")
171
 
@@ -209,7 +209,5 @@ with gr.Blocks(title="LLM-Ready Document Converter") as app:
209
 
210
  if __name__ == "__main__":
211
  app.launch(
212
- # server_name="0.0.0.0",
213
- # server_port=7860,
214
  share=True
215
  )
 
13
  # --- START OF OCR CONFIGURATION ---
14
  # Configure DocLing converter with Tesseract OCR enabled
15
  pdf_options = PdfPipelineOptions(
16
+ do_ocr=False,
17
  ocr_model="tesseract",
18
  ocr_languages=[
19
  "eng", "fra", "deu", "spa", "ita", "por", "nld", "pol", "tur", "ces", "rus", "ukr", "ell", "ron", "hun",
 
165
  # Gradio Interface
166
  with gr.Blocks(title="LLM-Ready Document Converter") as app:
167
 
168
+ gr.Markdown("# 📄 LLM-Ready Document Converter")
169
  gr.Markdown("**HOWTO** : Upload a document and get 4 output files: Docling JSON, TXT, Markdown, and HTML")
170
  gr.Markdown("**EXPLANATION** : This app transforms various document formats (like TXT, standard and scanned PDFs, DOCX, PPT, CSV, XLS, XLSX) into structured, machine-readable outputs optimized for Large Language Models (LLMs). It extracts and converts content into clean formats such as DocLing JSON (for document structure), plain text, Markdown, and HTML making it easier for AI models to process, analyze, or generate responses from complex documents without losing key details like layout or formatting. Essentially, it's a bridge between raw files and AI-ready data.")
171
 
 
209
 
210
  if __name__ == "__main__":
211
  app.launch(
 
 
212
  share=True
213
  )