Spaces:
No application file
No application file
#!/usr/bin/env python3
"""
Standalone PDF resume text extractor for IQKiller.

Run this script to extract the text from your resume PDF, then copy the
output into IQKiller.
"""
import sys
import os
from pathlib import Path

# Both PDF backends are optional third-party packages.  Each import is
# attempted once at module load and the outcome is recorded in a flag, so the
# rest of the script can pick whichever backend is present without raising
# ImportError at call time.
try:
    import PyPDF2
    PYPDF2_AVAILABLE = True
except ImportError:
    PYPDF2_AVAILABLE = False

try:
    import pdfplumber
    PDFPLUMBER_AVAILABLE = True
except ImportError:
    PDFPLUMBER_AVAILABLE = False
def _join_pages(page_texts) -> str:
    """Join per-page text into one string, labelling every non-empty page.

    *page_texts* is an iterable of per-page strings (or ``None``).  Page
    numbers are 1-based positions in that iterable; pages with no text are
    skipped but still counted.  Returns ``""`` when no page has any text.
    """
    return "\n".join(
        f"--- Page {page_num} ---\n{page_text}\n"
        for page_num, page_text in enumerate(page_texts, 1)
        if page_text
    )


def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of a PDF, trying pdfplumber first and PyPDF2 second.

    A backend is used only if its import succeeded at module load
    (``PDFPLUMBER_AVAILABLE`` / ``PYPDF2_AVAILABLE``).  The first backend that
    yields any text wins; a backend that raises is recorded and the next one
    is tried.  Progress is printed to stdout along the way.

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        On success, the text of every non-empty page, each preceded by a
        ``--- Page N ---`` header.  On failure, a human-readable message
        (path is not a file, no backend installed, every backend raised, or
        no text found).  Failures are reported through the return value;
        nothing is raised.
    """
    # NOTE(review): the leading glyphs in the messages below look like
    # mis-decoded emoji; they are reproduced as found -- confirm against the
    # upstream file before changing them.

    # isfile() rather than exists(): a directory path would otherwise reach
    # the backends and come back as a misleading "could not extract" report.
    if not os.path.isfile(pdf_path):
        return f"β Error: File '{pdf_path}' not found!"

    errors = []

    # Try pdfplumber first (usually better for formatted text).
    if PDFPLUMBER_AVAILABLE:
        try:
            print("π Trying pdfplumber extraction...")
            with pdfplumber.open(pdf_path) as pdf:
                # The generator is fully consumed here, while the PDF is open.
                extracted_text = _join_pages(
                    page.extract_text() for page in pdf.pages
                )
            if extracted_text:
                print(f"β Successfully extracted {len(extracted_text)} characters using pdfplumber")
                return extracted_text
        except Exception as e:
            # Deliberately broad: any backend failure should fall through to
            # the next backend instead of aborting the whole extraction.
            errors.append(f"pdfplumber failed: {e}")
            print(f"β οΈ pdfplumber failed: {e}")

    # Fall back to PyPDF2.  Reaching this point means pdfplumber was absent,
    # raised, or found no text.
    if PYPDF2_AVAILABLE:
        try:
            print("π Trying PyPDF2 extraction...")
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                extracted_text = _join_pages(
                    page.extract_text() for page in reader.pages
                )
            if extracted_text:
                print(f"β Successfully extracted {len(extracted_text)} characters using PyPDF2")
                return extracted_text
        except Exception as e:
            errors.append(f"PyPDF2 failed: {e}")
            print(f"β οΈ PyPDF2 failed: {e}")

    # Nothing was extracted: explain why.
    if not PDFPLUMBER_AVAILABLE and not PYPDF2_AVAILABLE:
        return (
            "β Error: No PDF processing libraries available!\n"
            "Please install PDF processing libraries:\n"
            "pip install PyPDF2 pdfplumber\n"
            "Then run this script again."
        )

    if errors:
        error_lines = "\n".join(f"β’ {error}" for error in errors)
        return (
            "β Error: Could not extract text from PDF!\n"
            "Errors encountered:\n"
            f"{error_lines}\n"
            "Try:\n"
            "1. Ensuring the PDF is not password-protected\n"
            "2. Converting the PDF to a simpler format\n"
            "3. Copy-pasting text manually from the PDF"
        )

    # Every available backend ran without error but found no text.
    return "β No text could be extracted from this PDF."
def _extraction_succeeded(text: str) -> bool:
    """Return True when *text* is extractor output rather than an error message.

    ``extract_text_from_pdf`` reports failures through its return value, so
    the two can only be told apart by content: successful output always starts
    with a ``--- Page N ---`` header, and none of its error messages do.
    """
    return text.startswith("--- Page ")


def main():
    """Run the extractor as a command-line tool.

    The PDF path is taken from the first command-line argument, or prompted
    for when no argument is given.  The extracted text is printed, followed by
    usage instructions and an offer to save the text next to the current
    working directory as ``<pdf stem>_extracted_text.txt``.  If extraction
    fails, the error is printed and the function returns without showing the
    instructions or the save prompt.
    """
    # NOTE(review): the leading glyphs in the messages below look like
    # mis-decoded emoji; they are reproduced as found -- confirm against the
    # upstream file before changing them.
    print("π― IQKiller PDF Resume Text Extractor")
    print("=" * 50)

    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    else:
        # Interactive mode.
        print("π Please provide the path to your resume PDF:")
        print(" Example: /Users/username/Documents/resume.pdf")
        print(" Or drag and drop your PDF file here:")
        try:
            # Dragging a file into a terminal often wraps the path in quotes.
            pdf_path = input("\nPDF Path: ").strip().strip('"').strip("'")
        except EOFError:
            # stdin is closed or exhausted: same as giving no path.
            pdf_path = ""

    if not pdf_path:
        print("β No file path provided!")
        return

    # input() does not go through a shell, so "~/resume.pdf" must be expanded
    # here; for argv paths the shell has usually done it and this is a no-op.
    pdf_path = os.path.expanduser(pdf_path)

    print(f"\nπ Processing: {pdf_path}")
    print("-" * 50)
    extracted_text = extract_text_from_pdf(pdf_path)

    if not _extraction_succeeded(extracted_text):
        # Show the error on its own; telling the user to copy it into
        # IQKiller or saving it as "extracted text" would be misleading.
        print(f"\n{extracted_text}")
        return

    print("\n" + "=" * 50)
    print("π EXTRACTED TEXT")
    print("=" * 50)
    print(extracted_text)
    print("=" * 50)

    print("\nπ INSTRUCTIONS:")
    print("1. Copy the extracted text above")
    print("2. Open IQKiller in your browser: http://localhost:7860")
    print("3. Paste the text into the 'Resume Text' field")
    print("4. Add your job URL or description")
    print("5. Generate your personalized interview guide!")

    try:
        save_option = input("\nπΎ Save extracted text to file? (y/n): ").lower().strip()
    except EOFError:
        # Non-interactive run: treat a missing answer as "no".
        save_option = ""

    if save_option in ['y', 'yes']:
        # Relative name: the file lands in the current working directory.
        output_file = Path(pdf_path).stem + "_extracted_text.txt"
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(extracted_text)
            print(f"β Text saved to: {output_file}")
        except (OSError, UnicodeError) as e:
            print(f"β Could not save file: {e}")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C at a prompt: finish the current line and leave quietly
        # instead of dumping a traceback.  130 is the conventional exit
        # status for a process ended by SIGINT (128 + 2).
        print()
        sys.exit(130)