#!/usr/bin/env python3
"""Standalone PDF resume text extractor for IQKiller.

Run this script to extract text from your resume PDF, then copy the output
into IQKiller.
"""

import os
import sys
from pathlib import Path
from typing import Iterable, Optional

# PDF processing imports with error handling: both libraries are optional and
# the extractor uses whichever ones are installed.
try:
    import PyPDF2
    PYPDF2_AVAILABLE = True
except ImportError:
    PYPDF2_AVAILABLE = False

try:
    import pdfplumber
    PDFPLUMBER_AVAILABLE = True
except ImportError:
    PDFPLUMBER_AVAILABLE = False

# Every failure message returned by extract_text_from_pdf starts with this
# marker, whereas successful output always starts with "--- Page".
_ERROR_PREFIX = "❌"


def _format_pages(page_texts: Iterable[Optional[str]]) -> str:
    """Join per-page texts under '--- Page N ---' headers, skipping empty pages.

    Page numbers count every page, so a skipped empty page leaves a gap in the
    numbering rather than renumbering the pages that follow it.
    """
    parts = [
        f"--- Page {page_num} ---\n{page_text}\n"
        for page_num, page_text in enumerate(page_texts, 1)
        if page_text
    ]
    return "\n".join(parts)


def _read_with_pdfplumber(pdf_path: str) -> str:
    """Return the formatted text of every page as extracted by pdfplumber."""
    with pdfplumber.open(pdf_path) as pdf:
        # The generator is consumed inside the call, while the PDF is open.
        return _format_pages(page.extract_text() for page in pdf.pages)


def _read_with_pypdf2(pdf_path: str) -> str:
    """Return the formatted text of every page as extracted by PyPDF2."""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        return _format_pages(page.extract_text() for page in reader.pages)


def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from a PDF using whichever supported libraries are installed.

    pdfplumber is tried first (usually better for formatted text) and PyPDF2
    is the fallback when pdfplumber is missing, fails, or finds no text.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        The extracted text, with each non-empty page introduced by a
        ``--- Page N ---`` header. On failure no exception is raised; instead a
        human-readable message starting with "❌" is returned.
    """
    # isfile (rather than exists) so that a directory is rejected up front
    # instead of producing confusing library errors further down.
    if not os.path.isfile(pdf_path):
        return f"❌ Error: File '{pdf_path}' not found!"

    backends = []
    if PDFPLUMBER_AVAILABLE:
        backends.append(("pdfplumber", _read_with_pdfplumber))
    if PYPDF2_AVAILABLE:
        backends.append(("PyPDF2", _read_with_pypdf2))

    if not backends:
        return "\n".join([
            "❌ Error: No PDF processing libraries available!",
            "",
            "Please install PDF processing libraries:",
            "pip install PyPDF2 pdfplumber",
            "",
            "Then run this script again.",
        ])

    errors = []
    for name, read_pdf in backends:
        print(f"🔄 Trying {name} extraction...")
        try:
            text = read_pdf(pdf_path)
        except Exception as e:
            # Deliberately broad: the PDF libraries raise many unrelated
            # exception types on malformed input, and any failure should
            # simply move on to the next backend.
            errors.append(f"{name} failed: {e}")
            print(f"⚠️ {name} failed: {e}")
            continue
        if text:
            print(f"✅ Successfully extracted {len(text)} characters using {name}")
            return text

    if errors:
        bullet_list = "\n".join(f"• {error}" for error in errors)
        return "\n".join([
            "❌ Error: Could not extract text from PDF!",
            "",
            "Errors encountered:",
            bullet_list,
            "",
            "Try:",
            "1. Ensuring the PDF is not password-protected",
            "2. Converting the PDF to a simpler format",
            "3. Copy-pasting text manually from the PDF",
        ])

    return "❌ No text could be extracted from this PDF."


def main():
    """Run the extractor interactively or on the path given as argv[1].

    Prints the extracted text together with usage instructions and optionally
    saves the text to ``<pdf stem>_extracted_text.txt`` in the current working
    directory. When extraction fails, only the failure message is printed.
    """
    print("🎯 IQKiller PDF Resume Text Extractor")
    print("=" * 50)

    # Check if file path provided as argument
    if len(sys.argv) > 1:
        pdf_path = sys.argv[1]
    else:
        # Interactive mode
        print("📁 Please provide the path to your resume PDF:")
        print(" Example: /Users/username/Documents/resume.pdf")
        print(" Or drag and drop your PDF file here:")
        try:
            pdf_path = input("\nPDF Path: ").strip().strip('"').strip("'")
        except EOFError:
            # stdin is closed (e.g. piped or non-interactive run).
            pdf_path = ""

    if not pdf_path:
        print("❌ No file path provided!")
        return

    # A typed path such as "~/resume.pdf" is not expanded by any shell here.
    pdf_path = os.path.expanduser(pdf_path)

    # Extract text
    print(f"\n🔄 Processing: {pdf_path}")
    print("-" * 50)

    extracted_text = extract_text_from_pdf(pdf_path)

    if extracted_text.startswith(_ERROR_PREFIX):
        # A failure message is not resume text: show it, but do not present it
        # as extracted text or offer to save it.
        print("\n" + "=" * 50)
        print(extracted_text)
        print("=" * 50)
        return

    print("\n" + "=" * 50)
    print("📄 EXTRACTED TEXT")
    print("=" * 50)
    print(extracted_text)
    print("=" * 50)

    # Instructions for use
    print("\n📋 INSTRUCTIONS:")
    print("1. Copy the extracted text above")
    print("2. Open IQKiller in your browser: http://localhost:7860")
    print("3. Paste the text into the 'Resume Text' field")
    print("4. Add your job URL or description")
    print("5. Generate your personalized interview guide!")

    # Save to file option
    try:
        save_option = input("\n💾 Save extracted text to file? (y/n): ").lower().strip()
    except EOFError:
        # No answer available; treat it as declining to save.
        save_option = "n"

    if save_option in ("y", "yes"):
        output_file = Path(pdf_path).stem + "_extracted_text.txt"
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                f.write(extracted_text)
        except (OSError, UnicodeError) as e:
            print(f"❌ Could not save file: {e}")
        else:
            print(f"✅ Text saved to: {output_file}")


if __name__ == "__main__":
    main()