Spaces:

Akarrahe
/

IQKillerv2

No application file

App Files Files Community

IQKillerv2 / pdf_upload_tool.py

AvikalpK

🚀 Enhanced IQKiller with Next.js Vercel version

0939a57 6 months ago

raw

history blame contribute delete

4.97 kB

	#!/usr/bin/env python3
	"""
	Standalone PDF Resume Text Extractor for IQKiller
	Run this script to extract text from your resume PDF, then copy the output into IQKiller
	"""

	import sys
	import os
	from pathlib import Path

	# PDF processing imports with error handling
	try:
	import PyPDF2
	PYPDF2_AVAILABLE = True
	except ImportError:
	PYPDF2_AVAILABLE = False

	try:
	import pdfplumber
	PDFPLUMBER_AVAILABLE = True
	except ImportError:
	PDFPLUMBER_AVAILABLE = False

	def _join_page_texts(pages) -> str:
	    """Return the text of every non-empty page, each under a page header.

	    Args:
	        pages: Iterable of page objects exposing ``extract_text()``.

	    Returns:
	        The page sections joined by newlines, or ``""`` when no page
	        yielded any text.
	    """
	    sections = []
	    for page_num, page in enumerate(pages, 1):
	        page_text = page.extract_text()
	        # Pages whose extract_text() result is falsy are skipped, but they
	        # still count towards the page numbering.
	        if page_text:
	            sections.append(f"--- Page {page_num} ---\n{page_text}\n")
	    return "\n".join(sections)


	def _extract_with_pdfplumber(pdf_path: str) -> str:
	    """Extract the text of *pdf_path* with pdfplumber."""
	    with pdfplumber.open(pdf_path) as pdf:
	        return _join_page_texts(pdf.pages)


	def _extract_with_pypdf2(pdf_path: str) -> str:
	    """Extract the text of *pdf_path* with PyPDF2."""
	    with open(pdf_path, 'rb') as file:
	        return _join_page_texts(PyPDF2.PdfReader(file).pages)


	def extract_text_from_pdf(pdf_path: str) -> str:
	    """Extract text from a PDF using whichever libraries are available.

	    pdfplumber is tried first and PyPDF2 is the fallback. The first library
	    that yields any text wins.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The extracted text, one ``--- Page N ---`` section per non-empty
	        page. On failure no exception is raised; instead a human-readable
	        message starting with "❌" is returned.
	    """
	    if not os.path.exists(pdf_path):
	        return f"❌ Error: File '{pdf_path}' not found!"
	    if not os.path.isfile(pdf_path):
	        # A directory (or other non-file) would otherwise fail inside the
	        # PDF libraries and produce a misleading "password-protected" hint.
	        return f"❌ Error: '{pdf_path}' is not a file!"

	    # Backends in order of preference; only importable ones are listed.
	    backends = []
	    if PDFPLUMBER_AVAILABLE:
	        backends.append(("pdfplumber", _extract_with_pdfplumber))
	    if PYPDF2_AVAILABLE:
	        backends.append(("PyPDF2", _extract_with_pypdf2))

	    if not backends:
	        return "\n".join([
	            "❌ Error: No PDF processing libraries available!",
	            "",
	            "Please install PDF processing libraries:",
	            "pip install PyPDF2 pdfplumber",
	            "",
	            "Then run this script again.",
	        ])

	    errors = []
	    for backend_name, extract in backends:
	        print(f"🔄 Trying {backend_name} extraction...")
	        try:
	            extracted_text = extract(pdf_path)
	        except Exception as e:
	            # Deliberately broad: any failure of one library must not stop
	            # the fallback from being tried. The error is kept for the
	            # final report.
	            errors.append(f"{backend_name} failed: {e}")
	            print(f"⚠️ {backend_name} failed: {e}")
	            continue

	        if extracted_text:
	            print(f"✅ Successfully extracted {len(extracted_text)} characters using {backend_name}")
	            return extracted_text

	    if errors:
	        error_lines = "\n".join(f"• {error}" for error in errors)
	        return "\n".join([
	            "❌ Error: Could not extract text from PDF!",
	            "",
	            "Errors encountered:",
	            error_lines,
	            "",
	            "Try:",
	            "1. Ensuring the PDF is not password-protected",
	            "2. Converting the PDF to a simpler format",
	            "3. Copy-pasting text manually from the PDF",
	        ])

	    # Every backend ran without error but none produced any text.
	    return "❌ No text could be extracted from this PDF."

	def main():
	    """Run the PDF text extractor as an interactive command-line tool.

	    The PDF path is taken from the first command-line argument, or asked
	    for interactively when no argument is given. The extracted text is
	    printed, followed by usage instructions and an offer to save the text
	    to ``<pdf stem>_extracted_text.txt`` in the current directory.

	    When extraction fails, only the error message is printed; the
	    instructions and the save prompt are skipped.
	    """
	    print("🎯 IQKiller PDF Resume Text Extractor")
	    print("=" * 50)

	    # Check if file path provided as argument
	    if len(sys.argv) > 1:
	        pdf_path = sys.argv[1]
	    else:
	        # Interactive mode
	        print("📁 Please provide the path to your resume PDF:")
	        print(" Example: /Users/username/Documents/resume.pdf")
	        print(" Or drag and drop your PDF file here:")
	        try:
	            typed_path = input("\nPDF Path: ")
	        except EOFError:
	            # stdin closed or not interactive: treat as "nothing entered".
	            typed_path = ""
	        # Pasted or dropped paths may arrive wrapped in quotes.
	        pdf_path = typed_path.strip().strip('"').strip("'")

	    if not pdf_path:
	        print("❌ No file path provided!")
	        return

	    # A path typed at the prompt (or passed quoted) is never expanded by a
	    # shell, so resolve a leading "~" here.
	    pdf_path = os.path.expanduser(pdf_path)

	    # Extract text
	    print(f"\n🔄 Processing: {pdf_path}")
	    print("-" * 50)

	    extracted_text = extract_text_from_pdf(pdf_path)

	    # extract_text_from_pdf reports failures as messages starting with "❌",
	    # whereas real output starts with a "--- Page N ---" header.
	    if extracted_text.startswith("❌"):
	        print("\n" + "=" * 50)
	        print(extracted_text)
	        print("=" * 50)
	        return

	    print("\n" + "=" * 50)
	    print("📄 EXTRACTED TEXT")
	    print("=" * 50)
	    print(extracted_text)
	    print("=" * 50)

	    # Instructions for use
	    print("\n📋 INSTRUCTIONS:")
	    print("1. Copy the extracted text above")
	    print("2. Open IQKiller in your browser: http://localhost:7860")
	    print("3. Paste the text into the 'Resume Text' field")
	    print("4. Add your job URL or description")
	    print("5. Generate your personalized interview guide!")

	    # Save to file option
	    try:
	        save_option = input("\n💾 Save extracted text to file? (y/n): ").lower().strip()
	    except EOFError:
	        # No answer available: do not save.
	        save_option = ""

	    if save_option in ['y', 'yes']:
	        # Written to the current working directory, named after the PDF.
	        output_file = Path(pdf_path).stem + "_extracted_text.txt"
	        try:
	            with open(output_file, 'w', encoding='utf-8') as f:
	                f.write(extracted_text)
	            print(f"✅ Text saved to: {output_file}")
	        except (OSError, UnicodeError) as e:
	            print(f"❌ Could not save file: {e}")

	# Script entry point: run the extractor only when this file is executed
	# directly, not when it is imported as a module.
	# NOTE(review): the main() call must be indented under this guard; the
	# indentation appears to have been stripped from this copy of the file.
	if __name__ == "__main__":
	main()