Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
| 4 |
import csv
|
| 5 |
import tempfile
|
| 6 |
import time
|
|
|
|
|
|
|
| 7 |
from typing import List, Dict, Any, Tuple
|
| 8 |
import PyPDF2
|
| 9 |
import docx2txt
|
|
@@ -11,14 +14,16 @@ import gradio as gr
|
|
| 11 |
import pandas as pd
|
| 12 |
import logging
|
| 13 |
from openai import OpenAI
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 17 |
|
| 18 |
# Global Configuration
|
| 19 |
-
# SECURITY: never commit the DeepInfra credential to source control. The key is
# read from the environment (e.g. a deployment secret); any key that has ever
# appeared in this file's history must be treated as leaked and rotated.
DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "")
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 21 |
-
DEFAULT_MODEL = "
|
| 22 |
REQUEST_TIMEOUT_SECS = 120
|
| 23 |
|
| 24 |
# OpenAI client for DeepInfra
|
|
@@ -97,6 +102,8 @@ Scoring Guide:
|
|
| 97 |
|
| 98 |
Keep each bullet short (max ~12 words).
|
| 99 |
Output ONLY JSON.
|
|
|
|
|
|
|
| 100 |
"""
|
| 101 |
|
| 102 |
RECOMMEND_SYSTEM = """You are a senior technical recruiter writing a concise recommendation summary for a hiring manager.
|
|
@@ -121,6 +128,31 @@ def _txt_to_text(path: str) -> str:
|
|
| 121 |
def _docx_to_text(path: str) -> str:
    """Return the text docx2txt extracts from ``path``, or "" if it yields nothing."""
    extracted = docx2txt.process(path)
    if extracted:
        return extracted
    return ""
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
def read_file_safely(path: str) -> str:
|
| 125 |
try:
|
| 126 |
low = path.lower()
|
|
@@ -130,39 +162,32 @@ def read_file_safely(path: str) -> str:
|
|
| 130 |
return _txt_to_text(path)
|
| 131 |
if low.endswith(".docx"):
|
| 132 |
return _docx_to_text(path)
|
|
|
|
|
|
|
| 133 |
return f"[Unsupported file type: {os.path.basename(path)}]"
|
| 134 |
except Exception as e:
|
| 135 |
logging.error(f"Error reading file {path}: {e}")
|
| 136 |
return f"[Error reading file: {e}]"
|
| 137 |
|
| 138 |
def safe_json_loads(text: str) -> dict:
    """Parse JSON embedded in free-form text such as an LLM reply.

    Lookup order:
      1. The first ```json ... ``` markdown block, if present.
      2. The span from the first '{' to the last '}'.
      3. The span from the first '[' to the last ']' (only reached when
         step 2 is absent or fails to parse), so a bare top-level array
         such as ``[{...}, {...}]`` is no longer discarded.

    Args:
        text: Raw text that may contain JSON; ``None``/empty is treated as "".

    Returns:
        The decoded JSON value. This is usually a dict, but a fenced block or
        a bare array can yield a list, so callers that need a dict should
        check the type. Returns ``{}`` when nothing parseable is found; this
        function never raises.
    """
    text = text or ""
    try:
        match = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
        if match:
            return json.loads(match.group(1))

        # Candidate spans, braces first so every input that parsed before
        # this fallback existed still produces exactly the same value.
        blocks = []
        for opener, closer in (("{", "}"), ("[", "]")):
            start_index = text.find(opener)
            end_index = text.rfind(closer)
            if start_index != -1 and end_index > start_index:
                blocks.append(text[start_index:end_index + 1])

        if not blocks:
            logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
            return {}

        error = None
        for block in blocks:
            try:
                return json.loads(block)
            except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                error = e
        logging.error(f"Failed to parse JSON: {error}\nRaw Text: {text[:500]}...")
        return {}
    except Exception as e:
        # Best-effort boundary: malformed fenced JSON or a non-string input
        # must degrade to an empty result rather than crash the pipeline.
        logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
        return {}
|
| 164 |
|
| 165 |
-
|
| 166 |
# --- LLM Chat Wrapper ---
|
| 167 |
def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
|
| 168 |
try:
|
|
@@ -205,10 +230,6 @@ def load_resume(resume_file) -> Tuple[str, str]:
|
|
| 205 |
|
| 206 |
# --- Resume Normalizer ---
|
| 207 |
def normalize_resume(raw_resume: Dict) -> Dict:
|
| 208 |
-
"""
|
| 209 |
-
Normalize a resume dictionary to ensure all expected fields exist,
|
| 210 |
-
and add 'summary' which can contain candidate's profile, summary, or objective.
|
| 211 |
-
"""
|
| 212 |
return {
|
| 213 |
"name": raw_resume.get("name", "").strip(),
|
| 214 |
"email": raw_resume.get("email", "").strip(),
|
|
@@ -220,6 +241,7 @@ def normalize_resume(raw_resume: Dict) -> Dict:
|
|
| 220 |
"languages": raw_resume.get("languages", []) or [],
|
| 221 |
"certificates": raw_resume.get("certificates", []) or [],
|
| 222 |
"soft_skills": raw_resume.get("soft_skills", []) or [],
|
|
|
|
| 223 |
"summary": raw_resume.get("summary", "") or raw_resume.get("profile", "") or raw_resume.get("objective", "")
|
| 224 |
}
|
| 225 |
|
|
@@ -269,6 +291,8 @@ def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]]
|
|
| 269 |
"experience_titles": [e.get("title","") for e in (c.get("experience") or [])][:30],
|
| 270 |
"education": [e.get("degree","") for e in (c.get("education") or [])][:20],
|
| 271 |
"languages": c.get("languages", [])[:20],
|
|
|
|
|
|
|
| 272 |
})
|
| 273 |
|
| 274 |
system = (
|
|
@@ -304,7 +328,6 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
|
| 304 |
"justification": str(it.get("justification","")).strip(),
|
| 305 |
})
|
| 306 |
return rows
|
| 307 |
-
# Add another check for a list of candidates directly
|
| 308 |
if isinstance(parsed, list):
|
| 309 |
for it in parsed:
|
| 310 |
rows.append({
|
|
@@ -313,11 +336,51 @@ def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
|
|
| 313 |
"justification": str(it.get("justification","")).strip(),
|
| 314 |
})
|
| 315 |
return rows
|
| 316 |
-
if not rows:
|
| 317 |
logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
|
| 318 |
rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
|
| 319 |
return rows
|
| 320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
def process(
|
| 322 |
jd_text,
|
| 323 |
jd_file,
|
|
@@ -333,7 +396,6 @@ def process(
|
|
| 333 |
temperature = 0.2
|
| 334 |
top_n = 5
|
| 335 |
|
| 336 |
-
# --- JD ---
|
| 337 |
t_jd_start = time.perf_counter()
|
| 338 |
jd_raw = load_job_description(jd_text or "", jd_file)
|
| 339 |
if not jd_raw.strip():
|
|
@@ -342,45 +404,32 @@ def process(
|
|
| 342 |
t_jd = time.perf_counter() - t_jd_start
|
| 343 |
logging.info(f"JD parsing time: {t_jd:.2f}s")
|
| 344 |
|
| 345 |
-
# --- Resumes parse ---
|
| 346 |
if not resume_files or len(resume_files) == 0:
|
| 347 |
-
raise gr.Error("Please upload at least one resume (PDF or
|
|
|
|
| 348 |
parsed_cands = []
|
| 349 |
name_to_file = {}
|
| 350 |
t_parse_total = 0.0
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
cand_struct['detailed_scores'] = detailed_feedback.get('scores', {})
|
| 368 |
-
cand_struct['summary_feedback'] = detailed_feedback.get('overall_summary', '')
|
| 369 |
-
cand_struct['strengths'] = detailed_feedback.get('strengths', [])
|
| 370 |
-
cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
|
| 371 |
-
cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])
|
| 372 |
-
|
| 373 |
-
# Generate recommendations
|
| 374 |
-
cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
|
| 375 |
-
|
| 376 |
-
parsed_cands.append(cand_struct)
|
| 377 |
-
name_to_file[cand_struct["name"]] = fname
|
| 378 |
-
t_parse_total += (time.perf_counter() - t_parse_s)
|
| 379 |
|
| 380 |
avg_parse = (t_parse_total / max(1, len(parsed_cands)))
|
| 381 |
-
logging.info(f"Total resume parsing time: {t_parse_total:.2f}s, avg: {avg_parse:.2f}s")
|
| 382 |
|
| 383 |
-
# --- Matching ---
|
| 384 |
t_match_start = time.perf_counter()
|
| 385 |
match_msgs = prompt_for_match(jd_struct, parsed_cands, conditional_req)
|
| 386 |
raw_match = deepinfra_chat(match_msgs, api_key=api_key, model=model_name, temperature=temperature)
|
|
@@ -413,31 +462,33 @@ def process(
|
|
| 413 |
**detailed_scores,
|
| 414 |
"recommendation": c.get("recommendation", ""),
|
| 415 |
"summary_feedback": c.get('summary_feedback', ''),
|
| 416 |
-
"strengths": ", ".join(c.get("strengths", [])),
|
| 417 |
-
"weaknesses": ", ".join(c.get("weaknesses", [])),
|
| 418 |
-
"missing_requirements": ", ".join(c.get("missing_requirements", [])),
|
| 419 |
-
"justification": just
|
|
|
|
| 420 |
})
|
| 421 |
|
| 422 |
-
df_export = pd.DataFrame(export_rows)
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
| 426 |
top_candidates_data = []
|
| 427 |
for _, row in df_export.head(top_n).iterrows():
|
| 428 |
top_candidates_data.append({
|
| 429 |
-
"Candidate": row
|
| 430 |
-
"Score": row
|
| 431 |
-
"Recommendation": row
|
| 432 |
-
"Justification": row
|
| 433 |
})
|
| 434 |
top_df = pd.DataFrame(top_candidates_data)
|
| 435 |
|
| 436 |
-
|
| 437 |
-
# --- Create a temporary file for the CSV export ---
|
| 438 |
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_file:
|
| 439 |
df_export.to_csv(tmp_file.name, index=False)
|
| 440 |
-
csv_file_path = tmp_file.name
|
| 441 |
|
| 442 |
t_total = time.perf_counter() - t0
|
| 443 |
logging.info(f"Total process time: {t_total:.2f}s")
|
|
@@ -445,36 +496,111 @@ def process(
|
|
| 445 |
return df_table, csv_file_path, top_df
|
| 446 |
|
| 447 |
# --- Gradio App ---
|
| 448 |
-
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
with gr.Row():
|
| 451 |
-
with gr.Column(scale=
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
with gr.Accordion("Advanced Options", open=False):
|
| 457 |
-
# Note: The sliders are for future use and are not currently wired into the LLM ranking prompt.
|
| 458 |
-
w_skill = gr.Slider(label="Weight: Skills", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 459 |
-
w_qual = gr.Slider(label="Weight: Qualifications", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 460 |
-
w_resp = gr.Slider(label="Weight: Responsibilities", minimum=0, maximum=1, value=0.25, interactive=False)
|
| 461 |
-
conditional_req = gr.Textbox(label="Conditional Requirement (optional)", placeholder="e.g., 'Must have experience with AWS services'")
|
| 462 |
|
| 463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
|
| 472 |
submit_btn.click(
|
| 473 |
-
|
| 474 |
-
# Note: Sliders are removed from inputs as they are not used in the backend logic.
|
| 475 |
inputs=[jd_text, jd_file, resume_files, conditional_req],
|
| 476 |
-
outputs=[results_table, csv_export,
|
| 477 |
)
|
| 478 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
if __name__ == "__main__":
|
| 480 |
demo.launch()
|
|
|
|
| 1 |
+
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
import json
|
| 5 |
import csv
|
| 6 |
import tempfile
|
| 7 |
import time
|
| 8 |
+
import subprocess
|
| 9 |
+
import shutil
|
| 10 |
from typing import List, Dict, Any, Tuple
|
| 11 |
import PyPDF2
|
| 12 |
import docx2txt
|
|
|
|
| 14 |
import pandas as pd
|
| 15 |
import logging
|
| 16 |
from openai import OpenAI
|
| 17 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 18 |
+
import gradio.themes.soft as SoftTheme # For the UI theme
|
| 19 |
|
| 20 |
# Configure logging
|
| 21 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 22 |
|
| 23 |
# Global Configuration
|
| 24 |
+
# SECURITY: never commit the DeepInfra credential to source control. The key is
# read from the environment (e.g. a deployment secret); any key that has ever
# appeared in this file's history must be treated as leaked and rotated.
DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "")
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
DEFAULT_MODEL = "openai/gpt-oss-120b"
REQUEST_TIMEOUT_SECS = 120
|
| 28 |
|
| 29 |
# OpenAI client for DeepInfra
|
|
|
|
| 102 |
|
| 103 |
Keep each bullet short (max ~12 words).
|
| 104 |
Output ONLY JSON.
|
| 105 |
+
|
| 106 |
+
|
| 107 |
"""
|
| 108 |
|
| 109 |
RECOMMEND_SYSTEM = """You are a senior technical recruiter writing a concise recommendation summary for a hiring manager.
|
|
|
|
| 128 |
def _docx_to_text(path: str) -> str:
    """Return the text docx2txt extracts from ``path``, or "" if it yields nothing."""
    extracted = docx2txt.process(path)
    if extracted:
        return extracted
    return ""
|
| 130 |
|
| 131 |
+
def _doc_to_text_using_external_tool(path: str) -> str:
|
| 132 |
+
if shutil.which("antiword"):
|
| 133 |
+
try:
|
| 134 |
+
out = subprocess.check_output(["antiword", path], stderr=subprocess.DEVNULL)
|
| 135 |
+
return out.decode(errors="ignore")
|
| 136 |
+
except Exception as e:
|
| 137 |
+
logging.debug(f"antiword failed for {path}: {e}")
|
| 138 |
+
if shutil.which("catdoc"):
|
| 139 |
+
try:
|
| 140 |
+
out = subprocess.check_output(["catdoc", path], stderr=subprocess.DEVNULL)
|
| 141 |
+
return out.decode(errors="ignore")
|
| 142 |
+
except Exception as e:
|
| 143 |
+
logging.debug(f"catdoc failed for {path}: {e}")
|
| 144 |
+
if shutil.which("soffice"):
|
| 145 |
+
try:
|
| 146 |
+
tmpdir = tempfile.mkdtemp()
|
| 147 |
+
subprocess.run(["soffice", "--headless", "--convert-to", "txt:Text", "--outdir", tmpdir, path], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
| 148 |
+
basename = os.path.splitext(os.path.basename(path))[0] + ".txt"
|
| 149 |
+
txt_path = os.path.join(tmpdir, basename)
|
| 150 |
+
if os.path.exists(txt_path):
|
| 151 |
+
return _txt_to_text(txt_path)
|
| 152 |
+
except Exception as e:
|
| 153 |
+
logging.debug(f"libreoffice conversion failed for {path}: {e}")
|
| 154 |
+
return f"[Unsupported or unreadable .doc file: {os.path.basename(path)}. Install antiword/catdoc or libreoffice to enable .doc reading]"
|
| 155 |
+
|
| 156 |
def read_file_safely(path: str) -> str:
|
| 157 |
try:
|
| 158 |
low = path.lower()
|
|
|
|
| 162 |
return _txt_to_text(path)
|
| 163 |
if low.endswith(".docx"):
|
| 164 |
return _docx_to_text(path)
|
| 165 |
+
if low.endswith(".doc"):
|
| 166 |
+
return _doc_to_text_using_external_tool(path)
|
| 167 |
return f"[Unsupported file type: {os.path.basename(path)}]"
|
| 168 |
except Exception as e:
|
| 169 |
logging.error(f"Error reading file {path}: {e}")
|
| 170 |
return f"[Error reading file: {e}]"
|
| 171 |
|
| 172 |
def safe_json_loads(text: str) -> dict:
    """Parse JSON embedded in free-form text such as an LLM reply.

    Lookup order:
      1. The first ```json ... ``` markdown block, if present.
      2. The span from the first '{' to the last '}'.
      3. The span from the first '[' to the last ']' (only reached when
         step 2 is absent or fails to parse), so a bare top-level array
         such as ``[{...}, {...}]`` is no longer discarded.

    Args:
        text: Raw text that may contain JSON; ``None``/empty is treated as "".

    Returns:
        The decoded JSON value. This is usually a dict, but a fenced block or
        a bare array can yield a list, so callers that need a dict should
        check the type. Returns ``{}`` when nothing parseable is found; this
        function never raises.
    """
    text = text or ""
    try:
        match = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
        if match:
            return json.loads(match.group(1))

        # Candidate spans, braces first so every input that parsed before
        # this fallback existed still produces exactly the same value.
        blocks = []
        for opener, closer in (("{", "}"), ("[", "]")):
            start_index = text.find(opener)
            end_index = text.rfind(closer)
            if start_index != -1 and end_index > start_index:
                blocks.append(text[start_index:end_index + 1])

        if not blocks:
            logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
            return {}

        error = None
        for block in blocks:
            try:
                return json.loads(block)
            except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                error = e
        logging.error(f"Failed to parse JSON: {error}\nRaw Text: {text[:500]}...")
        return {}
    except Exception as e:
        # Best-effort boundary: malformed fenced JSON or a non-string input
        # must degrade to an empty result rather than crash the pipeline.
        logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
        return {}
|
| 190 |
|
|
|
|
| 191 |
# --- LLM Chat Wrapper ---
|
| 192 |
def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
|
| 193 |
try:
|
|
|
|
| 230 |
|
| 231 |
# --- Resume Normalizer ---
|
| 232 |
def normalize_resume(raw_resume: Dict) -> Dict:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
return {
|
| 234 |
"name": raw_resume.get("name", "").strip(),
|
| 235 |
"email": raw_resume.get("email", "").strip(),
|
|
|
|
| 241 |
"languages": raw_resume.get("languages", []) or [],
|
| 242 |
"certificates": raw_resume.get("certificates", []) or [],
|
| 243 |
"soft_skills": raw_resume.get("soft_skills", []) or [],
|
| 244 |
+
"Projects": raw_resume.get("Projects", []) or [],
|
| 245 |
"summary": raw_resume.get("summary", "") or raw_resume.get("profile", "") or raw_resume.get("objective", "")
|
| 246 |
}
|
| 247 |
|
|
|
|
| 291 |
"experience_titles": [e.get("title","") for e in (c.get("experience") or [])][:30],
|
| 292 |
"education": [e.get("degree","") for e in (c.get("education") or [])][:20],
|
| 293 |
"languages": c.get("languages", [])[:20],
|
| 294 |
+
"certificates": c.get("certificates", [])[:20],
|
| 295 |
+
"Projects": c.get("Projects", [])[:20],
|
| 296 |
})
|
| 297 |
|
| 298 |
system = (
|
|
|
|
| 328 |
"justification": str(it.get("justification","")).strip(),
|
| 329 |
})
|
| 330 |
return rows
|
|
|
|
| 331 |
if isinstance(parsed, list):
|
| 332 |
for it in parsed:
|
| 333 |
rows.append({
|
|
|
|
| 336 |
"justification": str(it.get("justification","")).strip(),
|
| 337 |
})
|
| 338 |
return rows
|
| 339 |
+
if not rows:
|
| 340 |
logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
|
| 341 |
rows = [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": (content or "")[:2000]}]
|
| 342 |
return rows
|
| 343 |
|
| 344 |
+
# --- New: process single resume (for parallel execution) ---
|
| 345 |
+
def process_single_resume(f, jd_struct: Dict, api_key: str, model_name: str) -> Tuple[Dict, str, float]:
    """Load, extract, normalize and evaluate one resume (one unit of parallel work).

    Each LLM step (extraction, detailed feedback, recommendation) is wrapped
    in its own try/except: a failure is logged and replaced by an empty
    value, so one failing step does not discard the rest of the candidate.

    Args:
        f: Resume file handle/object as accepted by ``load_resume``.
        jd_struct: Structured job description passed to the feedback and
            recommendation calls.
        api_key: API key forwarded to every LLM call.
        model_name: Model identifier forwarded to every LLM call.

    Returns:
        ``(cand_struct, fname, elapsed_seconds)`` where ``cand_struct`` is the
        normalized resume dict enriched with ``detailed_scores``,
        ``summary_feedback``, ``strengths``, ``weaknesses``,
        ``missing_requirements`` and ``recommendation``.
    """
    t0 = time.perf_counter()
    text, fname = load_resume(f)
    contacts = quick_contacts(text)
    try:
        raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
    except Exception as e:
        logging.error(f"LLM resume extract failed for {fname}: {e}")
        raw_resume = {}

    cand_struct = normalize_resume(raw_resume)

    # Fall back to the file name (without extension) when no name was extracted.
    if not cand_struct.get("name"):
        cand_struct["name"] = os.path.splitext(fname)[0]

    # Fill blank contact fields from the quick_contacts guesses. A plain
    # setdefault() is not enough here: the normalized dict already carries
    # the key with an empty string, which setdefault would leave untouched.
    for field, guess_key in (("email", "email_guess"), ("phone", "phone_guess")):
        if cand_struct.get(field):
            continue
        guess = contacts[guess_key]
        # Do not replace an existing blank value with another falsy guess.
        if guess or field not in cand_struct:
            cand_struct[field] = guess

    try:
        detailed_feedback = llm_detailed_feedback(jd_struct, cand_struct, api_key, model_name)
    except Exception as e:
        logging.error(f"LLM detailed feedback failed for {fname}: {e}")
        detailed_feedback = {}

    cand_struct['detailed_scores'] = detailed_feedback.get('scores', {})
    cand_struct['summary_feedback'] = detailed_feedback.get('overall_summary', '')
    cand_struct['strengths'] = detailed_feedback.get('strengths', [])
    cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
    cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])

    try:
        cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
    except Exception as e:
        logging.error(f"LLM recommendation failed for {fname}: {e}")
        cand_struct["recommendation"] = ""

    t_elapsed = time.perf_counter() - t0
    return cand_struct, fname, t_elapsed
|
| 383 |
+
|
| 384 |
def process(
|
| 385 |
jd_text,
|
| 386 |
jd_file,
|
|
|
|
| 396 |
temperature = 0.2
|
| 397 |
top_n = 5
|
| 398 |
|
|
|
|
| 399 |
t_jd_start = time.perf_counter()
|
| 400 |
jd_raw = load_job_description(jd_text or "", jd_file)
|
| 401 |
if not jd_raw.strip():
|
|
|
|
| 404 |
t_jd = time.perf_counter() - t_jd_start
|
| 405 |
logging.info(f"JD parsing time: {t_jd:.2f}s")
|
| 406 |
|
|
|
|
| 407 |
if not resume_files or len(resume_files) == 0:
|
| 408 |
+
raise gr.Error("Please upload at least one resume (PDF, DOCX, DOC, or TXT).")
|
| 409 |
+
|
| 410 |
parsed_cands = []
|
| 411 |
name_to_file = {}
|
| 412 |
t_parse_total = 0.0
|
| 413 |
+
files_to_process = resume_files[:50]
|
| 414 |
+
max_workers = min(8, max(1, len(files_to_process)))
|
| 415 |
+
|
| 416 |
+
futures = []
|
| 417 |
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 418 |
+
for f in files_to_process:
|
| 419 |
+
futures.append(executor.submit(process_single_resume, f, jd_struct, api_key, model_name))
|
| 420 |
+
|
| 421 |
+
for future in as_completed(futures):
|
| 422 |
+
try:
|
| 423 |
+
cand_struct, fname, elapsed = future.result()
|
| 424 |
+
parsed_cands.append(cand_struct)
|
| 425 |
+
name_to_file[cand_struct["name"]] = fname
|
| 426 |
+
t_parse_total += elapsed
|
| 427 |
+
except Exception as e:
|
| 428 |
+
logging.error(f"Error processing a resume in parallel: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
avg_parse = (t_parse_total / max(1, len(parsed_cands)))
|
| 431 |
+
logging.info(f"Total resume parsing time: {t_parse_total:.2f}s, avg per file: {avg_parse:.2f}s")
|
| 432 |
|
|
|
|
| 433 |
t_match_start = time.perf_counter()
|
| 434 |
match_msgs = prompt_for_match(jd_struct, parsed_cands, conditional_req)
|
| 435 |
raw_match = deepinfra_chat(match_msgs, api_key=api_key, model=model_name, temperature=temperature)
|
|
|
|
| 462 |
**detailed_scores,
|
| 463 |
"recommendation": c.get("recommendation", ""),
|
| 464 |
"summary_feedback": c.get('summary_feedback', ''),
|
| 465 |
+
"strengths": ", ".join([str(s) for s in c.get("strengths", [])]),
|
| 466 |
+
"weaknesses": ", ".join([str(s) for s in c.get("weaknesses", [])]),
|
| 467 |
+
"missing_requirements": ", ".join([str(s) for s in c.get("missing_requirements", [])]),
|
| 468 |
+
"justification": just,
|
| 469 |
+
"full_json": json.dumps(c, ensure_ascii=False)
|
| 470 |
})
|
| 471 |
|
| 472 |
+
df_export = pd.DataFrame(export_rows)
|
| 473 |
+
if "score" in df_export.columns:
|
| 474 |
+
df_export = df_export.sort_values("score", ascending=False)
|
| 475 |
+
df_table = pd.DataFrame(table_rows)
|
| 476 |
+
if "Score (0-10)" in df_table.columns:
|
| 477 |
+
df_table = df_table.sort_values("Score (0-10)", ascending=False)
|
| 478 |
+
|
| 479 |
top_candidates_data = []
|
| 480 |
for _, row in df_export.head(top_n).iterrows():
|
| 481 |
top_candidates_data.append({
|
| 482 |
+
"Candidate": row.get("candidate", ""),
|
| 483 |
+
"Score": row.get("score", 0),
|
| 484 |
+
"Recommendation": row.get("recommendation", ""),
|
| 485 |
+
"Justification": row.get("justification", ""),
|
| 486 |
})
|
| 487 |
top_df = pd.DataFrame(top_candidates_data)
|
| 488 |
|
|
|
|
|
|
|
| 489 |
with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.csv', encoding='utf-8') as tmp_file:
|
| 490 |
df_export.to_csv(tmp_file.name, index=False)
|
| 491 |
+
csv_file_path = tmp_file.name
|
| 492 |
|
| 493 |
t_total = time.perf_counter() - t0
|
| 494 |
logging.info(f"Total process time: {t_total:.2f}s")
|
|
|
|
| 496 |
return df_table, csv_file_path, top_df
|
| 497 |
|
| 498 |
# --- Gradio App ---
|
| 499 |
+
|
| 500 |
+
CUSTOM_CSS = """
|
| 501 |
+
/* Add a subtle background gradient and use a nicer font */
|
| 502 |
+
.gradio-container {
|
| 503 |
+
background-image: linear-gradient(to top, #f3e7e9 0%, #e3eeff 99%, #e3eeff 100%);
|
| 504 |
+
font-family: 'IBM Plex Sans', sans-serif;
|
| 505 |
+
}
|
| 506 |
+
/* Style the input/output areas like cards */
|
| 507 |
+
.gradio-row > .gradio-column, .gradio-group {
|
| 508 |
+
border: 1px solid #E5E7EB;
|
| 509 |
+
border-radius: 12px;
|
| 510 |
+
box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
|
| 511 |
+
background-color: white;
|
| 512 |
+
padding: 15px;
|
| 513 |
+
}
|
| 514 |
+
/* Make the file upload area more prominent */
|
| 515 |
+
.gradio-file {
|
| 516 |
+
border: 2px dashed #A4B0BE;
|
| 517 |
+
border-radius: 8px;
|
| 518 |
+
padding: 20px;
|
| 519 |
+
transition: all 0.2s ease;
|
| 520 |
+
}
|
| 521 |
+
.gradio-file:hover {
|
| 522 |
+
border-color: #4A90E2;
|
| 523 |
+
background-color: #F9FAFB;
|
| 524 |
+
}
|
| 525 |
+
"""
|
| 526 |
+
|
| 527 |
+
with gr.Blocks(theme=SoftTheme.Soft(), css=CUSTOM_CSS, title="AI Resume Matcher") as demo:
|
| 528 |
+
gr.Markdown(
|
| 529 |
+
"<h1 style='text-align: center; color: #1E3A8A;'>π€ AI Resume Matcher & Ranking</h1>"
|
| 530 |
+
"<p style='text-align: center; color: #4B5563;'>Upload a job description and resumes to automatically rank candidates.</p>"
|
| 531 |
+
)
|
| 532 |
+
|
| 533 |
with gr.Row():
|
| 534 |
+
with gr.Column(scale=2):
|
| 535 |
+
gr.Markdown("### π Step 1: Provide Inputs")
|
| 536 |
+
with gr.Group():
|
| 537 |
+
jd_text = gr.Textbox(label="Paste Job Description", lines=8, placeholder="Paste the full job description here...")
|
| 538 |
+
jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
+
resume_files = gr.File(
|
| 541 |
+
label="π Step 2: Upload Resumes (.pdf, .docx, .doc, .txt)",
|
| 542 |
+
file_types=[".pdf", ".docx", ".doc", ".txt"],
|
| 543 |
+
file_count="multiple"
|
| 544 |
+
)
|
| 545 |
+
|
| 546 |
+
with gr.Accordion("βοΈ Advanced Options", open=False):
|
| 547 |
+
conditional_req = gr.Textbox(
|
| 548 |
+
label="Conditional Requirement (Optional)",
|
| 549 |
+
placeholder="e.g., 'Must have 5+ years of Python experience'"
|
| 550 |
+
)
|
| 551 |
+
|
| 552 |
+
with gr.Row():
|
| 553 |
+
clear_btn = gr.Button("Clear All")
|
| 554 |
+
submit_btn = gr.Button("π Run Matching & Ranking", variant="primary", scale=2)
|
| 555 |
+
|
| 556 |
+
with gr.Column(scale=3):
|
| 557 |
+
gr.Markdown("### β¨ Step 3: View Results")
|
| 558 |
+
status_md = gr.Markdown("Status: Ready. Please provide inputs and click Run.", visible=True)
|
| 559 |
+
|
| 560 |
+
with gr.Tabs():
|
| 561 |
+
with gr.TabItem("π Top Candidates Summary"):
|
| 562 |
+
top_table = gr.DataFrame(label="Top 5 Candidates", interactive=False, headers=["Candidate", "Score", "Recommendation", "Justification"])
|
| 563 |
+
with gr.TabItem("π Detailed Ranking"):
|
| 564 |
+
results_table = gr.DataFrame(label="Full Candidate Ranking")
|
| 565 |
+
with gr.TabItem("π₯ Download Report"):
|
| 566 |
+
gr.Markdown("Click the file below to download the complete analysis, including all extracted data and feedback, in CSV format.")
|
| 567 |
+
csv_export = gr.File(label="Download Full Report (CSV)")
|
| 568 |
+
|
| 569 |
+
# This is a new state object to hold the results to avoid re-running the 'process' function
|
| 570 |
+
results_state = gr.State({})
|
| 571 |
|
| 572 |
+
def run_process_and_update_status(jd_text, jd_file, resume_files, conditional_req):
    """Generator click-handler: show progress, run ``process``, then publish results.

    Every yielded tuple has five items in the order
    ``(status markdown, full ranking table, top-candidates table, CSV path,
    results state)``.

    Yields:
        First a "processing" status with all result slots cleared; then
        either the finished results, or an error status with the result
        slots cleared when ``process`` raises.
    """
    # Clear previous results while running.
    yield (
        gr.Markdown(
            value="⏳ Processing... Analyzing job description and resumes. This may take a moment.",
            visible=True,
        ),
        pd.DataFrame(),
        pd.DataFrame(),
        None,
        {},
    )

    try:
        df_table, csv_path, top_df = process(jd_text, jd_file, resume_files, conditional_req)
        status_message = f"✅ Done! Analyzed {len(df_table)} resumes. See results below."
    except Exception as e:
        # Surface the failure in the status line instead of crashing the UI.
        yield (
            gr.Markdown(value=f"❌ Error: {e}", visible=True),
            pd.DataFrame(),
            pd.DataFrame(),
            None,
            {},
        )
        return

    results = {
        "df_table": df_table,
        "csv_path": csv_path,
        "top_df": top_df,
    }
    yield gr.Markdown(value=status_message, visible=True), df_table, top_df, csv_path, results
|
| 588 |
+
|
| 589 |
+
def clear_all():
    """Return reset values for all nine components wired to the Clear button.

    Order: JD text, JD file, resume files, conditional requirement, full
    ranking table, top-candidates table, CSV export, status markdown,
    results state.
    """
    blank_inputs = (None, None, [], "")
    blank_outputs = (pd.DataFrame(), pd.DataFrame(), None)
    status_reset = gr.Markdown(value="Status: Cleared. Ready for new inputs.", visible=True)
    return (*blank_inputs, *blank_outputs, status_reset, {})
|
| 591 |
|
| 592 |
submit_btn.click(
|
| 593 |
+
run_process_and_update_status,
|
|
|
|
| 594 |
inputs=[jd_text, jd_file, resume_files, conditional_req],
|
| 595 |
+
outputs=[status_md, results_table, top_table, csv_export, results_state]
|
| 596 |
)
|
| 597 |
|
| 598 |
+
clear_btn.click(
|
| 599 |
+
clear_all,
|
| 600 |
+
inputs=[],
|
| 601 |
+
outputs=[jd_text, jd_file, resume_files, conditional_req, results_table, top_table, csv_export, status_md, results_state]
|
| 602 |
+
)
|
| 603 |
+
|
| 604 |
+
|
| 605 |
if __name__ == "__main__":
|
| 606 |
demo.launch()
|