File size: 25,348 Bytes
769a231
33b4426
 
 
 
 
 
769a231
 
33b4426
 
 
 
 
1687f4a
6bc6f4a
769a231
 
1687f4a
 
6bc6f4a
33b4426
 
769a231
1687f4a
769a231
33b4426
 
6bc6f4a
 
 
 
 
 
6192e6f
33b4426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897630b
 
 
33b4426
 
 
1687f4a
33b4426
1687f4a
33b4426
 
 
1687f4a
 
 
 
897630b
 
 
1687f4a
33b4426
 
 
 
 
897630b
 
 
 
 
 
 
 
 
 
 
33b4426
 
769a231
 
33b4426
 
1687f4a
 
 
6bc6f4a
 
1687f4a
897630b
33b4426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b4426
 
 
 
 
 
 
 
 
769a231
 
33b4426
 
1687f4a
33b4426
 
 
6192e6f
33b4426
6192e6f
 
 
 
 
 
 
 
 
 
 
33b4426
1687f4a
 
33b4426
 
897630b
33b4426
1687f4a
6bc6f4a
 
 
 
 
 
 
1687f4a
6bc6f4a
 
1687f4a
 
33b4426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897630b
 
 
 
 
 
 
 
 
 
 
 
 
769a231
897630b
 
 
 
33b4426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1687f4a
33b4426
 
1687f4a
33b4426
 
 
 
 
1687f4a
 
 
 
 
 
 
b819e7b
897630b
1687f4a
33b4426
 
 
 
 
 
 
 
 
 
769a231
 
33b4426
 
 
 
6192e6f
33b4426
 
 
 
 
1687f4a
33b4426
 
 
 
 
 
 
 
1687f4a
 
33b4426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
769a231
6192e6f
33b4426
 
 
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b4426
 
 
 
897630b
33b4426
 
658a650
33b4426
658a650
 
 
 
 
33b4426
 
 
 
 
 
 
1687f4a
33b4426
 
769a231
 
33b4426
 
 
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897630b
1687f4a
769a231
33b4426
 
1687f4a
33b4426
 
 
1687f4a
33b4426
 
658a650
33b4426
 
6192e6f
1687f4a
33b4426
 
1687f4a
897630b
 
 
 
 
 
33b4426
 
 
 
 
 
6192e6f
897630b
6192e6f
897630b
769a231
 
 
 
 
33b4426
 
769a231
 
 
 
 
 
 
6192e6f
 
 
769a231
 
 
 
6192e6f
 
 
 
 
769a231
33b4426
897630b
 
33b4426
6192e6f
33b4426
897630b
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897630b
769a231
 
 
 
 
6192e6f
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6192e6f
769a231
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897630b
 
769a231
6192e6f
769a231
33b4426
 
769a231
 
 
 
 
 
 
6192e6f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606

import os
import re
import json
import csv
import tempfile
import time
import subprocess
import shutil
from typing import List, Dict, Any, Tuple
import PyPDF2
import docx2txt
import gradio as gr
import pandas as pd
import logging
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed
import gradio.themes.soft as SoftTheme # For the UI theme

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Global Configuration
# The API key is a credential and must not live in source control, so it is
# read from the DEEPINFRA_API_KEY environment variable. process() raises a
# user-facing error when the value is empty.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY", "").strip()
DEEPINFRA_BASE_URL = "https://api.deepinfra.com/v1/openai"
DEFAULT_MODEL = "openai/gpt-oss-120b"
REQUEST_TIMEOUT_SECS = 120

# OpenAI client for DeepInfra
# A placeholder is passed when no key is configured so that importing this
# module never fails at client construction; process() rejects the missing
# key before any request is sent.
default_client = OpenAI(
    api_key=DEEPINFRA_API_KEY or "missing-api-key",
    base_url=DEEPINFRA_BASE_URL,
)

# --- Prompts for LLM Calls ---
# System prompt used by llm_extract_jd: instructs the model to turn a raw job
# description into a JSON object with the keys listed under "Schema".
JD_SYSTEM = """You are an expert recruitment analyst. Extract a job description into STRICT JSON.
Rules:
- Output ONLY JSON (no markdown, no prose).
- If the JD language is not English, still output keys in English but translate skills into an additional 'skills_en' array.
- Keep items short and normalized (e.g., 'python', 'sql').
Schema:
{
  "title": "",
  "seniority": "",
  "skills": [],
  "skills_en": [],
  "qualifications": [],
  "responsibilities": [],
  "nice_to_have": []
}
"""

# System prompt used by llm_extract_resume: instructs the model to turn raw
# resume text into a JSON candidate profile. normalize_resume reads the keys
# of this schema (plus a few optional extras) from the model's reply.
RESUME_SYSTEM = """You are an expert resume parser. Extract a candidate profile into STRICT JSON.
Rules:
- Output ONLY JSON (no markdown, no prose).
- Provide 'skills_en' translated/normalized to English for matching.
- Keep arrays compact, deduplicate entries.
Schema:
{
  "name": "",
  "email": "",
  "phone": "",
  "skills": [],
  "skills_en": [],
  "education": [{"degree":"", "field":"", "institution":"", "year":""}],
  "experience": [{"title":"", "company":"", "start_date":"", "end_date":"", "summary":""}],
  "languages": [],
  "certificates": [],
  "soft_skills": []
}
"""

# System prompt used by llm_detailed_feedback: asks for a per-category score
# breakdown (each out of 100) plus strengths/weaknesses for one candidate
# against one job. The "scores" keys are the ones process() reads when it
# builds the results table.
FEEDBACK_SYSTEM_DETAILED = """You are an expert technical recruiter. Compare a job and a candidate and return STRICT JSON with actionable feedback and a detailed score breakdown.
Respond in the job description's language.
Scores should be out of 100.
Schema:
{
  "overall_summary": "",
  "scores": {
    "skills": 0,
    "qualifications": 0,
    "responsibilities": 0,
    "education_and_experience": 0,
    "certificates": 0,
    "soft_skills": 0
  },
  "strengths": [],
  "weaknesses": [],
  "missing_requirements": [],
  "suggestions": []
}

Scoring Guide:
- It's ok to say candidate does not match the requirement.
- Degree Section: Prioritize major over degree level. A candidate with a more relevant major should score higher even if the degree level is lower.
- Experience Section: Candidate with more relevant experience fields scores higher.
- Technical Skills Section: Candidate with more relevant technical skills scores higher.
- Responsibilities Section: Candidate with more relevant responsibilities scores higher.
- Certificates Section: Candidate with required certificates scores highest. No certificate = no score. Related but not exact certificates = medium score.
- Soft Skills Section: Prioritize foreign language and leadership. Candidate with more relevant soft skills scores higher.
- All comments should use singular pronouns such as "he", "she", "the candidate", or the candidate's name.

Keep each bullet short (max ~12 words).
Output ONLY JSON.


"""

# System prompt used by llm_recommend: asks for a short plain-text
# recommendation (not JSON) for a single candidate.
RECOMMEND_SYSTEM = """You are a senior technical recruiter writing a concise recommendation summary for a hiring manager.
Based on the provided candidate and job description, write a 2-3 sentence summary explaining why this candidate is a good match.
Focus on key skills, relevant experience, and overall fit. Do not use a conversational tone.
Output ONLY the summary text, no markdown or extra formatting.
"""

# --- Helpers for file parsing ---
def _pdf_to_text(path: str) -> str:
    """Return the text of every page of the PDF at ``path``, newline-joined.

    A page for which the reader yields no text contributes an empty string.
    """
    with open(path, "rb") as pdf_handle:
        pages = PyPDF2.PdfReader(pdf_handle).pages
        page_texts = [page.extract_text() or "" for page in pages]
    return "\n".join(page_texts)

def _txt_to_text(path: str) -> str:
    """Read the file at ``path`` as UTF-8 text, dropping undecodable bytes."""
    with open(path, "r", encoding="utf-8", errors="ignore") as source:
        contents = source.read()
    return contents

def _docx_to_text(path: str) -> str:
    """Return the text docx2txt extracts from ``path`` (empty string if none)."""
    extracted = docx2txt.process(path)
    return extracted if extracted else ""

def _doc_to_text_using_external_tool(path: str) -> str:
    """Convert a legacy ``.doc`` file to text with whichever CLI tool is installed.

    Tries ``antiword``, then ``catdoc``, then a headless ``soffice``
    (LibreOffice) conversion. Every attempt is best-effort: a failure is
    logged at debug level and the next tool is tried.

    Args:
        path: Path of the ``.doc`` file.

    Returns:
        The extracted text, or a bracketed placeholder message when no tool
        is available or every attempt failed.
    """
    # antiword and catdoc are invoked the same way and print the text on stdout.
    for tool in ("antiword", "catdoc"):
        if not shutil.which(tool):
            continue
        try:
            out = subprocess.check_output([tool, path], stderr=subprocess.DEVNULL)
            return out.decode(errors="ignore")
        except Exception as e:
            logging.debug(f"{tool} failed for {path}: {e}")

    if shutil.which("soffice"):
        try:
            # TemporaryDirectory deletes the conversion output when the block
            # exits; the previous mkdtemp() directory was never cleaned up.
            with tempfile.TemporaryDirectory() as tmpdir:
                subprocess.run(
                    ["soffice", "--headless", "--convert-to", "txt:Text", "--outdir", tmpdir, path],
                    check=True,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
                basename = os.path.splitext(os.path.basename(path))[0] + ".txt"
                txt_path = os.path.join(tmpdir, basename)
                if os.path.exists(txt_path):
                    # Read before leaving the block, while the file still exists.
                    return _txt_to_text(txt_path)
        except Exception as e:
            logging.debug(f"libreoffice conversion failed for {path}: {e}")
    return f"[Unsupported or unreadable .doc file: {os.path.basename(path)}. Install antiword/catdoc or libreoffice to enable .doc reading]"

def read_file_safely(path: str) -> str:
    """Extract text from ``path`` based on its (case-insensitive) extension.

    Supports ``.pdf``, ``.txt``, ``.docx`` and ``.doc``. Never raises: an
    unsupported extension or any reader failure produces a bracketed
    placeholder message instead, and failures are logged.
    """
    try:
        lowered = path.lower()
        if lowered.endswith(".pdf"):
            content = _pdf_to_text(path)
        elif lowered.endswith(".txt"):
            content = _txt_to_text(path)
        elif lowered.endswith(".docx"):
            content = _docx_to_text(path)
        elif lowered.endswith(".doc"):
            content = _doc_to_text_using_external_tool(path)
        else:
            content = f"[Unsupported file type: {os.path.basename(path)}]"
    except Exception as err:
        logging.error(f"Error reading file {path}: {err}")
        content = f"[Error reading file: {err}]"
    return content

def safe_json_loads(text: str) -> dict:
    """Parse the JSON payload embedded in an LLM reply, never raising.

    Strategies, in order:
      1. the contents of a ```` ```json ```` fenced block, if one is present;
      2. the span from the first ``{`` to the last ``}``;
      3. the span from the first ``[`` to the last ``]``, accepted only when
         it decodes to a non-empty list of objects. This covers replies that
         are a bare top-level array, which strategy 2 cuts into invalid JSON.

    Args:
        text: Raw model output; ``None`` is treated as an empty string.

    Returns:
        The decoded value (normally a dict, possibly a list of dicts), or
        ``{}`` when nothing could be parsed. Failures are logged.
    """
    text = text or ""
    try:
        fenced = re.search(r"```json\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
        if fenced:
            return json.loads(fenced.group(1))

        obj_start, obj_end = text.find("{"), text.rfind("}")
        arr_start, arr_end = text.find("["), text.rfind("]")
        has_object = obj_start != -1 and obj_end > obj_start
        has_array = arr_start != -1 and arr_end > arr_start
        if not (has_object or has_array):
            logging.error(f"Could not find any JSON object in the text: {text[:500]}...")
            return {}

        first_error = None
        if has_object:
            try:
                return json.loads(text[obj_start : obj_end + 1])
            except ValueError as e:  # json.JSONDecodeError subclasses ValueError
                first_error = e

        if has_array:
            try:
                parsed = json.loads(text[arr_start : arr_end + 1])
            except ValueError as e:
                if first_error is None:
                    first_error = e
            else:
                # Reject things like a "[1]" citation marker in prose.
                if isinstance(parsed, list) and parsed and all(isinstance(x, dict) for x in parsed):
                    return parsed

        if first_error is None:
            first_error = ValueError("no JSON object or array of objects found")
        raise first_error
    except Exception as e:
        logging.error(f"Failed to parse JSON: {e}\nRaw Text: {text[:500]}...")
        return {}

# --- LLM Chat Wrapper ---
def deepinfra_chat(messages: List[Dict[str, str]], api_key: str, model: str, temperature: float = 0.2) -> str:
    """Send a chat-completion request and return the stripped reply text.

    The shared ``default_client`` is used unless ``api_key`` is non-empty and
    differs from ``DEEPINFRA_API_KEY``, in which case a client is built for
    that key.

    Raises:
        gr.Error: on any failure while building the client or calling the API.
    """
    try:
        needs_own_client = bool(api_key) and api_key != DEEPINFRA_API_KEY
        if needs_own_client:
            client = OpenAI(api_key=api_key, base_url=DEEPINFRA_BASE_URL)
        else:
            client = default_client
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )
        reply = completion.choices[0].message.content
        return (reply or "").strip()
    except Exception as e:
        logging.error(f"API request failed: {e}")
        raise gr.Error(f"API request failed: {e}. Check your API key and model name.")

def quick_contacts(text: str) -> dict:
    """Regex-guess the first e-mail address and phone-like number in ``text``.

    Returns a dict with keys ``email_guess`` and ``phone_guess``; each value
    is the matched substring, or ``None`` when nothing matched.
    """
    email_pattern = r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b"
    phone_pattern = r"(\+\d{1,3}\s?)?(\(\d{1,4}\)|\d{1,4})[-.\s]?\d{1,4}[-.\s]?\d{1,9}"

    email_match = re.search(email_pattern, text)
    phone_match = re.search(phone_pattern, text)

    guesses = {}
    guesses["email_guess"] = None if email_match is None else email_match.group(0)
    guesses["phone_guess"] = None if phone_match is None else phone_match.group(0)
    return guesses

def load_job_description(jd_text: str, jd_file) -> str:
    """Return the job description, preferring pasted text over an upload.

    Pasted text that is non-blank is returned unchanged; otherwise the
    uploaded file (if any) is read via ``read_file_safely``; otherwise ``""``.
    """
    pasted_is_usable = bool(jd_text) and bool(jd_text.strip())
    if pasted_is_usable:
        return jd_text
    return read_file_safely(jd_file.name) if jd_file else ""

def load_resume(resume_file) -> Tuple[str, str]:
    """Return ``(text, file_name)`` for an uploaded resume.

    A falsy ``resume_file`` yields ``("", "")``. Otherwise the text comes
    from ``read_file_safely`` and the name is the basename of the upload path.
    """
    if resume_file:
        upload_path = resume_file.name
        file_name = os.path.basename(upload_path)
        return read_file_safely(upload_path), file_name
    return "", ""

# --- Resume Normalizer ---
def normalize_resume(raw_resume: Dict) -> Dict:
    """Coerce an LLM-extracted resume payload into a fixed-shape dict.

    The model's output is untrusted: keys may be missing, set to ``null``, or
    hold the wrong type, and the payload itself may not be a dict. Every key
    of the result is always present.

    Args:
        raw_resume: Parsed JSON from the resume-extraction call.

    Returns:
        A dict with string fields ``name``/``email``/``phone`` (stripped,
        ``""`` when absent), the list-valued fields defaulting to ``[]``
        (``education``/``experience`` default to one blank entry), and
        ``summary`` taken from ``summary``, ``profile`` or ``objective``.
    """
    if not isinstance(raw_resume, dict):
        raw_resume = {}

    def _text(key: str) -> str:
        # A JSON null arrives as None and numbers arrive as int/float; calling
        # .strip() on those directly would raise AttributeError.
        value = raw_resume.get(key)
        return "" if value is None else str(value).strip()

    def _items(key: str):
        return raw_resume.get(key) or []

    blank_education = [{"degree": "", "field": "", "institution": "", "year": ""}]
    blank_experience = [{"title": "", "company": "", "start_date": "", "end_date": "", "summary": ""}]

    return {
        "name": _text("name"),
        "email": _text("email"),
        "phone": _text("phone"),
        "skills": _items("skills"),
        "skills_en": _items("skills_en"),
        "education": raw_resume.get("education") or blank_education,
        "experience": raw_resume.get("experience") or blank_experience,
        "languages": _items("languages"),
        "certificates": _items("certificates"),
        "soft_skills": _items("soft_skills"),
        "Projects": _items("Projects"),
        "summary": (
            raw_resume.get("summary")
            or raw_resume.get("profile")
            or raw_resume.get("objective")
            or ""
        ),
    }

# --- Extraction Functions ---
def llm_extract_jd(jd_text: str, api_key: str, model: str, temperature: float = 0.1) -> Dict:
    """Ask the model to structure a job description and return the parsed JSON.

    Only the first 20000 characters of ``jd_text`` are sent. The reply is
    decoded with ``safe_json_loads``.
    """
    system_msg = {"role": "system", "content": JD_SYSTEM}
    user_msg = {"role": "user", "content": jd_text[:20000]}
    reply = deepinfra_chat(
        [system_msg, user_msg],
        api_key=api_key,
        model=model,
        temperature=temperature,
    )
    return safe_json_loads(reply)

def llm_extract_resume(resume_text: str, api_key: str, model: str, temperature: float = 0.1) -> Dict:
    """Ask the model to structure a resume and return the parsed JSON.

    Only the first 20000 characters of ``resume_text`` are sent. The reply is
    decoded with ``safe_json_loads``.
    """
    system_msg = {"role": "system", "content": RESUME_SYSTEM}
    user_msg = {"role": "user", "content": resume_text[:20000]}
    reply = deepinfra_chat(
        [system_msg, user_msg],
        api_key=api_key,
        model=model,
        temperature=temperature,
    )
    return safe_json_loads(reply)

def llm_detailed_feedback(jd_struct: Dict, resume_struct: Dict, api_key: str, model: str, temperature: float = 0.2) -> Dict:
    """Request a scored job-vs-candidate comparison and return the parsed JSON.

    The job and candidate structures are serialised together as one JSON
    user message; the reply is decoded with ``safe_json_loads``.
    """
    payload = {"job": jd_struct, "candidate": resume_struct}
    conversation = [
        {"role": "system", "content": FEEDBACK_SYSTEM_DETAILED},
        {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
    ]
    reply = deepinfra_chat(conversation, api_key=api_key, model=model, temperature=temperature)
    return safe_json_loads(reply)

def llm_recommend(jd_struct: Dict, resume_struct: Dict, api_key: str, model: str, temperature: float = 0.2) -> str:
    """Request a short plain-text recommendation for one candidate.

    The job and candidate structures are serialised together as one JSON
    user message; the model's reply text is returned as-is.
    """
    payload = {"job": jd_struct, "candidate": resume_struct}
    conversation = [
        {"role": "system", "content": RECOMMEND_SYSTEM},
        {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
    ]
    return deepinfra_chat(conversation, api_key=api_key, model=model, temperature=temperature)

# --- Ranking Utilities ---
def prompt_for_match(jd_struct: Dict[str, Any], cv_structs: List[Dict[str, Any]], conditional_req: str) -> List[Dict[str, str]]:
    """Build the system and user chat messages for the ranking request.

    Each candidate is reduced to a compact summary (contact fields, capped
    skill/title/degree lists) before being embedded in the user message next
    to the parsed job description and the conditional requirement.
    """

    def summarize(cand: Dict[str, Any]) -> Dict[str, Any]:
        # English-normalised skills are preferred when the parser supplied them.
        skill_source = cand.get("skills_en") or cand.get("skills") or []
        job_titles = [job.get("title", "") for job in (cand.get("experience") or [])]
        degrees = [edu.get("degree", "") for edu in (cand.get("education") or [])]
        return {
            "name": cand.get("name", ""),
            "email": cand.get("email", ""),
            "phone": cand.get("phone", ""),
            "skills": skill_source[:50],
            "experience_titles": job_titles[:30],
            "education": degrees[:20],
            "languages": cand.get("languages", [])[:20],
            "certificates": cand.get("certificates", [])[:20],
            "Projects": cand.get("Projects", [])[:20],
        }

    compact_cands = [summarize(cand) for cand in cv_structs]

    system_lines = [
        "You are ranking candidates for a role. Output STRICT JSON ONLY:",
        '{ "candidates": [ { "candidate": str, "score": number (0-10), "justification": str } ] }',
        "Scoring criteria (weight them reasonably):",
        "- Must-have skills coverage and relevant years",
        "- Nice-to-have skills and domain fit",
        "- Evidence quality in work history/education",
        "- Language/locale requirements if any",
        "- **Conditional Requirement:** If provided, evaluate the candidate's fit against this requirement.",
        "IMPORTANT:",
        "- The 'candidate' MUST EXACTLY EQUAL the resume 'name' field provided.",
        "- No extra keys. No markdown.",
    ]
    system_text = "\n".join(system_lines)

    role_json = json.dumps(jd_struct, ensure_ascii=False)
    cands_json = json.dumps(compact_cands, ensure_ascii=False)
    user_text = (
        f"Role (parsed JSON):\n{role_json}\n\n"
        f"Candidates (compact JSON):\n{cands_json}\n\n"
        f"Conditional Requirement: {conditional_req}"
    )

    return [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_text},
    ]

def _coerce_score(value: Any) -> float:
    """Best-effort conversion of a model-supplied score to float (0.0 on failure)."""
    try:
        return float(value)
    except (TypeError, ValueError):
        # Handles replies such as "8/10" or "7.5 points" by taking the first
        # number; a null or otherwise unusable value becomes 0.0.
        found = re.search(r"-?\d+(?:\.\d+)?", str(value))
        return float(found.group(0)) if found else 0.0


def parse_ranked_output(content: str) -> List[Dict[str, Any]]:
    """Turn the ranking model's reply into ``candidate/score/justification`` rows.

    Accepts either ``{"candidates": [...]}`` or a bare list of items. Items
    that are not objects are skipped, and a missing, null or non-numeric score
    degrades to a best-effort float instead of raising.

    Args:
        content: Raw model reply; ``None`` is treated as an empty string.

    Returns:
        One row per parsed item. When the reply has neither accepted shape, a
        single ``RAW_OUTPUT`` row carrying the first 2000 characters of the
        reply is returned so the caller still has something to show.
    """
    content = content or ""
    parsed = safe_json_loads(content)

    if isinstance(parsed, dict) and isinstance(parsed.get("candidates"), list):
        items = parsed["candidates"]
    elif isinstance(parsed, list):
        items = parsed
    else:
        logging.warning(f"Could not parse ranked output as JSON. Raw: {content[:500]}")
        return [{"candidate": "RAW_OUTPUT", "score": 0.0, "justification": content[:2000]}]

    rows: List[Dict[str, Any]] = []
    for it in items:
        if not isinstance(it, dict):
            continue
        rows.append({
            "candidate": str(it.get("candidate", "")).strip(),
            "score": _coerce_score(it.get("score", 0)),
            "justification": str(it.get("justification", "")).strip(),
        })
    return rows

# --- New: process single resume (for parallel execution) ---
def process_single_resume(f, jd_struct: Dict, api_key: str, model_name: str) -> Tuple[Dict, str, float]:
    """Parse, score and summarise one uploaded resume.

    Intended to be run from a worker thread. Each LLM step is best-effort: a
    failure is logged and replaced by an empty result so that one bad resume
    does not abort the batch.

    Args:
        f: Uploaded file object (its ``.name`` is the path on disk).
        jd_struct: Parsed job description.
        api_key: API key forwarded to the LLM helpers.
        model_name: Model identifier forwarded to the LLM helpers.

    Returns:
        ``(candidate_struct, file_name, elapsed_seconds)``.
    """
    t0 = time.perf_counter()
    text, fname = load_resume(f)
    contacts = quick_contacts(text)
    try:
        raw_resume = llm_extract_resume(text, api_key=api_key, model=model_name)
    except Exception as e:
        logging.error(f"LLM resume extract failed for {fname}: {e}")
        raw_resume = {}

    # The parser can hand back a list or nulls; normalize_resume calls
    # .get()/.strip() on the payload, so make it safe first.
    if not isinstance(raw_resume, dict):
        raw_resume = {}
    for key in ("name", "email", "phone"):
        value = raw_resume.get(key)
        if not isinstance(value, str):
            raw_resume[key] = "" if value is None else str(value)

    cand_struct = normalize_resume(raw_resume)

    if not cand_struct.get("name"):
        cand_struct["name"] = os.path.splitext(fname)[0]

    # normalize_resume always sets "email"/"phone", so setdefault() could never
    # apply the regex guesses; fill them in explicitly when the field is empty.
    if not cand_struct.get("email"):
        cand_struct["email"] = contacts["email_guess"] or ""
    if not cand_struct.get("phone"):
        cand_struct["phone"] = contacts["phone_guess"] or ""

    try:
        detailed_feedback = llm_detailed_feedback(jd_struct, cand_struct, api_key, model_name)
    except Exception as e:
        logging.error(f"LLM detailed feedback failed for {fname}: {e}")
        detailed_feedback = {}
    if not isinstance(detailed_feedback, dict):
        detailed_feedback = {}

    scores = detailed_feedback.get('scores', {})
    cand_struct['detailed_scores'] = scores if isinstance(scores, dict) else {}
    cand_struct['summary_feedback'] = detailed_feedback.get('overall_summary', '')
    cand_struct['strengths'] = detailed_feedback.get('strengths', [])
    cand_struct['weaknesses'] = detailed_feedback.get('weaknesses', [])
    cand_struct['missing_requirements'] = detailed_feedback.get('missing_requirements', [])

    try:
        cand_struct["recommendation"] = llm_recommend(jd_struct, cand_struct, api_key, model_name)
    except Exception as e:
        logging.error(f"LLM recommendation failed for {fname}: {e}")
        cand_struct["recommendation"] = ""

    t_elapsed = time.perf_counter() - t0
    return cand_struct, fname, t_elapsed

def process(
    jd_text,
    jd_file,
    resume_files,
    conditional_req
):
    """Run the full pipeline: parse the JD, analyse resumes, rank, and export.

    Args:
        jd_text: Job description pasted by the user (may be empty).
        jd_file: Uploaded job-description file, used when ``jd_text`` is blank.
        resume_files: Uploaded resume files; only the first 50 are processed.
        conditional_req: Extra free-text requirement forwarded to the ranker.

    Returns:
        ``(df_table, csv_file_path, top_df)``: the detailed ranking table, the
        path of a CSV export, and a summary of the top candidates.

    Raises:
        gr.Error: when the API key, the job description or the resumes are
            missing, or when the JD/ranking request fails.
    """
    t0 = time.perf_counter()
    api_key = (DEEPINFRA_API_KEY or "").strip()
    if not api_key:
        raise gr.Error("Missing API key. Set DEEPINFRA_API_KEY env var.")

    model_name = DEFAULT_MODEL
    temperature = 0.2
    top_n = 5

    t_jd_start = time.perf_counter()
    jd_raw = load_job_description(jd_text or "", jd_file)
    if not jd_raw.strip():
        raise gr.Error("Please paste a Job Description or upload a JD file.")
    # Check the resumes before the JD request so a missing upload does not
    # cost an LLM call first.
    if not resume_files:
        raise gr.Error("Please upload at least one resume (PDF, DOCX, DOC, or TXT).")

    jd_struct = llm_extract_jd(jd_raw, api_key=api_key, model=model_name)
    t_jd = time.perf_counter() - t_jd_start
    logging.info(f"JD parsing time: {t_jd:.2f}s")

    parsed_cands = []
    cand_files = []  # file name for parsed_cands[i]; safe with duplicate names
    t_parse_total = 0.0
    files_to_process = resume_files[:50]
    max_workers = min(8, max(1, len(files_to_process)))

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_resume, f, jd_struct, api_key, model_name)
            for f in files_to_process
        ]
        # Collect in submission order (not completion order) so the prompt and
        # the tie-break order of the tables are reproducible between runs.
        for future in futures:
            try:
                cand_struct, fname, elapsed = future.result()
                parsed_cands.append(cand_struct)
                cand_files.append(fname)
                t_parse_total += elapsed
            except Exception as e:
                logging.error(f"Error processing a resume in parallel: {e}")

    avg_parse = (t_parse_total / max(1, len(parsed_cands)))
    logging.info(f"Total resume parsing time: {t_parse_total:.2f}s, avg per file: {avg_parse:.2f}s")

    t_match_start = time.perf_counter()
    match_msgs = prompt_for_match(jd_struct, parsed_cands, conditional_req)
    raw_match = deepinfra_chat(match_msgs, api_key=api_key, model=model_name, temperature=temperature)
    ranked_rows = parse_ranked_output(raw_match)
    t_match_total = time.perf_counter() - t_match_start
    logging.info(f"Matching time: {t_match_total:.2f}s")

    def _name_key(name) -> str:
        # Case- and whitespace-insensitive key for matching the model's echo of
        # a candidate name back to the parsed resume.
        return " ".join(str(name).split()).casefold()

    score_map = {r["candidate"]: (float(r.get("score", 0.0)), r.get("justification","")) for r in ranked_rows}
    score_map_loose = {_name_key(k): v for k, v in score_map.items()}

    table_rows, export_rows = [], []
    for c, fname in zip(parsed_cands, cand_files):
        nm = c.get("name","")
        # Exact match first; fall back to the loose key when the model altered
        # capitalisation or spacing.
        sc, just = (
            score_map.get(nm)
            or score_map_loose.get(_name_key(nm))
            or (0.0, "Not ranked by model")
        )
        detailed_scores = c.get('detailed_scores', {})
        if not isinstance(detailed_scores, dict):
            detailed_scores = {}
        table_rows.append({
            "Candidate": nm,
            "Score (0-10)": round(sc, 1),
            "Skills (0-100)": detailed_scores.get('skills', 0),
            "Qualifications (0-100)": detailed_scores.get('qualifications', 0),
            "Responsibilities (0-100)": detailed_scores.get('responsibilities', 0),
            "Experience (0-100)": detailed_scores.get('education_and_experience', 0),
            "Certificates (0-100)": detailed_scores.get('certificates', 0),
            "Soft Skills (0-100)": detailed_scores.get('soft_skills', 0),
            "Email": c.get("email",""),
            "Phone": c.get("phone",""),
            "File": fname,
        })
        export_rows.append({
            "candidate": nm,
            "score": round(sc, 1),
            **detailed_scores,
            "recommendation": c.get("recommendation", ""),
            "summary_feedback": c.get('summary_feedback', ''),
            "strengths": ", ".join([str(s) for s in c.get("strengths", [])]),
            "weaknesses": ", ".join([str(s) for s in c.get("weaknesses", [])]),
            "missing_requirements": ", ".join([str(s) for s in c.get("missing_requirements", [])]),
            "justification": just,
            "full_json": json.dumps(c, ensure_ascii=False)
        })

    df_export = pd.DataFrame(export_rows)
    if "score" in df_export.columns:
        df_export = df_export.sort_values("score", ascending=False)
    df_table = pd.DataFrame(table_rows)
    if "Score (0-10)" in df_table.columns:
        df_table = df_table.sort_values("Score (0-10)", ascending=False)

    top_candidates_data = []
    for _, row in df_export.head(top_n).iterrows():
        top_candidates_data.append({
            "Candidate": row.get("candidate", ""),
            "Score": row.get("score", 0),
            "Recommendation": row.get("recommendation", ""),
            "Justification": row.get("justification", ""),
        })
    top_df = pd.DataFrame(top_candidates_data)

    # Write through the open handle rather than reopening the file by name:
    # reopening a still-open NamedTemporaryFile is not portable (it fails on
    # Windows). newline='' leaves line endings to the CSV writer.
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv', encoding='utf-8', newline='') as tmp_file:
        df_export.to_csv(tmp_file, index=False)
        csv_file_path = tmp_file.name

    t_total = time.perf_counter() - t0
    logging.info(f"Total process time: {t_total:.2f}s")

    return df_table, csv_file_path, top_df

# --- Gradio App ---

# Stylesheet passed to gr.Blocks(css=...) below: page background and font,
# card-like containers, and a dashed drop zone for file uploads.
CUSTOM_CSS = """
/* Add a subtle background gradient and use a nicer font */
.gradio-container {
    background-image: linear-gradient(to top, #f3e7e9 0%, #e3eeff 99%, #e3eeff 100%);
    font-family: 'IBM Plex Sans', sans-serif;
}
/* Style the input/output areas like cards */
.gradio-row > .gradio-column, .gradio-group {
    border: 1px solid #E5E7EB;
    border-radius: 12px;
    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
    background-color: white;
    padding: 15px;
}
/* Make the file upload area more prominent */
.gradio-file {
    border: 2px dashed #A4B0BE;
    border-radius: 8px;
    padding: 20px;
    transition: all 0.2s ease;
}
.gradio-file:hover {
    border-color: #4A90E2;
    background-color: #F9FAFB;
}
"""

# UI layout and event wiring. The emoji in the labels below were stored as
# mojibake (UTF-8 bytes decoded with the wrong codec) and have been restored.
with gr.Blocks(theme=SoftTheme.Soft(), css=CUSTOM_CSS, title="AI Resume Matcher") as demo:
    gr.Markdown(
        "<h1 style='text-align: center; color: #1E3A8A;'>🤖 AI Resume Matcher & Ranking</h1>"
        "<p style='text-align: center; color: #4B5563;'>Upload a job description and resumes to automatically rank candidates.</p>"
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=2):
            gr.Markdown("### 📂 Step 1: Provide Inputs")
            with gr.Group():
                jd_text = gr.Textbox(label="Paste Job Description", lines=8, placeholder="Paste the full job description here...")
                jd_file = gr.File(label="Or Upload JD File (.txt, .pdf, .docx)")

            resume_files = gr.File(
                label="📂 Step 2: Upload Resumes (.pdf, .docx, .doc, .txt)",
                file_types=[".pdf", ".docx", ".doc", ".txt"],
                file_count="multiple"
            )

            with gr.Accordion("⚙️ Advanced Options", open=False):
                conditional_req = gr.Textbox(
                    label="Conditional Requirement (Optional)",
                    placeholder="e.g., 'Must have 5+ years of Python experience'"
                )

            with gr.Row():
                clear_btn = gr.Button("Clear All")
                submit_btn = gr.Button("🚀 Run Matching & Ranking", variant="primary", scale=2)

        # Right column: results.
        with gr.Column(scale=3):
            gr.Markdown("### ✨ Step 3: View Results")
            status_md = gr.Markdown("Status: Ready. Please provide inputs and click Run.", visible=True)

            with gr.Tabs():
                with gr.TabItem("🏆 Top Candidates Summary"):
                    top_table = gr.DataFrame(label="Top 5 Candidates", interactive=False, headers=["Candidate", "Score", "Recommendation", "Justification"])
                with gr.TabItem("📊 Detailed Ranking"):
                    results_table = gr.DataFrame(label="Full Candidate Ranking")
                with gr.TabItem("📥 Download Report"):
                    gr.Markdown("Click the file below to download the complete analysis, including all extracted data and feedback, in CSV format.")
                    csv_export = gr.File(label="Download Full Report (CSV)")

    # This is a new state object to hold the results to avoid re-running the 'process' function
    results_state = gr.State({})

    def run_process_and_update_status(jd_text, jd_file, resume_files, conditional_req):
        """Generator handler for the Run button.

        Yields a "processing" status with cleared outputs first, then either
        the results or an error status. Each yield supplies values for
        (status, detailed table, top table, CSV file, results state).
        """
        yield gr.Markdown(value="⏳ Processing... Analyzing job description and resumes. This may take a moment.", visible=True), \
              pd.DataFrame(), pd.DataFrame(), None, {} # Clear previous results while running

        try:
            df_table, csv_path, top_df = process(jd_text, jd_file, resume_files, conditional_req)
            status_message = f"✅ Done! Analyzed {len(df_table)} resumes. See results below."
            results = {
                "df_table": df_table,
                "csv_path": csv_path,
                "top_df": top_df
            }
            yield gr.Markdown(value=status_message, visible=True), df_table, top_df, csv_path, results
        except Exception as e:
            # Any failure (including gr.Error raised by process) is shown in
            # the status line and the outputs are cleared.
            yield gr.Markdown(value=f"❌ Error: {e}", visible=True), \
                  pd.DataFrame(), pd.DataFrame(), None, {}

    def clear_all():
        """Reset every input and output; values follow the order of clear_btn's outputs."""
        return None, None, [], "", pd.DataFrame(), pd.DataFrame(), None, gr.Markdown(value="Status: Cleared. Ready for new inputs.", visible=True), {}

    submit_btn.click(
        run_process_and_update_status,
        inputs=[jd_text, jd_file, resume_files, conditional_req],
        outputs=[status_md, results_table, top_table, csv_export, results_state]
    )

    clear_btn.click(
        clear_all,
        inputs=[],
        outputs=[jd_text, jd_file, resume_files, conditional_req, results_table, top_table, csv_export, status_md, results_state]
    )


# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()