# Intelcruit — Streamlit frontend (app.py)
# Hugging Face Space by "namfam"; last updated in commit 1bc03c8.
import streamlit as st
import yaml
import requests
import pypdf
import docx
import re
from io import BytesIO
import os
# Page configuration must be the first Streamlit call in the script.
st.set_page_config(page_title="AI Interview Scorer", page_icon="🤖", layout="wide")
st.title("Intelcruit: AI Interview Scorer")
# --- MOCK DATA CONSTANTS (to make frontend self-contained) ---
# Sample job description (in Vietnamese) for an AI/NLP engineer role; shown
# read-only in the sidebar when "Use Mock Data" is enabled.
MOCK_JOB_DESCRIPTION = """
Mô tả công việc
Sử dụng các công cụ và framework như TensorFlow, PyTorch, và Hugging Face Transformers để xây dựng các mô hình ngôn ngữ.
Sử dụng các kỹ thuật NLP để phân tích, trích xuất thông tin từ văn bản, và xử lý ngôn ngữ tự nhiên.
Phát triển các hệ thống truy xuất thông tin từ cơ sở dữ liệu để hỗ trợ quá trình tạo ra câu trả lời chính xác và đầy đủ.
Sử dụng các kỹ thuật RAG để kết hợp thông tin truy xuất từ các nguồn dữ liệu với khả năng sinh văn bản của mô hình.
Theo dõi và nghiên cứu các xu hướng và công nghệ mới trong lĩnh vực NLP, Chatbot và RAG.
Tối ưu hóa thời gian phản hồi và hiệu suất của hệ thống truy xuất thông tin.
Yêu cầu ứng viên
Có tối thiểu 1 năm kinh nghiệm
Tốt nghiệp Cao đẳng/Đại học các chuyên ngành Công nghệ Thông tin, Toán Tin, Điện tử Viễn thông, Điều khiển Tự động, hoặc các ngành liên quan.
Kiến thức chuyên môn:
Có hiểu biết về Machine Learning và Deep Learning.
Kinh nghiệm làm việc với các mô hình ngôn ngữ lớn (LLM)
Có kinh nghiệm làm việc với RESTAPI, Langchain, llamaindex, ...
Kỹ năng nghiên cứu và nền tảng:
Khả năng nghiên cứu và áp dụng các công nghệ mới.
Nền tảng vững chắc về cấu trúc dữ liệu và thuật toán.
Hiểu biết và có kinh nghiệm lập trình với các ngôn ngữ như C++ và Python.
Có kinh nghiệm làm việc với cơ sở dữ liệu SQL.
Quyền lợi
Mức lương: thỏa thuận khi phỏng vấn
Công ty đóng 100% BHYT, BHXH, BHTN
Công ty cung cấp thiết bị làm việc
Review lương 1 - 2 lần/năm theo năng lực
Thưởng ngày lễ 2/9, 30/04, 1/5, ..., Tết, thưởng lương tháng 13
Thưởng kết quả kinh doanh toàn công ty cuối năm
Du lịch 2 lần/năm
Môi trường làm việc năng động, chuyên nghiệp
"""
# YAML scoring rubric (Vietnamese descriptions) with two weighted categories:
# "expertise" (weight 0.7) and "communication" (weight 0.3).  Parsed with
# yaml.safe_load in display_results().
MOCK_RUBRIC_CONTENT = """expertise:
description: "Đánh giá mức độ thành thạo về chuyên môn AI, bao gồm kiến thức và kinh nghiệm thực tế với NLP, LLM, RAG, và các công cụ như PyTorch, TensorFlow, HuggingFace, LangChain, REST API. Khả năng áp dụng thuật toán, xử lý dữ liệu và tối ưu hóa hệ thống cũng được xem xét."
weight: 0.7
communication:
description: "Đánh giá khả năng trình bày ý tưởng rõ ràng, trao đổi kỹ thuật hiệu quả, viết tài liệu hoặc báo cáo kỹ thuật dễ hiểu, và khả năng giao tiếp với các thành viên không chuyên kỹ thuật (PM, khách hàng nội bộ)."
weight: 0.3
"""
# --- MOCK DATA PATHS ---
# Get the absolute path of the directory containing the current script (frontend/)
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Go up one level to get the project root (intelcruit/)
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
# NOTE(review): debug output; PROJECT_ROOT is no longer used below since the
# mock paths became working-directory relative — consider removing both.
print("PROJECT_ROOT: ", PROJECT_ROOT)
# Construct the full, robust paths to the mock files
# MOCK_AUDIO_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_interview_audio_tts_ai_engineer.wav")
# MOCK_TRANSCRIPT_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_interview_transcipt.txt")
# MOCK_RESUME_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_resume_ai_engineer.pdf")
# MOCK_JD_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_job_description.txt")
# MOCK_RUBRIC_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_rubric.yaml")
# MOCK_TRANSCRIPT_PLACEHOLDER = "This is a placeholder for the mock transcript. It will be replaced by content from the mock file if available."
# Paths are relative to the process working directory (the Space root).
MOCK_AUDIO_PATH = os.path.join("examples", "example_interview_audio_tts_ai_engineer.wav")
# NOTE(review): "transcipt" is a typo, but it must match the file name on disk.
MOCK_TRANSCRIPT_PATH = os.path.join("examples", "example_interview_transcipt.txt")
MOCK_RESUME_PATH = os.path.join("examples", "example_resume_ai_engineer.pdf")
MOCK_JD_PATH = os.path.join("examples", "example_job_description.txt")
MOCK_RUBRIC_PATH = os.path.join("examples", "example_rubric.yaml")
# Fallback text used when the mock transcript file is unavailable.
MOCK_TRANSCRIPT_PLACEHOLDER = "This is a placeholder for the mock transcript. It will be replaced by content from the mock file if available."
def load_mock_file(path, mime_type):
    """Read a mock fixture from disk and wrap it as an upload-like object.

    Returns a BytesIO carrying the same ``name`` and ``type`` attributes that
    Streamlit's uploader widgets expose, or None (after a UI warning) when
    the fixture is missing.
    """
    if not os.path.exists(path):
        st.warning(f"Mock file not found at: {path}. Please ensure it exists.")
        return None
    with open(path, "rb") as handle:
        buffer = BytesIO(handle.read())
    # Mimic the attributes of a Streamlit UploadedFile.
    buffer.name = os.path.basename(path)
    buffer.type = mime_type
    return buffer
# --- API & HELPER FUNCTIONS ---
def calculate_overall_score(scored_pairs, rubric):
    """Compute the weighted overall interview score on a 0-100 scale.

    Per-question scores (0-10) are averaged within each rubric category and
    combined using the rubric weights.  Returns ``(final_score, averages)``
    with both values scaled to 100, or ``(0, {})`` when there is nothing to
    score or the rubric weights sum to zero.
    """
    if not rubric or not scored_pairs:
        return 0, {}

    weights = {name: spec.get('weight', 1) for name, spec in rubric.items()}
    weight_total = sum(weights.values())
    if weight_total == 0:
        return 0, {}

    # Bucket every numeric score under its rubric category; scores for
    # categories absent from the rubric are ignored.
    buckets = {name: [] for name in weights}
    for qa in scored_pairs:
        if 'analysis' not in qa or 'scores' not in qa['analysis']:
            continue
        for entry in qa['analysis']['scores']:
            name = entry.get('category')
            value = entry.get('score')
            if name in buckets and isinstance(value, (int, float)):
                buckets[name].append(value)

    averages = {
        name: (sum(values) / len(values) if values else 0)
        for name, values in buckets.items()
    }
    weighted = sum(avg * weights.get(name, 1) for name, avg in averages.items())
    normalized = weighted / weight_total  # still on the 0-10 scale here
    # Report everything on a 0-100 scale for the UI.
    return normalized * 10, {name: avg * 10 for name, avg in averages.items()}
def calculate_resume_overall_score(results):
    """Average the experience/education/skills scores, rounded to 1 decimal.

    Non-numeric or missing category scores are skipped; returns 0 when no
    usable score exists.
    """
    if not results:
        return 0
    candidates = (
        results.get(section, {}).get('score')
        for section in ('experience', 'education', 'skills')
    )
    numeric = [value for value in candidates if isinstance(value, (int, float))]
    if not numeric:
        return 0
    return round(sum(numeric) / len(numeric), 1)
def display_results(results_data, rubric_content):
    """Render the interview analysis report: summary metrics plus a
    per-question breakdown grouped by rubric category."""
    try:
        rubric = yaml.safe_load(rubric_content)
    except yaml.YAMLError:
        st.error("Could not parse rubric.")
        rubric = {}

    qa_pairs = results_data.get('results', {}).get('scored_qa_pairs', [])
    if not qa_pairs:
        st.warning("No scorable question and answer pairs were found.")
        return

    # --- Summary section ---
    st.subheader("Summary")
    overall, per_category = calculate_overall_score(qa_pairs, rubric)
    st.metric(label="Overall Score", value=f"{overall:.1f}/100")
    if per_category:
        st.markdown("**Category Scores:**")
        columns = st.columns(len(per_category))
        for column, (name, value) in zip(columns, per_category.items()):
            with column:
                st.metric(label=name.replace('_', ' ').title(), value=f"{value:.1f}/100")

    st.markdown("---")
    st.subheader("Detailed Question & Answer Analysis")

    # Group Q&A pairs by category; 'general' sorts last, the rest alphabetically.
    grouped = {}
    for qa in qa_pairs:
        grouped.setdefault(qa.get('category', 'general'), []).append(qa)
    for name in sorted(grouped, key=lambda c: (c == 'general', c)):
        # st.subheader(f"Category: {name.replace('_', ' ').title()}")
        for qa in grouped[name]:
            with st.expander(f"**{qa['question']}**"):
                st.markdown(f"**Candidate's Answer:** *{qa['answer']}*")
                if 'analysis' in qa and 'scores' in qa['analysis']:
                    st.markdown("**AI Analysis:**")
                    for entry in qa['analysis']['scores']:
                        label = entry.get('category', 'General').replace('_', ' ').title()
                        st.markdown(f"**{label} Score:** `{entry.get('score', 'N/A')}/10`")
                        st.info(f"**Explanation:** {entry.get('reasoning', 'No reasoning provided.')}")
                elif 'analysis' in qa and 'error' in qa['analysis']:
                    st.error(f"Could not score this answer: {qa['analysis']['error']}")
                else:
                    st.warning("No analysis available for this Q&A pair.")
# Backend base URL.  NOTE: keep this WITHOUT a trailing slash — every endpoint
# below is built as f"{BASE_URL}/...", so a trailing slash previously produced
# double-slash URLs like ".../​/extract_from_resume/".
# BASE_URL = "http://127.0.0.1:8000"
BASE_URL = "https://namfam-intelcruit-backend.hf.space"
RESUME_EXTRACTION_URL = f"{BASE_URL}/extract_from_resume/"
def get_text_from_file(file):
    """Extract text from an uploaded file (PDF, DOCX, or TXT).

    PDF/DOCX text is whitespace-normalized to single spaces; plain text is
    returned as-is (UTF-8 decoded).  Returns None for unsupported types or
    on any read error (reported via the UI).
    """
    extracted = ""
    try:
        file.seek(0)  # The upload may already have been read once; rewind.
        if file.type == "application/pdf":
            pages = pypdf.PdfReader(file).pages
            joined = " ".join(page.extract_text() or "" for page in pages)
            extracted = re.sub(r'\s+', ' ', joined).strip()
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            paragraphs = docx.Document(file).paragraphs
            joined = " ".join(paragraph.text for paragraph in paragraphs)
            extracted = re.sub(r'\s+', ' ', joined).strip()
        elif file.type == "text/plain":
            extracted = file.read().decode('utf-8')
        else:
            st.warning(f"Unsupported file type: {file.type}")
            return None
    except Exception as e:
        st.error(f"Error reading file: {e}")
        return None
    return extracted
def display_resume_content(file):
    """Show the resume's extracted text in a read-only text area."""
    content = get_text_from_file(file)
    if not content:
        st.info("Could not display content for this file type or an error occurred.")
        return
    st.text_area("Resume Content", value=content, height=300, disabled=True)
def call_analyze_mock_api():
    """POST to the backend's canned-analysis endpoint.

    Returns the parsed JSON body, or {"error": ...} on any request failure.
    """
    try:
        reply = requests.post(f"{BASE_URL}/analyze_mock/", timeout=120)
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"Failed to connect to backend: {e}"}
def call_analyze_interview_api(job_description, rubric_content, audio_file=None, transcript_content=None):
    """Calls the backend to analyze an interview from audio or transcript.

    Audio takes precedence when both inputs are given.  Returns the parsed
    JSON response, or {"error": ...} when no input was supplied or the
    request fails.
    """
    payload = {
        'job_description': job_description,
        'rubric_content': rubric_content
    }
    attachments = {}
    if audio_file:
        attachments['audio_file'] = (audio_file.name, audio_file.getvalue(), audio_file.type)
    elif transcript_content:
        payload['transcript_content'] = transcript_content
    else:
        # This case should ideally be prevented by the UI
        return {"error": "No audio file or transcript was provided."}
    try:
        # Use a long timeout as analysis can take time
        reply = requests.post(f"{BASE_URL}/analyze/", files=attachments, data=payload, timeout=300)
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {e}"}
def call_analyze_resume_api(resume_file, job_description):
    """Calls the backend to analyze and score a resume.

    Returns the parsed JSON response, or {"error": ...} on request failure.
    """
    upload = {'resume_file': (resume_file.name, resume_file.getvalue(), resume_file.type)}
    payload = {'job_description': job_description}
    try:
        reply = requests.post(f"{BASE_URL}/analyze_resume/", files=upload, data=payload, timeout=180)
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {e}"}
def call_extract_api(resume_file):
    """Send the resume to the key-info extraction endpoint.

    Returns None when no file is given, the parsed JSON on success, or
    {"error": ...} (after a UI error message) on request failure.
    """
    if not resume_file:
        return None
    upload = {'resume_file': (resume_file.name, resume_file.getvalue(), resume_file.type)}
    try:
        reply = requests.post(RESUME_EXTRACTION_URL, files=upload, timeout=60)
        reply.raise_for_status()
        return reply.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error connecting to backend for extraction: {e}")
        return {"error": str(e)}
# --- PAGE CONFIG & SESSION STATE ---
# Seed every session-state key once so later reads never hit a missing key.
if 'use_mock_data' not in st.session_state:
    st.session_state.use_mock_data = False
for _state_key in ('interview_results', 'candidate_info', 'resume_file_name', 'jd_input'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
# --- UI LAYOUT ---
# --- SIDEBAR ---
st.sidebar.header("⚙️ Configuration")
# The checkbox widget uses its own key; its value is mirrored into
# st.session_state.use_mock_data, which the rest of the app reads.
st.session_state.use_mock_data = st.sidebar.checkbox("Use Mock Data for Quick Testing", key='use_mock_data_checkbox')
if st.session_state.use_mock_data:
    # Mock mode: show the canned JD/rubric constants read-only.
    job_description = st.sidebar.text_area("Job Description", value=MOCK_JOB_DESCRIPTION, height=250, disabled=True)
    rubric_content = st.sidebar.text_area("Scoring Rubric (YAML)", value=MOCK_RUBRIC_CONTENT, height=400, disabled=True)
else:
    # Live mode: the reviewer pastes their own JD and rubric.
    job_description = st.sidebar.text_area("Job Description", placeholder="Paste the full job description here...", height=250, key="jd_input")
    rubric_content = st.sidebar.text_area("Scoring Rubric (YAML)", placeholder="Paste the YAML scoring rubric here...", height=400)
st.sidebar.markdown("---")
# st.sidebar.info("Configure the job details here, then manage candidate analysis in the main panel.")
# --- MAIN PANEL ---
# --- Candidate Profile Section ---
with st.container():
    st.subheader("Candidate Profile")
    profile_col1, profile_col2, profile_col3 = st.columns([1, 1, 2])

    with profile_col1:
        # Resume source: mock fixture in mock mode, live uploader otherwise.
        if st.session_state.use_mock_data:
            uploaded_resume = load_mock_file(MOCK_RESUME_PATH, "application/pdf")
        else:
            uploaded_resume = st.file_uploader("Upload Candidate Resume/CV", type=["pdf", "docx", "txt"], key="resume_uploader")
        if uploaded_resume:
            st.image("https://www.w3schools.com/howto/img_avatar.png", width=150)

    # Re-run extraction only when a different file name is supplied.
    if uploaded_resume and uploaded_resume.name != st.session_state.get('resume_file_name'):
        st.session_state.resume_file_name = uploaded_resume.name
        st.session_state.candidate_info = None
        with st.spinner("🤖 Extracting key information from resume..."):
            extracted_data = call_extract_api(uploaded_resume)
            if extracted_data and "error" not in extracted_data:
                st.session_state.candidate_info = extracted_data
                st.success("Information extracted successfully!")
            elif extracted_data:
                st.error(f"Extraction failed: {extracted_data.get('error')}")

    with profile_col2:
        st.markdown("**Basic Information:**")
        if st.session_state.candidate_info:
            info = st.session_state.candidate_info
            st.write(f"**Full Name:** {info.get('full_name', 'N/A')}")
            st.write(f"**Email:** {info.get('email', 'N/A')}")
            st.write(f"**Phone:** {info.get('phone', 'N/A')}")
        else:
            st.info("Upload a resume to extract details.")

    with profile_col3:
        st.markdown("**Summaries:**")
        if st.session_state.candidate_info:
            info = st.session_state.candidate_info
            with st.expander("Experience"):
                st.markdown(info.get('experience_summary', 'N/A'))
            with st.expander("Education"):
                st.markdown(info.get('education_summary', 'N/A'))
            with st.expander("Skills"):
                st.markdown(info.get('skill_summary', 'N/A'))
        else:
            st.info("Summaries appear after extraction.")

    # BUGFIX: this banner previously fired for every upload, labelling real
    # candidate resumes as mock data — only show it in mock mode.
    if uploaded_resume and st.session_state.use_mock_data:
        st.info(f"Using mock resume: {uploaded_resume.name}")
# --- Analysis Tabs ---
# Two assessment views: resume scoring and interview scoring.
st.subheader("Assessment")
resume_tab, interview_tab = st.tabs(["📝 Resume/CV Score", "🎙️ Interview Score"])
with interview_tab:
    st.header("Interview Analysis")

    # The reviewer either uploads a recording or pastes a transcript.
    input_method = st.radio("Choose input method:", ("Upload Audio", "Enter Transcript"), key="interview_input_method")

    audio_file = None
    transcript_input = None

    if input_method == "Upload Audio":
        if st.session_state.use_mock_data:
            # BUGFIX: the mock recording is a .wav file but was declared as
            # "audio/mp3", mis-labelling the payload sent to the backend.
            audio_file = load_mock_file(MOCK_AUDIO_PATH, "audio/wav")
            if audio_file:
                st.info(f"Using mock audio: {audio_file.name}")
                st.audio(audio_file)
            # Keep mock transcript display for context if audio is used
            try:
                with open(MOCK_TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
                    mock_transcript_content = f.read()
                with st.expander("View Mock Transcript"):
                    transcript_input = st.text_area("Paste Transcript Here", value=mock_transcript_content, height=300, disabled=True, placeholder=mock_transcript_content)
            except FileNotFoundError:
                st.warning("Mock transcript file not found.")
                transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder="Mock transcript file not found.", disabled=True)
        else:
            audio_file = st.file_uploader("Upload Interview Audio", type=['mp3', 'wav', 'm4a', 'mp4'], key="live_audio_uploader")
            if audio_file:
                st.audio(audio_file)
    else:  # input_method == "Enter Transcript"
        if st.session_state.use_mock_data:
            try:
                with open(MOCK_TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
                    transcript_input = f.read()
                st.info("Using mock transcript.")
                transcript_input = st.text_area("Paste Transcript Here", value=transcript_input, height=300, disabled=True)
            except FileNotFoundError:
                st.warning("Mock transcript file not found.")
                transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder="Mock transcript file not found.", disabled=True)
        else:
            # BUGFIX: live mode previously read the mock transcript file with no
            # guard (crashing with FileNotFoundError when absent) and leaked mock
            # content into the placeholder. Live mode now offers a plain input.
            transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder="Paste the full interview transcript here...")

    # Initialize response_data outside the button block
    response_data = None
    # Enable the analyze button only once some input (audio or transcript) exists.
    analyze_button_disabled = (not audio_file and not transcript_input)

    if st.button("Analyze Interview", key="analyze_interview_btn", disabled=analyze_button_disabled):
        if st.session_state.use_mock_data:
            with st.spinner('Analyzing mock interview...'):
                response_data = call_analyze_mock_api()
        elif audio_file:
            with st.spinner('Analyzing interview... This may take several minutes.'):
                response_data = call_analyze_interview_api(job_description, rubric_content, audio_file=audio_file)
        elif transcript_input:
            with st.spinner('Analyzing interview... This may take several minutes.'):
                response_data = call_analyze_interview_api(job_description, rubric_content, transcript_content=transcript_input)

        if response_data and "error" not in response_data:
            st.session_state.interview_results = response_data
            st.success('Interview analysis complete!')
        elif response_data:
            st.error(f"API Error: {response_data.get('error')}")
            # Drop stale results so a failed run doesn't keep showing old scores.
            if 'interview_results' in st.session_state:
                del st.session_state.interview_results

    if st.session_state.get('interview_results'):
        display_results(st.session_state.interview_results, rubric_content)
with resume_tab:
    st.header("Resume Analysis")

    # Let the reviewer inspect the raw extracted text that will be scored.
    if uploaded_resume:
        with st.expander("View Uploaded Resume Content"):
            display_resume_content(uploaded_resume)

    # Button to trigger analysis; requires both a resume and a job description.
    if st.button("Analyze Resume Score", key="analyze_resume_btn", type="primary", disabled=(not uploaded_resume or not job_description)):
        with st.spinner('Analyzing resume... This may take a moment.'):
            # Call the backend API
            api_results = call_analyze_resume_api(uploaded_resume, job_description)
            # Store results or handle errors
            if api_results and 'error' not in api_results:
                st.session_state.resume_score = api_results
                st.success('Resume analysis complete!')
            else:
                error_message = api_results.get('error', 'An unknown error occurred.') if api_results else 'An unknown error occurred.'
                st.error(f"Analysis failed: {error_message}")
                if 'resume_score' in st.session_state:
                    del st.session_state.resume_score # Clear old results on failure

    # Display results if they exist in the session state
    if 'resume_score' in st.session_state and st.session_state.resume_score:
        results = st.session_state.resume_score.get('results', {})
        # Safely get nested data
        summary = results.get('overall_summary', 'No summary provided.')
        # NOTE(review): presenting this as "/100" assumes per-category scores
        # from the backend are on a 0-100 scale — confirm against the
        # /analyze_resume/ response schema.
        overall_score = calculate_resume_overall_score(results)
        exp = results.get('experience', {})
        edu = results.get('education', {})
        skills = results.get('skills', {})

        st.markdown(f"### Overall Score: {overall_score}/100")
        st.markdown("---")
        st.subheader("Score Breakdown")
        score_col1, score_col2, score_col3 = st.columns(3)
        score_col1.metric(label="Experience Score", value=f"{exp.get('score', 'N/A')}")
        score_col2.metric(label="Education Score", value=f"{edu.get('score', 'N/A')}")
        score_col3.metric(label="Skills Match", value=f"{skills.get('score', 'N/A')}")

        st.subheader("Overall Summary")
        st.write(summary)
        st.subheader("Detailed Analysis")
        with st.expander("**Experience Analysis**"):
            st.write(exp.get('justification', 'No justification provided.'))
        with st.expander("**Education Analysis**"):
            st.write(edu.get('justification', 'No justification provided.'))
        with st.expander("**Skills Match Analysis**"):
            st.write(skills.get('justification', 'No justification provided.'))
    else:
        # Default view shown on page load or when no analysis has been run
        # st.info("Upload a CV and provide a Job Description in the sidebar to start the analysis.")
        st.markdown("---")
        # st.subheader("Score Summary")
        # score_col1, score_col2, score_col3 = st.columns(3)
        # score_col1.metric(label="Experience Score", value="N/A")
        # score_col2.metric(label="Education Score", value="N/A")
        # score_col3.metric(label="Skills Match", value="N/A")