import streamlit as st
import yaml
import requests
import pypdf
import docx
import re
from io import BytesIO
import os

# Streamlit page configuration must be the first st.* call in the script.
st.set_page_config(page_title="AI Interview Scorer", page_icon="🤖", layout="wide")
st.title("Intelcruit: AI Interview Scorer")

# --- MOCK DATA CONSTANTS (to make frontend self-contained) ---
# Vietnamese job description shown (read-only) in the sidebar when
# "Use Mock Data" is checked.
MOCK_JOB_DESCRIPTION = """ Mô tả công việc Sử dụng các công cụ và framework như TensorFlow, PyTorch, và Hugging Face Transformers để xây dựng các mô hình ngôn ngữ. Sử dụng các kỹ thuật NLP để phân tích, trích xuất thông tin từ văn bản, và xử lý ngôn ngữ tự nhiên. Phát triển các hệ thống truy xuất thông tin từ cơ sở dữ liệu để hỗ trợ quá trình tạo ra câu trả lời chính xác và đầy đủ. Sử dụng các kỹ thuật RAG để kết hợp thông tin truy xuất từ các nguồn dữ liệu với khả năng sinh văn bản của mô hình. Theo dõi và nghiên cứu các xu hướng và công nghệ mới trong lĩnh vực NLP, Chatbot và RAG. Tối ưu hóa thời gian phản hồi và hiệu suất của hệ thống truy xuất thông tin. Yêu cầu ứng viên Có tối thiểu 1 năm kinh nghiệm Tốt nghiệp Cao đẳng/Đại học các chuyên ngành Công nghệ Thông tin, Toán Tin, Điện tử Viễn thông, Điều khiển Tự động, hoặc các ngành liên quan. Kiến thức chuyên môn: Có hiểu biết về Machine Learning và Deep Learning. Kinh nghiệm làm việc với các mô hình ngôn ngữ lớn (LLM) Có kinh nghiệm làm việc với RESTAPI, Langchain, llamaindex, ... Kỹ năng nghiên cứu và nền tảng: Khả năng nghiên cứu và áp dụng các công nghệ mới. Nền tảng vững chắc về cấu trúc dữ liệu và thuật toán. Hiểu biết và có kinh nghiệm lập trình với các ngôn ngữ như C++ và Python. Có kinh nghiệm làm việc với cơ sở dữ liệu SQL. 
Quyền lợi Mức lương: thỏa thuận khi phỏng vấn Công ty đóng 100% BHYT, BHXH, BHTN Công ty cung cấp thiết bị làm việc Review lương 1 - 2 lần/năm theo năng lực Thưởng ngày lễ 2/9, 30/04, 1/5, ..., Tết, thưởng lương tháng 13 Thưởng kết quả kinh doanh toàn công ty cuối năm Du lịch 2 lần/năm Môi trường làm việc năng động, chuyên nghiệp """

# YAML scoring rubric (two weighted categories) used when mock mode is on.
# NOTE(review): this is parsed later with yaml.safe_load — verify it parses
# as intended given the current formatting of the literal.
MOCK_RUBRIC_CONTENT = """expertise: description: "Đánh giá mức độ thành thạo về chuyên môn AI, bao gồm kiến thức và kinh nghiệm thực tế với NLP, LLM, RAG, và các công cụ như PyTorch, TensorFlow, HuggingFace, LangChain, REST API. Khả năng áp dụng thuật toán, xử lý dữ liệu và tối ưu hóa hệ thống cũng được xem xét." weight: 0.7 communication: description: "Đánh giá khả năng trình bày ý tưởng rõ ràng, trao đổi kỹ thuật hiệu quả, viết tài liệu hoặc báo cáo kỹ thuật dễ hiểu, và khả năng giao tiếp với các thành viên không chuyên kỹ thuật (PM, khách hàng nội bộ)." weight: 0.3 """

# --- MOCK DATA PATHS ---
# Get the absolute path of the directory containing the current script (frontend/)
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Go up one level to get the project root (intelcruit/)
PROJECT_ROOT = os.path.dirname(SCRIPT_DIR)
# NOTE(review): debug print left in place — consider removing or using logging.
print("PROJECT_ROOT: ", PROJECT_ROOT)
# Construct the full, robust paths to the mock files
# MOCK_AUDIO_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_interview_audio_tts_ai_engineer.wav")
# MOCK_TRANSCRIPT_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_interview_transcipt.txt")
# MOCK_RESUME_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_resume_ai_engineer.pdf")
# MOCK_JD_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_job_description.txt")
# MOCK_RUBRIC_PATH = os.path.join(PROJECT_ROOT, "frontend", "examples", "example_rubric.yaml")
# MOCK_TRANSCRIPT_PLACEHOLDER = "This is a placeholder for the mock transcript. It will be replaced by content from the mock file if available."
# --- MOCK DATA PATHS ---
# Relative to the working directory Streamlit is launched from.
MOCK_AUDIO_PATH = os.path.join("examples", "example_interview_audio_tts_ai_engineer.wav")
MOCK_TRANSCRIPT_PATH = os.path.join("examples", "example_interview_transcipt.txt")
MOCK_RESUME_PATH = os.path.join("examples", "example_resume_ai_engineer.pdf")
MOCK_JD_PATH = os.path.join("examples", "example_job_description.txt")
MOCK_RUBRIC_PATH = os.path.join("examples", "example_rubric.yaml")
MOCK_TRANSCRIPT_PLACEHOLDER = "This is a placeholder for the mock transcript. It will be replaced by content from the mock file if available."


def load_mock_file(path, mime_type):
    """Loads a mock file from the given path and returns a BytesIO object.

    The returned BytesIO is given ``.name`` and ``.type`` attributes so it can
    stand in for a Streamlit ``UploadedFile``. Returns None (and shows a UI
    warning) when the file does not exist.
    """
    if not os.path.exists(path):
        st.warning(f"Mock file not found at: {path}. Please ensure it exists.")
        return None
    with open(path, "rb") as f:
        file_bytes = f.read()
    mock_file = BytesIO(file_bytes)
    mock_file.name = os.path.basename(path)
    mock_file.type = mime_type
    return mock_file


# --- API & HELPER FUNCTIONS ---
def calculate_overall_score(scored_pairs, rubric):
    """Compute the weighted overall interview score, scaled to 0-100.

    Args:
        scored_pairs: list of Q&A dicts, each optionally carrying
            ``analysis.scores`` = [{category, score (0-10), ...}, ...].
        rubric: mapping of category -> {description, weight}.

    Returns:
        Tuple ``(final_score_100, avg_category_scores_100)``; both are on a
        0-100 scale. Returns ``(0, {})`` when there is nothing to score or the
        rubric weights sum to zero.
    """
    if not rubric or not scored_pairs:
        return 0, {}
    # Missing weights default to 1 so unweighted rubrics still work.
    category_weights = {cat: data.get('weight', 1) for cat, data in rubric.items()}
    total_rubric_weight = sum(category_weights.values())
    if total_rubric_weight == 0:
        return 0, {}
    # Collect per-category raw scores (0-10) across all scored pairs;
    # categories absent from the rubric and non-numeric scores are ignored.
    category_scores = {cat: [] for cat in category_weights.keys()}
    for pair in scored_pairs:
        if 'analysis' in pair and 'scores' in pair['analysis']:
            for score_item in pair['analysis']['scores']:
                category = score_item.get('category')
                score = score_item.get('score')
                if category in category_scores and isinstance(score, (int, float)):
                    category_scores[category].append(score)
    avg_category_scores = {}
    for cat, scores in category_scores.items():
        avg_category_scores[cat] = sum(scores) / len(scores) if scores else 0
    weighted_score = sum(avg_score * category_weights.get(cat, 1)
                         for cat, avg_score in avg_category_scores.items())
    final_score_10 = weighted_score / total_rubric_weight
    # Scale scores to 100
    final_score_100 = final_score_10 * 10
    avg_category_scores_100 = {cat: score * 10 for cat, score in avg_category_scores.items()}
    return final_score_100, avg_category_scores_100


def calculate_resume_overall_score(results):
    """Calculates the overall resume score from the detailed scores.

    Averages the numeric ``score`` values of the experience/education/skills
    sections, skipping entries that are missing or non-numeric. Returns 0 when
    no usable scores exist.
    """
    if not results:
        return 0
    scores = []
    for category in ['experience', 'education', 'skills']:
        # Safely get the score; skip if not found or not a number.
        score = results.get(category, {}).get('score')
        if isinstance(score, (int, float)):
            scores.append(score)
    if not scores:
        return 0
    overall_score = sum(scores) / len(scores)
    return round(overall_score, 1)


def display_results(results_data, rubric_content):
    """Render the interview analysis report: summary metrics plus a
    per-question breakdown grouped by category ('general' listed last)."""
    try:
        rubric = yaml.safe_load(rubric_content)
    except yaml.YAMLError:
        st.error("Could not parse rubric.")
        rubric = {}
    scored_pairs = results_data.get('results', {}).get('scored_qa_pairs', [])
    if not scored_pairs:
        st.warning("No scorable question and answer pairs were found.")
        return
    st.subheader("Summary")
    final_score, avg_category_scores = calculate_overall_score(scored_pairs, rubric)
    st.metric(label="Overall Score", value=f"{final_score:.1f}/100")
    if avg_category_scores:
        st.markdown("**Category Scores:**")
        cols = st.columns(len(avg_category_scores))
        for i, (cat, score) in enumerate(avg_category_scores.items()):
            with cols[i]:
                st.metric(label=cat.replace('_', ' ').title(), value=f"{score:.1f}/100")
    st.markdown("---")
    st.subheader("Detailed Question & Answer Analysis")
    # Group the Q&A pairs by category so related questions render together.
    categorized_scores = {}
    for pair in scored_pairs:
        category = pair.get('category', 'general')
        if category not in categorized_scores:
            categorized_scores[category] = []
        categorized_scores[category].append(pair)
    # Sort alphabetically, but force 'general' to the end.
    sorted_categories = sorted(categorized_scores.keys(), key=lambda x: (x == 'general', x))
    for category in sorted_categories:
        pairs = categorized_scores[category]
        for pair in pairs:
            with st.expander(f"**{pair['question']}**"):
                st.markdown(f"**Candidate's Answer:** *{pair['answer']}*")
                if 'analysis' in pair and 'scores' in pair['analysis']:
                    st.markdown("**AI Analysis:**")
                    for score_item in pair['analysis']['scores']:
                        category_name = score_item.get('category', 'General').replace('_', ' ').title()
                        score = score_item.get('score', 'N/A')
                        reasoning = score_item.get('reasoning', 'No reasoning provided.')
                        st.markdown(f"**{category_name} Score:** `{score}/10`")
                        st.info(f"**Explanation:** {reasoning}")
                elif 'analysis' in pair and 'error' in pair['analysis']:
                    st.error(f"Could not score this answer: {pair['analysis']['error']}")
                else:
                    st.warning("No analysis available for this Q&A pair.")


# Backend endpoints.
# BASE_URL = "http://127.0.0.1:8000"
# FIX: no trailing slash here — every endpoint below prepends its own '/',
# and the previous trailing slash produced double-slash URLs
# (e.g. "...hf.space//analyze/").
BASE_URL = "https://namfam-intelcruit-backend.hf.space"
RESUME_EXTRACTION_URL = f"{BASE_URL}/extract_from_resume/"


def get_text_from_file(file):
    """Extracts text from an uploaded file (PDF, DOCX, TXT).

    Returns whitespace-normalized text, or None on an unsupported type or a
    read error (a warning/error is shown in the UI in those cases).
    """
    text = ""
    try:
        file.seek(0)  # Reset file pointer
        if file.type == "application/pdf":
            pdf_reader = pypdf.PdfReader(file)
            raw_text = " ".join(page.extract_text() or "" for page in pdf_reader.pages)
            text = re.sub(r'\s+', ' ', raw_text).strip()
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = docx.Document(file)
            raw_text = " ".join(p.text for p in doc.paragraphs)
            text = re.sub(r'\s+', ' ', raw_text).strip()
        elif file.type == "text/plain":
            text = file.read().decode('utf-8')
        else:
            st.warning(f"Unsupported file type: {file.type}")
            return None
    except Exception as e:
        st.error(f"Error reading file: {e}")
        return None
    return text


def display_resume_content(file):
    """Displays the content of the uploaded resume in the UI."""
    resume_text = get_text_from_file(file)
    if resume_text:
        st.text_area("Resume Content", value=resume_text, height=300, disabled=True)
    else:
        st.info("Could not display content for this file type or an error occurred.")


def call_analyze_mock_api():
    """POST to the backend's canned mock-analysis endpoint (no inputs)."""
    url = f"{BASE_URL}/analyze_mock/"
    try:
        response = requests.post(url, timeout=120)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"Failed to connect to backend: {e}"}


def call_analyze_interview_api(job_description, rubric_content, audio_file=None, transcript_content=None):
    """Calls the backend to analyze an interview from audio or transcript.

    Exactly one of ``audio_file`` / ``transcript_content`` should be supplied;
    audio takes precedence. Returns the parsed JSON response, or a dict with
    an 'error' key on failure.
    """
    api_url = f"{BASE_URL}/analyze/"
    files = {}
    data = {
        'job_description': job_description,
        'rubric_content': rubric_content
    }
    if audio_file:
        files['audio_file'] = (audio_file.name, audio_file.getvalue(), audio_file.type)
    elif transcript_content:
        data['transcript_content'] = transcript_content
    else:
        # This case should ideally be prevented by the UI
        return {"error": "No audio file or transcript was provided."}
    try:
        # Use a long timeout as analysis can take time
        response = requests.post(api_url, files=files, data=data, timeout=300)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {e}"}


def call_analyze_resume_api(resume_file, job_description):
    """Calls the backend to analyze and score a resume."""
    api_url = f"{BASE_URL}/analyze_resume/"
    files = {'resume_file': (resume_file.name, resume_file.getvalue(), resume_file.type)}
    data = {'job_description': job_description}
    try:
        response = requests.post(api_url, files=files, data=data, timeout=180)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {e}"}


def call_extract_api(resume_file):
    """Calls the backend to extract basic candidate info from a resume.

    Returns None when no file is given, the parsed JSON on success, or a dict
    with an 'error' key (after showing the error in the UI) on failure.
    """
    if not resume_file:
        return None
    files = {'resume_file': (resume_file.name, resume_file.getvalue(), resume_file.type)}
    try:
        response = requests.post(RESUME_EXTRACTION_URL, files=files, timeout=60)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error connecting to backend for extraction: {e}")
        return {"error": str(e)}


# --- PAGE CONFIG & SESSION STATE ---
if 'interview_results' not in st.session_state:
    st.session_state.interview_results = None
if 'use_mock_data' not in st.session_state:
    st.session_state.use_mock_data = False
if 'candidate_info' not in st.session_state:
    st.session_state.candidate_info = None
if 'resume_file_name' not in st.session_state:
    st.session_state.resume_file_name = None
if 'jd_input' not in st.session_state:
    st.session_state.jd_input = None

# --- UI LAYOUT ---
# --- SIDEBAR ---
st.sidebar.header("⚙️ Configuration")
st.session_state.use_mock_data = st.sidebar.checkbox("Use Mock Data for Quick Testing", key='use_mock_data_checkbox')
if st.session_state.use_mock_data:
    # Mock mode: show the canned JD/rubric read-only.
    job_description = st.sidebar.text_area("Job Description", value=MOCK_JOB_DESCRIPTION, height=250, disabled=True)
    rubric_content = st.sidebar.text_area("Scoring Rubric (YAML)", value=MOCK_RUBRIC_CONTENT, height=400, disabled=True)
else:
    job_description = st.sidebar.text_area("Job Description", placeholder="Paste the full job description here...", height=250, key="jd_input")
    rubric_content = st.sidebar.text_area("Scoring Rubric (YAML)", placeholder="Paste the YAML scoring rubric here...", height=400)
st.sidebar.markdown("---")

# --- MAIN PANEL ---
# --- Candidate Profile Section ---
with st.container():
    st.subheader("Candidate Profile")
    profile_col1, profile_col2, profile_col3 = st.columns([1, 1, 2])
    with profile_col1:
        if st.session_state.use_mock_data:
            uploaded_resume = load_mock_file(MOCK_RESUME_PATH, "application/pdf")
        else:
            uploaded_resume = st.file_uploader("Upload Candidate Resume/CV", type=["pdf", "docx", "txt"], key="resume_uploader")
        if uploaded_resume:
            st.image("https://www.w3schools.com/howto/img_avatar.png", width=150)
        # Re-run extraction only when a new file (identified by name) arrives.
        if uploaded_resume and uploaded_resume.name != st.session_state.get('resume_file_name'):
            st.session_state.resume_file_name = uploaded_resume.name
            st.session_state.candidate_info = None
            with st.spinner("🤖 Extracting key information from resume..."):
                extracted_data = call_extract_api(uploaded_resume)
                if extracted_data and "error" not in extracted_data:
                    st.session_state.candidate_info = extracted_data
                    st.success("Information extracted successfully!")
                elif extracted_data:
                    st.error(f"Extraction failed: {extracted_data.get('error')}")
    with profile_col2:
        st.markdown("**Basic Information:**")
        if st.session_state.candidate_info:
            info = st.session_state.candidate_info
            st.write(f"**Full Name:** {info.get('full_name', 'N/A')}")
            st.write(f"**Email:** {info.get('email', 'N/A')}")
            st.write(f"**Phone:** {info.get('phone', 'N/A')}")
        else:
            st.info("Upload a resume to extract details.")
    with profile_col3:
        st.markdown("**Summaries:**")
        if st.session_state.candidate_info:
            info = st.session_state.candidate_info
            with st.expander("Experience"):
                st.markdown(info.get('experience_summary', 'N/A'))
            with st.expander("Education"):
                st.markdown(info.get('education_summary', 'N/A'))
            with st.expander("Skills"):
                st.markdown(info.get('skill_summary', 'N/A'))
        else:
            st.info("Summaries appear after extraction.")

# FIX: this caption claims a mock resume, so only show it in mock mode
# (previously it also appeared for real user uploads).
if st.session_state.use_mock_data and uploaded_resume:
    st.info(f"Using mock resume: {uploaded_resume.name}")

# --- Analysis Tabs ---
st.subheader("Assessment")
resume_tab, interview_tab = st.tabs(["📝 Resume/CV Score", "🎙️ Interview Score"])

with interview_tab:
    st.header("Interview Analysis")
    # Radio button for input method selection.
    input_method = st.radio("Choose input method:", ("Upload Audio", "Enter Transcript"), key="interview_input_method")
    audio_file = None
    transcript_input = None
    if input_method == "Upload Audio":
        if st.session_state.use_mock_data:
            # FIX: the mock file is a .wav, so advertise the matching MIME type
            # (was "audio/mp3").
            audio_file = load_mock_file(MOCK_AUDIO_PATH, "audio/wav")
            if audio_file:
                st.info(f"Using mock audio: {audio_file.name}")
                st.audio(audio_file)
            # Keep mock transcript display for context if audio is used
            try:
                with open(MOCK_TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
                    mock_transcript_content = f.read()
                with st.expander("View Mock Transcript"):
                    transcript_input = st.text_area("Paste Transcript Here", value=mock_transcript_content, height=300, disabled=True, placeholder=mock_transcript_content)
            except FileNotFoundError:
                st.warning("Mock transcript file not found.")
                transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder="Mock transcript file not found.", disabled=True)
        else:
            audio_file = st.file_uploader("Upload Interview Audio", type=['mp3', 'wav', 'm4a', 'mp4'], key="live_audio_uploader")
            if audio_file:
                st.audio(audio_file)
    else:  # input_method == "Enter Transcript"
        if st.session_state.use_mock_data:
            try:
                with open(MOCK_TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
                    transcript_input = f.read()
                st.info("Using mock transcript.")
                transcript_input = st.text_area("Paste Transcript Here", value=transcript_input, height=300, disabled=True)
            except FileNotFoundError:
                st.warning("Mock transcript file not found.")
                transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder="Mock transcript file not found.", disabled=True)
        else:
            # FIX: guard the mock-transcript read (only used as a placeholder
            # here) so a missing examples/ directory no longer crashes the app.
            try:
                with open(MOCK_TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
                    placeholder_text = f.read()
            except (FileNotFoundError, OSError):
                placeholder_text = MOCK_TRANSCRIPT_PLACEHOLDER
            transcript_input = st.text_area("Paste Transcript Here", height=300, placeholder=placeholder_text)

    # Initialize response_data outside the button block.
    response_data = None
    # The analyze button is enabled only once some input exists; this must be
    # computed after transcript_input and audio_file are potentially set.
    analyze_button_disabled = (not audio_file and not transcript_input)
    if st.button("Analyze Interview", key="analyze_interview_btn", disabled=analyze_button_disabled):
        if st.session_state.use_mock_data:
            with st.spinner('Analyzing mock interview...'):
                response_data = call_analyze_mock_api()
        elif audio_file:
            with st.spinner('Analyzing interview... This may take several minutes.'):
                response_data = call_analyze_interview_api(job_description, rubric_content, audio_file=audio_file)
        elif transcript_input:
            with st.spinner('Analyzing interview... This may take several minutes.'):
                response_data = call_analyze_interview_api(job_description, rubric_content, transcript_content=transcript_input)
        if response_data and "error" not in response_data:
            st.session_state.interview_results = response_data
            st.success('Interview analysis complete!')
        elif response_data:
            st.error(f"API Error: {response_data.get('error')}")
            # Drop stale results so the report below disappears on failure.
            if 'interview_results' in st.session_state:
                del st.session_state.interview_results
    if st.session_state.get('interview_results'):
        display_results(st.session_state.interview_results, rubric_content)

with resume_tab:
    st.header("Resume Analysis")
    if uploaded_resume:
        with st.expander("View Uploaded Resume Content"):
            display_resume_content(uploaded_resume)
    # Button to trigger analysis; requires both a resume and a JD.
    if st.button("Analyze Resume Score", key="analyze_resume_btn", type="primary", disabled=(not uploaded_resume or not job_description)):
        with st.spinner('Analyzing resume... This may take a moment.'):
            # Call the backend API
            api_results = call_analyze_resume_api(uploaded_resume, job_description)
            # Store results or handle errors
            if api_results and 'error' not in api_results:
                st.session_state.resume_score = api_results
                st.success('Resume analysis complete!')
            else:
                error_message = api_results.get('error', 'An unknown error occurred.') if api_results else 'An unknown error occurred.'
                st.error(f"Analysis failed: {error_message}")
                if 'resume_score' in st.session_state:
                    del st.session_state.resume_score  # Clear old results on failure
    # Display results if they exist in the session state
    if 'resume_score' in st.session_state and st.session_state.resume_score:
        results = st.session_state.resume_score.get('results', {})
        # Safely get nested data
        summary = results.get('overall_summary', 'No summary provided.')
        overall_score = calculate_resume_overall_score(results)
        exp = results.get('experience', {})
        edu = results.get('education', {})
        skills = results.get('skills', {})
        st.markdown(f"### Overall Score: {overall_score}/100")
        st.markdown("---")
        st.subheader("Score Breakdown")
        score_col1, score_col2, score_col3 = st.columns(3)
        score_col1.metric(label="Experience Score", value=f"{exp.get('score', 'N/A')}")
        score_col2.metric(label="Education Score", value=f"{edu.get('score', 'N/A')}")
        score_col3.metric(label="Skills Match", value=f"{skills.get('score', 'N/A')}")
        st.subheader("Overall Summary")
        st.write(summary)
        st.subheader("Detailed Analysis")
        with st.expander("**Experience Analysis**"):
            st.write(exp.get('justification', 'No justification provided.'))
        with st.expander("**Education Analysis**"):
            st.write(edu.get('justification', 'No justification provided.'))
        with st.expander("**Skills Match Analysis**"):
            st.write(skills.get('justification', 'No justification provided.'))
    else:
        # Default view shown on page load or when no analysis has been run
        st.markdown("---")