import os
import random
import asyncio

from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool
from mcp import ClientSession
from mcp.client.sse import sse_client


# Define the LLM with higher temperature for variety
def get_llm(api_key):
    return LLM(
        model="gemini/gemini-2.5-flash",
        api_key=api_key,
        temperature=0.9  # Higher temperature for more creative/varied questions
    )


class InterviewAgents:
    def __init__(self, api_key):
        self.llm = get_llm(api_key)
        self.serper_tool = SerperDevTool()

    def technical_interviewer(self):
        return Agent(
            role='Technical Interviewer',
            goal='Analyze the Job Description and CV to generate relevant technical interview questions.',
            backstory=(
                'You are an expert technical recruiter with years of experience in assessing '
                'candidate skills against job requirements. You focus on hard skills and '
                'technical proficiency.'
            ),
            llm=self.llm,
            verbose=True
        )

    def personality_interviewer(self):
        return Agent(
            role='Personality & Culture Fit Specialist',
            goal='Generate behavioral and personality-based interview questions using online resources to ensure best practices.',
            backstory=(
                'You are an organizational psychologist specializing in culture fit and soft '
                'skills. You use data-driven approaches and current trends to ask meaningful '
                'behavioral questions.'
            ),
            tools=[self.serper_tool],
            llm=self.llm,
            verbose=True
        )

    def interview_director(self):
        return Agent(
            role='Interview Director',
            goal='Compile the final interview plan and system instructions.',
            backstory=(
                'You are the Lead Interviewer. You oversee the process and ensure a balanced '
                'interview. You combine inputs from technical and personality specialists to '
                'create a cohesive interview script.'
            ),
            llm=self.llm,
            reasoning=True,
            memory=True,
            verbose=True
        )
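# A quick way to sanity-check agent construction in isolation (a sketch; the
# GEMINI_API_KEY env-var name is an assumption, and SerperDevTool reads its own
# key from SERPER_API_KEY):
#
#     agents = InterviewAgents(os.environ["GEMINI_API_KEY"])
#     print(agents.technical_interviewer().role)  # -> "Technical Interviewer"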
class InterviewTasks:
    def __init__(self, jd_text, cv_text, num_questions):
        self.jd_text = jd_text
        self.cv_text = cv_text
        self.num_questions = num_questions
        # Roughly an 80/20 technical/behavioral split, clamped so the two
        # counts always sum to exactly num_questions (e.g. 10 -> 8 + 2).
        self.n_tech = min(max(1, round(num_questions * 0.8)), max(1, num_questions - 1))
        self.n_psych = num_questions - self.n_tech

    def generate_technical_questions(self, agent):
        # Add randomization for variety
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
            Analyze the following Job Description (JD) and Curriculum Vitae (CV).

            JD: {self.jd_text[:2000]}...
            CV: {self.cv_text[:2000]}...

            IMPORTANT: Use seed {seed} to ensure variety.

            Generate {self.n_tech} UNIQUE technical interview questions.
            - Each question should be DIFFERENT from common interview questions
            - Focus on specific skills mentioned in the JD
            - Ask about practical scenarios or real-world applications
            - Keep questions VERY SHORT (max 15 words) for voice conversation
            - Make questions open-ended to encourage discussion
            - Vary question types: scenario-based, problem-solving, experience-based

            Example formats:
            - "How would you handle [specific technical scenario]?"
            - "Describe your experience with [technology]."
            - "What's your approach to [technical challenge]?"
            """,
            expected_output=f"A list of {self.n_tech} unique, concise technical questions (max 15 words each).",
            agent=agent
        )

    def generate_personality_questions(self, agent):
        # Add randomization for variety
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
            Analyze the JD and CV to understand the company culture and required soft skills.

            JD: {self.jd_text[:2000]}...

            IMPORTANT: Use seed {seed} to ensure variety.

            Generate {self.n_psych} UNIQUE behavioral/personality questions.
            - Use the Serper tool to find CURRENT, trending behavioral interview questions
            - Avoid cliché questions like "What's your greatest weakness?"
            - Focus on real scenarios and past experiences
            - Keep questions VERY SHORT (max 15 words) for voice conversation
            - Make questions conversational and natural

            Example formats:
            - "Tell me about a time you faced [specific challenge]."
            - "How do you handle [workplace situation]?"
            - "Describe a situation where you [behavioral trait]."
            """,
            expected_output=f"A list of {self.n_psych} unique, concise behavioral questions (max 15 words each).",
            agent=agent
        )

    def compile_interview(self, agent, tech_task, psych_task):
        return Task(
            description=f"""
            Compile the final interview plan from the technical and personality questions.

            CRITICAL REQUIREMENTS:

            1. QUESTIONS LIST:
               - Combine all questions into a single numbered list
               - Total must be exactly {self.num_questions} questions
               - Mix: ~80% technical, ~20% behavioral

            2. SYSTEM INSTRUCTION (MUST BE CONCISE AND CONVERSATIONAL):
               Create a SHORT, natural system prompt for a voice AI interviewer.
               **IMPORTANT: The system instruction MUST include the complete list of questions to ask.**

               Format the system instruction like this:

               "You are Alex, a friendly professional interviewer conducting a voice interview.
               As soon as the connection is established, start with: 'Hi! I'm Alex. Let's begin with the first question.'

               YOUR QUESTIONS (ask these in order):
               1. [First question]
               2. [Second question]
               3. [Third question]
               ... [all questions]

               CONVERSATION RULES:
               - Ask ONE question at a time and WAIT for the complete answer
               - Keep responses SHORT (1-2 sentences max)
               - If interrupted, STOP talking immediately and listen
               - After each answer, briefly acknowledge (e.g., 'Great!', 'I see', 'Thanks') then ask the next question
               - Use a warm, conversational tone
               - End with: 'Thanks for your time today!'

               Remember: Listen actively, don't interrupt, and keep it conversational."

            Output Format (JSON):
            {{
                "questions_markdown": "# Interview Questions\\n\\n1. [Question 1]\\n2. [Question 2]...",
                "system_instruction": "[Complete system instruction with embedded questions list as shown above]"
            }}
            """,
            expected_output="A JSON object with 'questions_markdown' (formatted list) and 'system_instruction' (concise prompt with embedded questions, under 300 words).",
            agent=agent,
            context=[tech_task, psych_task]
        )


def run_interview_crew(jd_text, cv_text, num_questions, api_key):
    agents = InterviewAgents(api_key)
    tasks = InterviewTasks(jd_text, cv_text, num_questions)

    tech_agent = agents.technical_interviewer()
    psych_agent = agents.personality_interviewer()
    director_agent = agents.interview_director()

    tech_task = tasks.generate_technical_questions(tech_agent)
    psych_task = tasks.generate_personality_questions(psych_agent)
    compile_task = tasks.compile_interview(director_agent, tech_task, psych_task)

    crew = Crew(
        agents=[tech_agent, psych_agent, director_agent],
        tasks=[tech_task, psych_task, compile_task],
        process=Process.sequential,
        verbose=True
    )

    result = crew.kickoff()
    return result
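# Example invocation of the generation pipeline (a minimal sketch; the file
# names and env-var name below are assumptions, not part of this module):
#
#     jd_text = open("job_description.txt").read()
#     cv_text = open("cv.txt").read()
#     plan = run_interview_crew(jd_text, cv_text, num_questions=10,
#                               api_key=os.environ["GEMINI_API_KEY"])
#     print(plan)  # CrewOutput from the director's compile task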
# --- Custom Tools ---

class SentimentAnalysisTool(BaseTool):
    name: str = "Sentiment Analysis Tool"
    description: str = (
        "Analyzes the sentiment of a given text. Returns 'Positive', 'Negative', "
        "or 'Neutral'. Use this to gauge the candidate's attitude."
    )

    def _run(self, text: str) -> str:
        async def call_mcp(text_input):
            sse_url = "https://uq-sentimentanalysismcpserver.hf.space/gradio_api/mcp/sse"
            try:
                async with sse_client(sse_url) as (read, write):
                    async with ClientSession(read, write) as session:
                        await session.initialize()
                        result = await session.call_tool(
                            "SentimentAnalysisMCPserver_predict_sentiment",
                            arguments={"text": text_input}
                        )
                        if result.content and len(result.content) > 0:
                            return result.content[0].text
                        return "Error: No content returned"
            except Exception as e:
                return f"Error connecting to MCP: {str(e)}"

        # This tool is called synchronously from a worker thread (app.py wraps
        # it in asyncio.to_thread), so no event loop should be running in this
        # thread and asyncio.run() is safe. If a loop is already attached to
        # the thread but not running, drive a fresh loop directly instead.
        try:
            try:
                return asyncio.run(call_mcp(text))
            except RuntimeError:
                loop = asyncio.new_event_loop()
                try:
                    return loop.run_until_complete(call_mcp(text))
                finally:
                    loop.close()
        except Exception as e:
            return f"Error analyzing sentiment: {str(e)}"


# --- Evaluation Agents ---

def get_evaluation_llm(api_key):
    return LLM(
        model="gemini/gemini-2.5-flash",
        api_key=api_key,
        temperature=0.7  # Lower temperature for more consistent evaluation
    )


class EvaluationAgents:
    def __init__(self, api_key):
        self.llm = get_evaluation_llm(api_key)
        self.sentiment_tool = SentimentAnalysisTool()

    def technical_evaluator(self):
        return Agent(
            role='Technical Skills Evaluator',
            goal="Evaluate the candidate's technical skills and knowledge based on their interview responses.",
            backstory=(
                'You are an expert technical recruiter with deep knowledge in assessing '
                'technical competencies. You analyze answers for depth, accuracy, and '
                'practical application of skills.'
            ),
            llm=self.llm,
            verbose=True
        )

    def behavioral_evaluator(self):
        return Agent(
            role='Behavioral & Culture Fit Evaluator',
            goal="Assess the candidate's soft skills, communication, and cultural fit based on behavioral questions.",
            backstory=(
                'You are an organizational psychologist specializing in evaluating '
                'interpersonal skills, problem-solving approaches, and alignment with company '
                'culture. You look for evidence of leadership, teamwork, and adaptability.'
            ),
            tools=[self.sentiment_tool],
            llm=self.llm,
            verbose=True
        )

    def evaluation_director(self):
        return Agent(
            role='Evaluation Director',
            goal='Compile a comprehensive scorecard with scores, feedback, and hiring recommendation.',
            backstory=(
                'You are the Lead Evaluator responsible for synthesizing all evaluation inputs '
                'into a clear, actionable scorecard. You ensure fairness and consistency in scoring.'
            ),
            llm=self.llm,
            reasoning=True,
            memory=True,
            verbose=True
        )
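# Standalone check of the MCP-backed sentiment tool (a sketch; the sample
# sentence is illustrative, and the call needs network access to the Hugging
# Face Space referenced above):
#
#     tool = SentimentAnalysisTool()
#     print(tool._run("I really enjoyed collaborating with that team."))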
class EvaluationTasks:
    def __init__(self, transcript, jd_text, questions_text):
        self.transcript = transcript
        self.jd_text = jd_text
        self.questions_text = questions_text

    def evaluate_technical_skills(self, agent):
        return Task(
            description=f"""
            Evaluate the candidate's technical performance based on the interview transcript.

            TRANSCRIPT:
            {self.transcript[:3000]}

            JOB DESCRIPTION:
            {self.jd_text[:2000]}

            INTERVIEW QUESTIONS:
            {self.questions_text[:2000]}

            TASK:
            **CRITICAL: ONLY evaluate TECHNICAL questions. Identify which questions are technical
            (related to hard skills, technologies, tools, programming, systems, etc.) and ONLY score those.**

            For EACH technical question identified, provide:
            1. The exact question text
            2. Technical competency score (0-10)
            3. Detailed feedback on technical knowledge, problem-solving approach, and depth of understanding

            Also provide:
            4. Overall technical strengths
            5. Overall technical weaknesses
            6. Alignment with job requirements

            Focus on:
            - Accuracy and correctness of technical answers
            - Depth of knowledge demonstrated
            - Problem-solving methodology
            - Practical application of skills
            - Communication of technical concepts

            **DO NOT evaluate behavioral, personality, or soft skills questions. Only technical questions.**
            """,
            expected_output="A detailed technical evaluation with scores (0-10) and feedback for EACH TECHNICAL QUESTION ONLY. Format: For each technical question, provide: Question | Score (0-10) | Feedback. Plus overall technical strengths and weaknesses.",
            agent=agent
        )

    def evaluate_behavioral_skills(self, agent):
        return Task(
            description=f"""
            Evaluate the candidate's behavioral and soft skills based on the interview transcript.

            TRANSCRIPT:
            {self.transcript[:3000]}

            JOB DESCRIPTION:
            {self.jd_text[:2000]}

            INTERVIEW QUESTIONS:
            {self.questions_text[:2000]}

            TASK:
            **CRITICAL: ONLY evaluate BEHAVIORAL/SOFT SKILLS questions. Identify which questions are
            behavioral (related to past experiences, teamwork, leadership, culture fit, problem-solving
            scenarios, etc.) and ONLY score those.**

            For EACH behavioral question identified, provide:
            1. The exact question text
            2. Behavioral competency score (0-10)
            3. Sentiment Analysis: Use the 'Sentiment Analysis Tool' to analyze the candidate's answer.
               Include the result (Positive/Negative/Neutral) in your evaluation.
            4. Detailed feedback on communication, examples shared, and soft skills demonstrated

            Also provide:
            5. Assessment of communication skills, leadership, teamwork, and adaptability
            6. Cultural fit evaluation
            7. Examples of demonstrated soft skills

            Focus on:
            - Quality of examples and stories shared
            - Problem-solving approach in real situations
            - Interpersonal skills and communication clarity
            - Alignment with company values and culture
            - Emotional intelligence and self-awareness

            **DO NOT evaluate technical, programming, or hard skills questions. Only behavioral/soft skills questions.**
            """,
            expected_output="A detailed behavioral evaluation with scores (0-10) and feedback for EACH BEHAVIORAL QUESTION ONLY. Format: For each behavioral question, provide: Question | Score (0-10) | Feedback. Plus overall soft skills assessment and culture fit analysis.",
            agent=agent
        )
    def compile_scorecard(self, agent, tech_task, behavioral_task):
        return Task(
            description=f"""
            Compile a comprehensive interview scorecard from technical and behavioral evaluations.

            You have received evaluations from:
            1. Technical Evaluator - evaluated ONLY technical questions
            2. Behavioral Evaluator - evaluated ONLY behavioral questions

            CRITICAL REQUIREMENTS:

            1. SUMMARY:
               - Brief overview of candidate performance (2-3 sentences)

            2. SCORECARD TABLE:
               - Create a markdown table with columns: Question | Category | Score (0-10) | Feedback
               - **CRITICAL: Each question must appear EXACTLY ONCE in the table - NO DUPLICATES**
               - Merge the two evaluations: take technical questions from the Technical Evaluator's output,
                 behavioral questions from the Behavioral Evaluator's output
               - For each technical question: use the exact question text, Category = "Technical", and the
                 score/feedback from the Technical Evaluator
               - For each behavioral question: use the exact question text, Category = "Behavioral", and the
                 score/feedback from the Behavioral Evaluator
               - If a question appears in both evaluations, that's an error - each question should only be
                 in one category
               - List all questions in the order they appear in the interview

            3. OVERALL SCORES:
               - Average Technical Score
               - Average Behavioral Score
               - Overall Score

            4. STRENGTHS:
               - List 3-5 key strengths demonstrated

            5. AREAS FOR IMPROVEMENT:
               - List 2-4 areas where the candidate could improve

            6. FINAL DECISION:
               - One of: "Strong Hire", "Hire", "No Hire"
               - Brief justification (1-2 sentences)

            Output Format (Markdown):

            # Interview Scorecard

            ## Summary
            [Brief overview]

            ## Scorecard
            | Question | Category | Score | Feedback |
            |----------|----------|-------|----------|
            | [Q1] | Technical | X/10 | [Feedback] |
            ...

            ## Overall Scores
            - **Technical Average**: X/10
            - **Behavioral Average**: X/10
            - **Overall Score**: X/10

            ## Strengths
            1. [Strength 1]
            2. [Strength 2]
            ...

            ## Areas for Improvement
            1. [Area 1]
            2. [Area 2]
            ...

            ## Final Decision
            **Decision**: [Strong Hire/Hire/No Hire]
            [Justification]
            """,
            expected_output="A comprehensive markdown scorecard with summary, detailed table, scores, strengths, weaknesses, and hiring recommendation.",
            agent=agent,
            context=[tech_task, behavioral_task]
        )


def run_evaluation_crew(transcript, jd_text, questions_text, api_key):
    """Run CrewAI evaluation crew to generate scorecard"""
    agents = EvaluationAgents(api_key)
    tasks = EvaluationTasks(transcript, jd_text, questions_text)

    tech_evaluator = agents.technical_evaluator()
    behavioral_evaluator = agents.behavioral_evaluator()
    director = agents.evaluation_director()

    tech_task = tasks.evaluate_technical_skills(tech_evaluator)
    behavioral_task = tasks.evaluate_behavioral_skills(behavioral_evaluator)
    compile_task = tasks.compile_scorecard(director, tech_task, behavioral_task)

    crew = Crew(
        agents=[tech_evaluator, behavioral_evaluator, director],
        tasks=[tech_task, behavioral_task, compile_task],
        process=Process.sequential,
        verbose=True
    )

    result = crew.kickoff()
    return result
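if __name__ == "__main__":
    # Minimal end-to-end smoke test (a sketch, not the application entry
    # point). The file names and the GEMINI_API_KEY env var are assumptions
    # for illustration only.
    api_key = os.environ["GEMINI_API_KEY"]
    jd = open("job_description.txt").read()
    cv = open("cv.txt").read()

    plan = run_interview_crew(jd, cv, num_questions=10, api_key=api_key)
    print(plan)

    # Once the voice interview has produced a transcript, score it. Passing
    # str(plan) as questions_text is a stand-in for the parsed question list.
    transcript = open("interview_transcript.txt").read()
    scorecard = run_evaluation_crew(transcript, jd, str(plan), api_key=api_key)
    print(scorecard)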