# NOTE: removed pasted console residue ("Spaces:" / "Runtime error") that was not part of the module.
| # gemini_integration.py | |
| import os | |
| import time | |
| import base64 | |
| import json | |
| from typing import List, Dict, Any, Optional, Union | |
| # Fix: Replace incorrect imports with the proper Google Generative AI imports | |
| import google.generativeai as genai | |
| from google.generativeai import types | |
class GeminiClient:
    """Enhanced Gemini client for the GitHub AI Agent.

    Wraps the ``google.generativeai`` SDK with task-specific helpers for
    repository analysis, natural-language Q&A about a repo, code-snippet
    review, collaborator matching, and free-form prompting.
    """

    def __init__(self, api_key: str, model: str = "gemini-2.0-pro-exp-02-05"):
        """
        Initialize the Gemini client.

        Args:
            api_key: Gemini API key
            model: Gemini model to use (default: gemini-2.0-pro-exp-02-05)
        """
        self.api_key = api_key
        self.model_name = model
        genai.configure(api_key=api_key)  # SDK-wide API key configuration
        self.model = genai.GenerativeModel(model)
        # Rolling conversation history in the SDK's {"role", "parts"} format.
        self.chat_history: List[Dict[str, Any]] = []
        # Low temperature: analysis tasks favor deterministic, factual output.
        self.config = types.GenerationConfig(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

    def _generate(self, prompt: str) -> str:
        """Send *prompt* to Gemini and return the plain-text response.

        Propagates any SDK exception; callers wrap this with their own
        task-specific error handling.
        """
        response = self.model.generate_content(prompt, generation_config=self.config)
        return response.text

    @staticmethod
    def _extract_json(text: str) -> str:
        """Strip Markdown code fences (e.g. ```json ... ```) from *text*.

        Gemini frequently wraps JSON answers in fenced code blocks even when
        asked for raw JSON; ``json.loads`` would fail on the fenced form.
        Plain (unfenced) text is returned stripped of surrounding whitespace.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            newline = cleaned.find("\n")
            if newline != -1:
                cleaned = cleaned[newline + 1:]  # drop the opening fence line (``` or ```json)
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]  # drop the closing fence
        return cleaned.strip()

    def analyze_repository(self, repo_data: Dict, file_contents: Dict,
                           commit_history: List[Dict], contributors: Dict, insights: Dict) -> Dict:
        """
        Analyze a GitHub repository with Gemini.

        Args:
            repo_data: Repository metadata
            file_contents: Contents of key files
            commit_history: Repository commit history
            contributors: Contributors data (currently unused by the prompt
                builder; kept for interface compatibility)
            insights: Extracted repository insights

        Returns:
            Dictionary with 'analysis' (text, or an error message on failure)
            and 'timestamp' keys.
        """
        prompt = self._build_repo_analysis_prompt(repo_data, file_contents, commit_history, insights)
        try:
            analysis = self._generate(prompt)
        except Exception as e:
            # Never raise to the caller; surface the failure in the payload.
            analysis = f"Error analyzing repository: {str(e)}"
        return {
            'analysis': analysis,
            'timestamp': time.strftime("%Y-%m-%d %H:%M:%S")
        }

    def answer_query(self, query: str, repo_context: Dict, related_files: List[Dict], insights: Dict = None) -> str:
        """
        Answer natural language queries about the repository.

        Args:
            query: User's natural language query
            repo_context: Repository context information
            related_files: List of files relevant to the query
            insights: Repository insights (optional; currently unused —
                kept for interface compatibility)

        Returns:
            Gemini's response to the query, or an error message string.
        """
        prompt = self._build_query_prompt(query, repo_context, related_files)
        # Record the user's turn before calling the model so the query is
        # part of history even if generation fails.
        self.chat_history.append({"role": "user", "parts": [query]})
        try:
            answer = self._generate(prompt)
        except Exception as e:
            return f"Error answering query: {str(e)}"
        # Record the model's turn only on success.
        self.chat_history.append({"role": "model", "parts": [answer]})
        return answer

    def analyze_code_snippet(self, code: str, language: str = "") -> str:
        """
        Analyze a code snippet with Gemini.

        Args:
            code: Code snippet to analyze
            language: Programming language (optional)

        Returns:
            Code analysis text, or an error message string.
        """
        lang_info = f"Language: {language}\n" if language else ""
        prompt = f"""
You are an expert code analyzer. Please analyze the following code snippet:
{lang_info}
```{language}
{code}
```
Please provide a detailed analysis covering:
1. What does this code do? Provide a clear and concise overview.
2. Key components and functions/methods and their purpose.
3. Architecture and design patterns used (if any).
4. Potential issues, bugs, or code smells.
5. Suggestions for improvements or best practices.
Focus on providing meaningful insights rather than just describing the code line by line.
"""
        try:
            return self._generate(prompt)
        except Exception as e:
            return f"Error analyzing code: {str(e)}"

    def identify_potential_collaborators(self, contributors: Dict, insights: Dict, requirements: str) -> List[Dict]:
        """
        Find potential collaborators based on requirements.

        Args:
            contributors: Repository contributors data
            insights: Repository insights
            requirements: User requirements for collaborators

        Returns:
            List of collaborator dicts (login/reasons/confidence); on failure
            a single-element list with login 'error' describing the problem.
        """
        prompt = self._build_collaborator_prompt(contributors, requirements, insights)
        try:
            raw = self._generate(prompt)
        except Exception as e:
            return [{
                'login': 'error',
                'reasons': [f"Error finding collaborators: {str(e)}"],
                'confidence': 0
            }]
        try:
            # Gemini often wraps JSON in Markdown fences; strip them first.
            result = json.loads(self._extract_json(raw))
            return result.get('recommended_collaborators', [])
        except json.JSONDecodeError:
            return [{
                'login': 'error',
                'reasons': ['Failed to parse response from Gemini. Please try again.'],
                'confidence': 0
            }]

    def _build_repo_analysis_prompt(self, repo_data: Dict, file_contents: Dict,
                                    commit_history: List[Dict], insights: Dict) -> str:
        """Build a comprehensive prompt for repository analysis."""
        # Basic repository information
        basic_info = f"""
# GitHub Repository Analysis
## Basic Information
Repository: {repo_data.get('full_name', 'Unknown')}
Description: {repo_data.get('description', 'No description provided')}
Primary Language: {repo_data.get('language', 'Unknown')}
Stars: {repo_data.get('stars', 0)}
Forks: {repo_data.get('forks', 0)}
Open Issues: {repo_data.get('open_issues', 0)}
Created: {repo_data.get('created_at', 'Unknown')}
Last Updated: {repo_data.get('updated_at', 'Unknown')}
License: {repo_data.get('license', 'Unknown')}
Topics: {', '.join(repo_data.get('topics', ['None']))}
"""
        # File structure information (capped to stay within token limits)
        file_structure = "\n## File Structure\n"
        for path in list(file_contents.keys())[:20]:  # Limit to 20 files to avoid token limit
            file_structure += f"- {path}\n"
        if len(file_contents) > 20:
            file_structure += f"- ... and {len(file_contents) - 20} more files\n"
        # Key files content: READMEs and "main" files are most informative
        key_files = "\n## Key Files Content\n"
        priority_files = [f for f in file_contents.keys() if 'readme' in f.lower() or 'main' in f.lower()]
        for path in priority_files[:3]:  # Limit to 3 key files
            key_files += f"\n### {path}\n"
            content = file_contents[path]['content'][:3000]  # Limit content to 3000 characters
            key_files += f"```\n{content}\n```\n"
            if len(file_contents[path]['content']) > 3000:
                key_files += "(content truncated for brevity)\n"
        # Commit history summary
        commit_summary = "\n## Commit History Summary\n"
        commit_summary += f"Total Commits Analyzed: {len(commit_history)}\n"
        if commit_history:
            commit_summary += "\nRecent Commits:\n"
            for commit in commit_history[:5]:  # Show 5 most recent commits
                commit_summary += f"- {commit.get('date', 'Unknown')}: {commit.get('message', 'No message')} by {commit.get('author', 'Unknown')}\n"
        # Insights summary (default=str so dates etc. serialize)
        insights_summary = "\n## Insights Summary\n"
        insights_json = json.dumps(insights, indent=2, default=str)
        insights_summary += f"```json\n{insights_json}\n```\n"
        # Analysis instructions
        instructions = """
## Analysis Instructions
As an expert in software development and repository analysis, please provide a comprehensive analysis of this GitHub repository. Your analysis should include:
1. **Purpose and Overview**: What is this project about? What problem does it solve?
2. **Architecture and Design**: Analyze the code organization, architectural patterns, and overall design. Identify the key components and how they interact.
3. **Code Quality Assessment**: Evaluate the code quality, readability, and maintainability. Are there any best practices or patterns employed?
4. **Development Activity**: Analyze the development patterns, collaboration, and project health based on commit history and contributor information.
5. **Strengths and Areas for Improvement**: Identify the strengths of this repository and suggest areas that could be improved.
6. **Summary**: Provide a concise summary of your analysis.
Base your analysis entirely on the information provided above. Be specific and reference actual files, code patterns, or commit information when possible.
"""
        # Combine all sections
        return basic_info + file_structure + key_files + commit_summary + insights_summary + instructions

    def _build_query_prompt(self, query: str, repo_context: Dict, related_files: List[Dict]) -> str:
        """Build a prompt for answering natural language queries."""
        # Basic repository context
        basic_context = f"""
Repository: {repo_context.get('full_name', 'Unknown')}
Description: {repo_context.get('description', 'No description provided')}
Primary Language: {repo_context.get('language', 'Unknown')}
"""
        # Relevant files context
        files_context = "\n## Relevant Files\n"
        for file_data in related_files[:3]:  # Limit to 3 most relevant files
            files_context += f"\n### {file_data['file']}\n"
            files_context += f"```\n{file_data['content'][:2000]}\n```\n"
            if len(file_data['content']) > 2000:
                files_context += "(content truncated for brevity)\n"
        # Conversation history context
        history_context = "\n## Conversation History\n"
        recent_history = self.chat_history[-6:]  # Last 3 exchanges (6 messages)
        for message in recent_history:
            role = "User" if message["role"] == "user" else "Assistant"
            history_context += f"{role}: {message['parts'][0]}\n\n"
        # Instructions
        instructions = f"""
## Query Instructions
You are an AI assistant specialized in understanding GitHub repositories. The user has asked:
"{query}"
Please answer this question clearly and concisely based ONLY on the information provided about the repository.
If you cannot answer the question based on the available information, say so clearly.
Reference specific files, code, or other relevant details in your answer whenever possible.
"""
        # Combine all sections
        return basic_context + files_context + history_context + instructions

    def _build_collaborator_prompt(self, contributors: Dict, requirements: str, insights: Dict) -> str:
        """Build a prompt for finding potential collaborators."""
        # Condense each contributor to a small, representative sample so the
        # serialized payload stays within the character caps below.
        contributor_data = []
        for login, data in contributors.items():
            contributor_info = {
                'login': login,
                'contributions': data.get('contributions', 0),
                'files_modified': data.get('files_modified', [])[:10],  # Top 10 files
                'commit_messages': data.get('commit_messages', [])[:5],  # Sample 5 messages
                'activity_dates': [str(d) for d in data.get('activity_dates', [])[-10:]]  # Recent 10 dates
            }
            contributor_data.append(contributor_info)
        # Build prompt ({{ / }} escape literal braces in the JSON template)
        prompt = f"""
# Collaborator Identification
## Requirements
{requirements}
## Contributor Data
```json
{json.dumps(contributor_data, indent=2, default=str)[:5000]}
```
## Repository Insights
```json
{json.dumps(insights, indent=2, default=str)[:3000]}
```
## Instructions
You are an AI assistant specialized in GitHub repository analysis. Based on the contributor data and repository insights provided, identify the most suitable collaborators that match the requirements.
For each recommended collaborator, explain why they would be a good fit based on:
1. Their contribution patterns and activity
2. The specific files and areas they've worked on
3. Their apparent expertise and skills
4. How well they match the stated requirements
Provide a confidence score (0.0 to 1.0) for each recommendation based on how well they match the requirements.
Format your response as a JSON object with the following structure:
{{
  "recommended_collaborators": [
    {{
      "login": "username",
      "reasons": ["reason1", "reason2"],
      "confidence": 0.85
    }}
  ]
}}
Provide no more than 3 recommendations, focusing on quality matches rather than quantity.
"""
        return prompt

    def generate_prompt(self, prompt: str) -> str:
        """
        Generate a response to a custom prompt using Gemini.

        Args:
            prompt: Custom prompt for Gemini

        Returns:
            Gemini's response, or an error message string.
        """
        try:
            return self._generate(prompt)
        except Exception as e:
            return f"Error generating response: {str(e)}"