# NOTE: removed pasted console residue ("Spaces:" / "Runtime error") that was not part of the module.
| # gemini_integration.py | |
| import os | |
| import time | |
| import base64 | |
| import json | |
| from typing import List, Dict, Any, Optional, Union | |
| # Fix: Replace incorrect imports with the proper Google Generative AI imports | |
| import google.generativeai as genai | |
| from google.generativeai import types | |
class GeminiClient:
    """Enhanced Gemini client for the GitHub AI Agent.

    Wraps the ``google.generativeai`` SDK with task-specific helpers for
    repository analysis, natural-language Q&A about a repo, code-snippet
    review, collaborator matching, and free-form prompting.
    """

    def __init__(self, api_key: str, model: str = "gemini-2.0-pro-exp-02-05"):
        """
        Initialize the Gemini client.

        Args:
            api_key: Gemini API key
            model: Gemini model to use (default: gemini-2.0-pro-exp-02-05)
        """
        self.api_key = api_key
        self.model_name = model
        genai.configure(api_key=api_key)  # SDK-wide API key configuration
        self.model = genai.GenerativeModel(model)
        # Rolling conversation history in the SDK's {"role", "parts"} format.
        self.chat_history: List[Dict[str, Any]] = []
        # Low temperature: analysis tasks favor deterministic, factual output.
        self.config = types.GenerationConfig(
            temperature=0.2,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

    def _generate(self, prompt: str) -> str:
        """Send *prompt* to Gemini and return the plain-text response.

        Propagates any SDK exception; callers wrap this with their own
        task-specific error handling.
        """
        response = self.model.generate_content(prompt, generation_config=self.config)
        return response.text

    @staticmethod
    def _extract_json(text: str) -> str:
        """Strip Markdown code fences (e.g. ```json ... ```) from *text*.

        Gemini frequently wraps JSON answers in fenced code blocks even when
        asked for raw JSON; ``json.loads`` would fail on the fenced form.
        Plain (unfenced) text is returned stripped of surrounding whitespace.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            newline = cleaned.find("\n")
            if newline != -1:
                cleaned = cleaned[newline + 1:]  # drop the opening fence line (``` or ```json)
            cleaned = cleaned.rstrip()
            if cleaned.endswith("```"):
                cleaned = cleaned[:-3]  # drop the closing fence
        return cleaned.strip()

    def analyze_repository(self, repo_data: Dict, file_contents: Dict,
                           commit_history: List[Dict], contributors: Dict, insights: Dict) -> Dict:
        """
        Analyze a GitHub repository with Gemini.

        Args:
            repo_data: Repository metadata
            file_contents: Contents of key files
            commit_history: Repository commit history
            contributors: Contributors data (currently unused by the prompt
                builder; kept for interface compatibility)
            insights: Extracted repository insights

        Returns:
            Dictionary with 'analysis' (text, or an error message on failure)
            and 'timestamp' keys.
        """
        prompt = self._build_repo_analysis_prompt(repo_data, file_contents, commit_history, insights)
        try:
            analysis = self._generate(prompt)
        except Exception as e:
            # Never raise to the caller; surface the failure in the payload.
            analysis = f"Error analyzing repository: {str(e)}"
        return {
            'analysis': analysis,
            'timestamp': time.strftime("%Y-%m-%d %H:%M:%S")
        }

    def answer_query(self, query: str, repo_context: Dict, related_files: List[Dict], insights: Dict = None) -> str:
        """
        Answer natural language queries about the repository.

        Args:
            query: User's natural language query
            repo_context: Repository context information
            related_files: List of files relevant to the query
            insights: Repository insights (optional; currently unused —
                kept for interface compatibility)

        Returns:
            Gemini's response to the query, or an error message string.
        """
        prompt = self._build_query_prompt(query, repo_context, related_files)
        # Record the user's turn before calling the model so the query is
        # part of history even if generation fails.
        self.chat_history.append({"role": "user", "parts": [query]})
        try:
            answer = self._generate(prompt)
        except Exception as e:
            return f"Error answering query: {str(e)}"
        # Record the model's turn only on success.
        self.chat_history.append({"role": "model", "parts": [answer]})
        return answer

    def analyze_code_snippet(self, code: str, language: str = "") -> str:
        """
        Analyze a code snippet with Gemini.

        Args:
            code: Code snippet to analyze
            language: Programming language (optional)

        Returns:
            Code analysis text, or an error message string.
        """
        lang_info = f"Language: {language}\n" if language else ""
        prompt = f"""
You are an expert code analyzer. Please analyze the following code snippet:
{lang_info}
```{language}
{code}
```
Please provide a detailed analysis covering:
1. What does this code do? Provide a clear and concise overview.
2. Key components and functions/methods and their purpose.
3. Architecture and design patterns used (if any).
4. Potential issues, bugs, or code smells.
5. Suggestions for improvements or best practices.
Focus on providing meaningful insights rather than just describing the code line by line.
"""
        try:
            return self._generate(prompt)
        except Exception as e:
            return f"Error analyzing code: {str(e)}"

    def identify_potential_collaborators(self, contributors: Dict, insights: Dict, requirements: str) -> List[Dict]:
        """
        Find potential collaborators based on requirements.

        Args:
            contributors: Repository contributors data
            insights: Repository insights
            requirements: User requirements for collaborators

        Returns:
            List of collaborator dicts (login/reasons/confidence); on failure
            a single-element list with login 'error' describing the problem.
        """
        prompt = self._build_collaborator_prompt(contributors, requirements, insights)
        try:
            raw = self._generate(prompt)
        except Exception as e:
            return [{
                'login': 'error',
                'reasons': [f"Error finding collaborators: {str(e)}"],
                'confidence': 0
            }]
        try:
            # Gemini often wraps JSON in Markdown fences; strip them first.
            result = json.loads(self._extract_json(raw))
            return result.get('recommended_collaborators', [])
        except json.JSONDecodeError:
            return [{
                'login': 'error',
                'reasons': ['Failed to parse response from Gemini. Please try again.'],
                'confidence': 0
            }]

    def _build_repo_analysis_prompt(self, repo_data: Dict, file_contents: Dict,
                                    commit_history: List[Dict], insights: Dict) -> str:
        """Build a comprehensive prompt for repository analysis."""
        # Basic repository information
        basic_info = f"""
# GitHub Repository Analysis
## Basic Information
Repository: {repo_data.get('full_name', 'Unknown')}
Description: {repo_data.get('description', 'No description provided')}
Primary Language: {repo_data.get('language', 'Unknown')}
Stars: {repo_data.get('stars', 0)}
Forks: {repo_data.get('forks', 0)}
Open Issues: {repo_data.get('open_issues', 0)}
Created: {repo_data.get('created_at', 'Unknown')}
Last Updated: {repo_data.get('updated_at', 'Unknown')}
License: {repo_data.get('license', 'Unknown')}
Topics: {', '.join(repo_data.get('topics', ['None']))}
"""
        # File structure information (capped to stay within token limits)
        file_structure = "\n## File Structure\n"
        for path in list(file_contents.keys())[:20]:  # Limit to 20 files to avoid token limit
            file_structure += f"- {path}\n"
        if len(file_contents) > 20:
            file_structure += f"- ... and {len(file_contents) - 20} more files\n"
        # Key files content: READMEs and "main" files are most informative
        key_files = "\n## Key Files Content\n"
        priority_files = [f for f in file_contents.keys() if 'readme' in f.lower() or 'main' in f.lower()]
        for path in priority_files[:3]:  # Limit to 3 key files
            key_files += f"\n### {path}\n"
            content = file_contents[path]['content'][:3000]  # Limit content to 3000 characters
            key_files += f"```\n{content}\n```\n"
            if len(file_contents[path]['content']) > 3000:
                key_files += "(content truncated for brevity)\n"
        # Commit history summary
        commit_summary = "\n## Commit History Summary\n"
        commit_summary += f"Total Commits Analyzed: {len(commit_history)}\n"
        if commit_history:
            commit_summary += "\nRecent Commits:\n"
            for commit in commit_history[:5]:  # Show 5 most recent commits
                commit_summary += f"- {commit.get('date', 'Unknown')}: {commit.get('message', 'No message')} by {commit.get('author', 'Unknown')}\n"
        # Insights summary (default=str so dates etc. serialize)
        insights_summary = "\n## Insights Summary\n"
        insights_json = json.dumps(insights, indent=2, default=str)
        insights_summary += f"```json\n{insights_json}\n```\n"
        # Analysis instructions
        instructions = """
## Analysis Instructions
As an expert in software development and repository analysis, please provide a comprehensive analysis of this GitHub repository. Your analysis should include:
1. **Purpose and Overview**: What is this project about? What problem does it solve?
2. **Architecture and Design**: Analyze the code organization, architectural patterns, and overall design. Identify the key components and how they interact.
3. **Code Quality Assessment**: Evaluate the code quality, readability, and maintainability. Are there any best practices or patterns employed?
4. **Development Activity**: Analyze the development patterns, collaboration, and project health based on commit history and contributor information.
5. **Strengths and Areas for Improvement**: Identify the strengths of this repository and suggest areas that could be improved.
6. **Summary**: Provide a concise summary of your analysis.
Base your analysis entirely on the information provided above. Be specific and reference actual files, code patterns, or commit information when possible.
"""
        # Combine all sections
        return basic_info + file_structure + key_files + commit_summary + insights_summary + instructions

    def _build_query_prompt(self, query: str, repo_context: Dict, related_files: List[Dict]) -> str:
        """Build a prompt for answering natural language queries."""
        # Basic repository context
        basic_context = f"""
Repository: {repo_context.get('full_name', 'Unknown')}
Description: {repo_context.get('description', 'No description provided')}
Primary Language: {repo_context.get('language', 'Unknown')}
"""
        # Relevant files context
        files_context = "\n## Relevant Files\n"
        for file_data in related_files[:3]:  # Limit to 3 most relevant files
            files_context += f"\n### {file_data['file']}\n"
            files_context += f"```\n{file_data['content'][:2000]}\n```\n"
            if len(file_data['content']) > 2000:
                files_context += "(content truncated for brevity)\n"
        # Conversation history context
        history_context = "\n## Conversation History\n"
        recent_history = self.chat_history[-6:]  # Last 3 exchanges (6 messages)
        for message in recent_history:
            role = "User" if message["role"] == "user" else "Assistant"
            history_context += f"{role}: {message['parts'][0]}\n\n"
        # Instructions
        instructions = f"""
## Query Instructions
You are an AI assistant specialized in understanding GitHub repositories. The user has asked:
"{query}"
Please answer this question clearly and concisely based ONLY on the information provided about the repository.
If you cannot answer the question based on the available information, say so clearly.
Reference specific files, code, or other relevant details in your answer whenever possible.
"""
        # Combine all sections
        return basic_context + files_context + history_context + instructions

    def _build_collaborator_prompt(self, contributors: Dict, requirements: str, insights: Dict) -> str:
        """Build a prompt for finding potential collaborators."""
        # Condense each contributor to a small, representative sample so the
        # serialized payload stays within the character caps below.
        contributor_data = []
        for login, data in contributors.items():
            contributor_info = {
                'login': login,
                'contributions': data.get('contributions', 0),
                'files_modified': data.get('files_modified', [])[:10],  # Top 10 files
                'commit_messages': data.get('commit_messages', [])[:5],  # Sample 5 messages
                'activity_dates': [str(d) for d in data.get('activity_dates', [])[-10:]]  # Recent 10 dates
            }
            contributor_data.append(contributor_info)
        # Build prompt ({{ / }} escape literal braces in the JSON template)
        prompt = f"""
# Collaborator Identification
## Requirements
{requirements}
## Contributor Data
```json
{json.dumps(contributor_data, indent=2, default=str)[:5000]}
```
## Repository Insights
```json
{json.dumps(insights, indent=2, default=str)[:3000]}
```
## Instructions
You are an AI assistant specialized in GitHub repository analysis. Based on the contributor data and repository insights provided, identify the most suitable collaborators that match the requirements.
For each recommended collaborator, explain why they would be a good fit based on:
1. Their contribution patterns and activity
2. The specific files and areas they've worked on
3. Their apparent expertise and skills
4. How well they match the stated requirements
Provide a confidence score (0.0 to 1.0) for each recommendation based on how well they match the requirements.
Format your response as a JSON object with the following structure:
{{
  "recommended_collaborators": [
    {{
      "login": "username",
      "reasons": ["reason1", "reason2"],
      "confidence": 0.85
    }}
  ]
}}
Provide no more than 3 recommendations, focusing on quality matches rather than quantity.
"""
        return prompt

    def generate_prompt(self, prompt: str) -> str:
        """
        Generate a response to a custom prompt using Gemini.

        Args:
            prompt: Custom prompt for Gemini

        Returns:
            Gemini's response, or an error message string.
        """
        try:
            return self._generate(prompt)
        except Exception as e:
            return f"Error generating response: {str(e)}"