nihalaninihal committed
Commit 7da0953 · verified · 1 Parent(s): b4986a9

Update github_ai_agent.py

Files changed (1)
  1. github_ai_agent.py +517 -209
github_ai_agent.py CHANGED
@@ -1,4 +1,4 @@
1
- # github_ai_agent.py
2
 
3
  import os
4
  import re
@@ -10,14 +10,16 @@ from collections import defaultdict, Counter
10
  from itertools import combinations
11
  import numpy as np
12
  from typing import List, Dict, Tuple, Any, Optional, Union
 
 
13
  import google.generativeai as genai
14
 
15
  # External libraries
16
  from github import Github, GithubException
17
  from sentence_transformers import SentenceTransformer
18
  import faiss
19
- from gemini_integration import GeminiClient # Import GeminiClient
20
- from visualization_module import RepositoryVisualizer # Import RepositoryVisualizer
21
 
22
 
23
  # Configuration
@@ -27,11 +29,13 @@ class Config:
27
  self.gemini_api_key = os.environ.get("GEMINI_API_KEY")
28
  self.github_token = os.environ.get("GITHUB_ACCESS_TOKEN")
29
  self.embedding_model_name = "all-MiniLM-L6-v2"
30
- self.gemini_model = "gemini-1.5-pro"
31
  self.max_files_to_load = 100 # Safety limit for large repos
32
  self.max_token_length = 64000 # Gemini Pro context limit
33
  self.enable_advanced_metrics = True
34
  self.visualization_node_limit = 150
 
 
35
 
36
  # File extensions to analyze
37
  self.code_extensions = [
@@ -42,6 +46,7 @@ class Config:
42
  '.md', '.txt', '.rst', '.html', '.xml', '.json', '.yaml', '.yml'
43
  ]
44
 
 
45
  # GitHub Repository Management
46
  class GitHubManager:
47
  """Manages interaction with GitHub repositories"""
@@ -54,6 +59,7 @@ class GitHubManager:
54
  self.contributors_data = {}
55
  self.commit_history = []
56
  self.issues_data = []
 
57
 
58
  def load_repository(self, repo_url: str) -> bool:
59
  """Load a repository from URL"""
@@ -100,8 +106,8 @@ class GitHubManager:
100
  return f"{username}/{repo}"
101
  return None
102
 
103
- def load_files(self) -> Dict[str, str]:
104
- """Load files from repository"""
105
  if not self.current_repo:
106
  return {}
107
 
@@ -109,43 +115,98 @@ class GitHubManager:
109
  contents = self.current_repo.get_contents("")
110
  self.file_contents = {}
111
  files_loaded = 0
112
-
113
- while contents and files_loaded < self.config.max_files_to_load:
114
- file_content = contents.pop(0)
115
-
116
- # Skip directories but process their contents
117
- if file_content.type == "dir":
118
- contents.extend(self.current_repo.get_contents(file_content.path))
119
  continue
120
-
121
  # Filter by extensions
122
- _, ext = os.path.splitext(file_content.path)
123
  if ext not in self.config.code_extensions + self.config.doc_extensions:
124
  continue
125
-
126
- # Load file content
127
- try:
128
- # Handle binary files (images, etc.)
129
- if ext in self.config.code_extensions + self.config.doc_extensions:
130
- decoded_content = file_content.decoded_content.decode('utf-8')
131
- self.file_contents[file_content.path] = {
132
- 'content': decoded_content,
133
- 'type': 'code' if ext in self.config.code_extensions else 'document',
134
- 'size': file_content.size,
135
- 'ext': ext
136
- }
137
- files_loaded += 1
138
- except UnicodeDecodeError:
139
- # Skip binary files that can't be decoded as text
140
- pass
141
 
142
  return self.file_contents
143
  except Exception as e:
144
  print(f"Error loading files: {e}")
145
  return {}
146
147
  def load_contributors(self) -> List[Dict]:
148
- """Load repository contributors"""
149
  if not self.current_repo:
150
  return []
151
 
@@ -153,67 +214,128 @@ class GitHubManager:
153
  contributors = self.current_repo.get_contributors()
154
  self.contributors_data = {}
155
 
156
- for contributor in contributors:
157
- self.contributors_data[contributor.login] = {
158
- 'login': contributor.login,
159
- 'id': contributor.id,
160
- 'contributions': contributor.contributions,
161
- 'avatar_url': contributor.avatar_url,
162
- 'html_url': contributor.html_url,
163
- 'type': contributor.type,
164
- 'files_modified': [],
165
- 'commit_messages': [],
166
- 'activity_dates': []
167
  }
168
 
169
  return list(self.contributors_data.values())
170
  except Exception as e:
171
  print(f"Error loading contributors: {e}")
172
  return []
173
 
174
  def load_commits(self, limit: int = 100) -> List[Dict]:
175
- """Load repository commits"""
176
  if not self.current_repo:
177
  return []
178
 
179
  try:
180
  commits = self.current_repo.get_commits()[:limit]
181
  self.commit_history = []
182
-
183
- for commit in commits:
184
- commit_data = {
185
- 'sha': commit.sha,
186
- 'author': commit.author.login if commit.author else 'Unknown',
187
- 'date': commit.commit.author.date,
188
- 'message': commit.commit.message,
189
- 'files': []
190
  }
191
 
192
- # Get files changed in this commit
193
- try:
194
- commit_files = commit.files
195
- for file in commit_files:
196
- commit_data['files'].append({
197
- 'filename': file.filename,
198
- 'additions': file.additions,
199
- 'deletions': file.deletions,
200
- 'changes': file.changes,
201
- 'status': file.status
202
- })
203
-
204
- # Add this file to the contributor's file list
205
- if commit.author and commit.author.login in self.contributors_data:
206
- self.contributors_data[commit.author.login]['files_modified'].append(file.filename)
207
- self.contributors_data[commit.author.login]['commit_messages'].append(commit.commit.message)
208
- self.contributors_data[commit.author.login]['activity_dates'].append(commit.commit.author.date)
209
- except:
210
- # Some commits might not have accessible files
211
- pass
212
-
213
- self.commit_history.append(commit_data)
214
-
215
- # Count frequency of modified files for each contributor
216
- for login, contributor in self.contributors_data.items():
217
  # Count occurrences of each file
218
  file_counts = Counter(contributor['files_modified'])
219
  # Replace list with a list of (filename, count) tuples
@@ -222,52 +344,87 @@ class GitHubManager:
222
  for filename, count in file_counts.most_common(10)
223
  ]
224
 
225
- return self.commit_history
226
- except Exception as e:
227
- print(f"Error loading commits: {e}")
228
- return []
229
-
230
  def load_issues(self, limit: int = 30) -> List[Dict]:
231
- """Load repository issues"""
232
  if not self.current_repo:
233
  return []
234
 
235
  try:
236
  issues = self.current_repo.get_issues(state='all')[:limit]
237
  self.issues_data = []
238
-
239
- for issue in issues:
240
- issue_data = {
241
- 'number': issue.number,
242
- 'title': issue.title,
243
- 'body': issue.body,
244
- 'user': issue.user.login if issue.user else 'Unknown',
245
- 'state': issue.state,
246
- 'created_at': issue.created_at,
247
- 'updated_at': issue.updated_at,
248
- 'closed_at': issue.closed_at,
249
- 'labels': [label.name for label in issue.labels],
250
- 'comments': []
251
  }
252
-
253
- # Get comments for this issue (limited to 10)
254
- try:
255
- comments = issue.get_comments()[:10]
256
- for comment in comments:
257
- issue_data['comments'].append({
258
- 'user': comment.user.login if comment.user else 'Unknown',
259
- 'body': comment.body,
260
- 'created_at': comment.created_at
261
- })
262
- except:
263
- pass
264
-
265
- self.issues_data.append(issue_data)
266
 
267
  return self.issues_data
268
  except Exception as e:
269
  print(f"Error loading issues: {e}")
270
  return []
271
 
272
  # Knowledge Base and Vector Storage
273
  class KnowledgeBase:
@@ -279,9 +436,11 @@ class KnowledgeBase:
279
  self.index = None
280
  self.knowledge_graph = nx.Graph()
281
  self.insights = {}
282
 
283
  def initialize_vector_storage(self, file_contents: Dict[str, Dict]) -> None:
284
- """Initialize vector storage with file contents"""
285
  try:
286
  # Clear existing data
287
  self.embeddings = {}
@@ -291,30 +450,72 @@ class KnowledgeBase:
291
  texts = []
292
  ids = []
293
294
  for path, file_data in file_contents.items():
295
- content = file_data['content']
296
- file_type = file_data['type']
297
-
298
- # Skip very large files to avoid embedding issues
299
- if len(content) > 10000:
300
- content = content[:10000] + "..."
301
-
302
- # Store file content for embedding
303
- texts.append(content)
304
- ids.append(path)
305
-
306
- # Add node to knowledge graph
307
  self.knowledge_graph.add_node(
308
  path,
309
  type='file',
310
- file_type=file_type,
311
- size=file_data['size'],
312
- extension=file_data['ext']
313
  )
314
 
315
  # Create embeddings for all files
316
  if texts:
317
- file_embeddings = self.embedding_model.encode(texts)
318
 
319
  # Initialize FAISS index
320
  dimension = file_embeddings.shape[1]
@@ -356,18 +557,24 @@ class KnowledgeBase:
356
  # Create new edge
357
  self.knowledge_graph.add_edge(login, filename, weight=count)
358
 
359
- # Add connections between files based on commit co-occurrence
360
  file_co_occurrence = defaultdict(int)
361
-
362
- for commit in commits:
363
- # Get all files in this commit
364
- commit_files = [file['filename'] for file in commit['files']]
365
-
366
- # Add co-occurrence for each pair of files
367
- for file1, file2 in combinations(commit_files, 2):
368
- if file1 in self.knowledge_graph and file2 in self.knowledge_graph:
369
- file_pair = tuple(sorted([file1, file2]))
370
- file_co_occurrence[file_pair] += 1
371
 
372
  # Add edges for file co-occurrence
373
  for (file1, file2), count in file_co_occurrence.items():
@@ -382,8 +589,9 @@ class KnowledgeBase:
382
  print(f"Error building knowledge graph: {e}")
383
  return nx.Graph()
384
 
 
385
  def search_similar_files(self, query: str, top_k: int = 5) -> List[Dict]:
386
- """Search for files similar to query"""
387
  try:
388
  if not self.index:
389
  return []
@@ -413,7 +621,12 @@ class KnowledgeBase:
413
  return []
414
 
415
  def extract_insights(self, repo_data: Dict, commits: List[Dict], contributors: Dict, issues: List[Dict]) -> Dict:
416
- """Extract insights from repository data"""
 
 
 
 
 
417
  try:
418
  insights = {
419
  'basic_stats': {},
@@ -423,21 +636,44 @@ class KnowledgeBase:
423
  'issues': {}
424
  }
425
426
  # Basic statistics
427
  insights['basic_stats'] = {
428
- 'name': repo_data['name'],
429
- 'description': repo_data['description'],
430
- 'stars': repo_data['stars'],
431
- 'forks': repo_data['forks'],
432
- 'age_days': (datetime.datetime.now() - repo_data['created_at']).days if repo_data['created_at'] else 0,
433
- 'primary_language': repo_data['language'],
434
- 'topics': repo_data['topics']
435
  }
436
 
437
  # Activity insights
438
  if commits:
439
- # Convert dates to datetime objects and sort
440
- commit_dates = [commit['date'] for commit in commits if commit['date']]
441
  commit_dates.sort()
442
 
443
  if commit_dates:
@@ -452,9 +688,13 @@ class KnowledgeBase:
452
  'last_commit': last_commit,
453
  'days_span': days_span,
454
  'commits_per_day': round(len(commits) / max(days_span, 1), 2),
455
- 'most_active_day': max(commit_dates, key=commit_dates.count) if commit_dates else None,
456
  }
457
458
  # Commit activity by month
459
  commit_months = [d.strftime('%Y-%m') for d in commit_dates]
460
  month_counts = Counter(commit_months)
@@ -537,7 +777,17 @@ class KnowledgeBase:
537
  close_times = []
538
  for issue in closed_issues:
539
  if issue['created_at'] and issue['closed_at']:
540
- close_time = (issue['closed_at'] - issue['created_at']).days
541
  close_times.append(close_time)
542
 
543
  if close_times:
@@ -550,26 +800,40 @@ class KnowledgeBase:
550
  {'label': label, 'count': count} for label, count in label_counts.most_common(5)
551
  ]
552
553
  self.insights = insights
 
554
  return insights
555
  except Exception as e:
 
556
  print(f"Error extracting insights: {e}")
 
557
  return {}
558
 
 
559
  # Main GitHub AI Agent Class
560
  class GitHubAIAgent:
561
  """Main class for GitHub AI Agent"""
562
  def __init__(self):
563
  self.config = Config()
564
- self.github_manager = GitHubManager(self.config)
565
- self.knowledge_base = KnowledgeBase(self.config)
566
- self.gemini_client = GeminiClient(self.config)
567
- self.visualization_manager = RepositoryVisualizer(self.config)
568
 
569
  self.repository_loaded = False
570
  self.repository_url = ""
571
  self.repository_analysis = {}
572
  self.visualizations = {}
573
 
574
  def set_api_keys(self, gemini_api_key: str, github_token: str = None) -> None:
575
  """Set API keys"""
@@ -582,13 +846,14 @@ class GitHubAIAgent:
582
  self.config.gemini_api_key = gemini_api_key
583
  self.config.github_token = github_token
584
 
585
- # Reinitialize clients
586
  self.github_manager = GitHubManager(self.config)
587
- self.gemini_client = GeminiClient(self.config)
588
- self.visualization_manager = RepositoryVisualizer(self.config) # Reinitialize
 
589
 
590
  def load_repository(self, repository_url: str) -> Dict:
591
- """Load and analyze a GitHub repository"""
592
  result = {
593
  'success': False,
594
  'message': '',
@@ -604,7 +869,7 @@ class GitHubAIAgent:
604
  self.repository_analysis = {}
605
  self.visualizations = {}
606
 
607
- # Load repository
608
  print(f"Loading repository: {repository_url}")
609
  repo_loaded = self.github_manager.load_repository(repository_url)
610
 
@@ -615,30 +880,27 @@ class GitHubAIAgent:
615
  # Store repository URL
616
  self.repository_url = repository_url
617
 
618
- # Load repository files
619
- print("Loading repository files")
620
- files = self.github_manager.load_files()
 
621
  result['file_count'] = len(files)
622
-
623
- # Load contributors
624
- print("Loading contributors")
625
- contributors = self.github_manager.load_contributors()
626
  result['contributor_count'] = len(contributors)
627
 
628
- # Load commits
629
- print("Loading commit history")
630
- commits = self.github_manager.load_commits()
631
-
632
- # Load issues
633
- print("Loading issues")
634
- issues = self.github_manager.load_issues()
635
-
636
- # Initialize vector storage
637
- print("Building vector embeddings")
638
  self.knowledge_base.initialize_vector_storage(files)
639
-
640
- # Build knowledge graph
641
- print("Building knowledge graph")
642
  knowledge_graph = self.knowledge_base.build_knowledge_graph(
643
  commits, self.github_manager.contributors_data
644
  )
@@ -652,31 +914,43 @@ class GitHubAIAgent:
652
  issues
653
  )
654
 
655
- # Analyze repository with Gemini
656
- print("Analyzing repository with Gemini")
657
- self.repository_analysis = self.gemini_client.analyze_repository(
658
- self.github_manager.repo_data,
659
- files,
660
- commits,
661
- self.github_manager.contributors_data,
662
- insights
663
- )
664
-
665
- # Create repository visualizations
666
- print("Creating repository visualizations")
667
- repo_graph_path = self.visualization_manager.create_repository_graph(knowledge_graph)
668
- activity_chart_path = self.visualization_manager.create_commit_activity_chart(commits)
669
- contributor_network_path = self.visualization_manager.create_contributor_network(
670
- self.github_manager.contributors_data, commits
671
- )
672
- dependency_graph_path = self.visualization_manager.create_file_dependency_graph(files)
673
-
674
- self.visualizations = {
675
- 'repository_graph': repo_graph_path,
676
- 'activity_chart': activity_chart_path,
677
- 'contributor_network': contributor_network_path,
678
- 'dependency_graph': dependency_graph_path,
679
- }
 
 
 
 
 
 
 
 
 
 
 
 
680
 
681
  # Update result
682
  result['success'] = True
@@ -687,11 +961,15 @@ class GitHubAIAgent:
687
 
688
  return result
689
  except Exception as e:
 
690
  result['message'] = f"Error loading repository: {str(e)}"
691
  return result
692
 
 
693
  def answer_query(self, query: str) -> Dict:
694
- """Answer a natural language query about the repository"""
695
  if not self.repository_loaded:
696
  return {
697
  'success': False,
@@ -699,6 +977,14 @@ class GitHubAIAgent:
699
  'answer': ""
700
  }
701
 
 
 
 
 
 
 
 
 
702
  try:
703
  # Search for relevant files
704
  similar_files = self.knowledge_base.search_similar_files(query)
@@ -711,12 +997,21 @@ class GitHubAIAgent:
711
  self.knowledge_base.insights
712
  )
713
 
714
- return {
715
  'success': True,
716
  'message': "Query answered successfully",
717
  'answer': answer,
718
  'relevant_files': [f['file'] for f in similar_files]
719
  }
 
 
 
 
 
 
 
 
 
720
  except Exception as e:
721
  return {
722
  'success': False,
@@ -725,7 +1020,7 @@ class GitHubAIAgent:
725
  }
726
 
727
  def analyze_code(self, file_path: str = "", code_snippet: str = "", language: str = "") -> Dict:
728
- """Analyze a code file or snippet"""
729
  if not file_path and not code_snippet:
730
  return {
731
  'success': False,
@@ -836,4 +1131,17 @@ class GitHubAIAgent:
836
  'success': True,
837
  'message': "Repository visualizations retrieved",
838
  'visualizations': self.visualizations
839
- }
 
 
 
1
+ # github_ai_agent.py - Improved version with parallel processing and error handling
2
 
3
  import os
4
  import re
 
10
  from itertools import combinations
11
  import numpy as np
12
  from typing import List, Dict, Tuple, Any, Optional, Union
13
+ import concurrent.futures
14
+ from functools import lru_cache
15
  import google.generativeai as genai
16
 
17
  # External libraries
18
  from github import Github, GithubException
19
  from sentence_transformers import SentenceTransformer
20
  import faiss
21
+ from gemini_integration import GeminiClient
22
+ from visualization_module import RepositoryVisualizer
23
 
24
 
25
  # Configuration
 
29
  self.gemini_api_key = os.environ.get("GEMINI_API_KEY")
30
  self.github_token = os.environ.get("GITHUB_ACCESS_TOKEN")
31
  self.embedding_model_name = "all-MiniLM-L6-v2"
32
+ self.gemini_model = "gemini-2.0-pro-exp-02-05"
33
  self.max_files_to_load = 100 # Safety limit for large repos
34
  self.max_token_length = 64000 # Gemini Pro context limit
35
  self.enable_advanced_metrics = True
36
  self.visualization_node_limit = 150
37
+ self.cache_enabled = True
38
+ self.cache_ttl = 3600 # Cache time to live in seconds
39
 
40
  # File extensions to analyze
41
  self.code_extensions = [
 
46
  '.md', '.txt', '.rst', '.html', '.xml', '.json', '.yaml', '.yml'
47
  ]
48
 
49
+
50
  # GitHub Repository Management
51
  class GitHubManager:
52
  """Manages interaction with GitHub repositories"""
 
59
  self.contributors_data = {}
60
  self.commit_history = []
61
  self.issues_data = []
62
+ self.file_cache = {} # Cache for loaded files
63
 
64
  def load_repository(self, repo_url: str) -> bool:
65
  """Load a repository from URL"""
 
106
  return f"{username}/{repo}"
107
  return None
108
 
109
+ def load_files(self) -> Dict[str, Dict]:
110
+ """Load files from repository with improved performance"""
111
  if not self.current_repo:
112
  return {}
113
 
 
115
  contents = self.current_repo.get_contents("")
116
  self.file_contents = {}
117
  files_loaded = 0
118
+ batch_size = 20 # Process files in batches
119
+
120
+ # Create a queue of files to process
121
+ file_queue = []
122
+
123
+ # First pass - collect all file paths
124
+ while contents:
125
+ content_item = contents.pop(0)
126
+
127
+ # Skip directories but add their contents to our processing queue
128
+ if content_item.type == "dir":
129
+ try:
130
+ dir_contents = self.current_repo.get_contents(content_item.path)
131
+ contents.extend(dir_contents)
132
+ except Exception as e:
133
+ print(f"Error accessing directory {content_item.path}: {e}")
134
  continue
135
+
136
  # Filter by extensions
137
+ _, ext = os.path.splitext(content_item.path)
138
  if ext not in self.config.code_extensions + self.config.doc_extensions:
139
  continue
140
+
141
+ # Add file to processing queue
142
+ file_queue.append(content_item)
143
+
144
+ # Stop if we've reached our limit
145
+ if len(file_queue) >= self.config.max_files_to_load:
146
+ break
147
+
148
+ # Process files in batches
149
+ for i in range(0, len(file_queue), batch_size):
150
+ batch = file_queue[i:i+batch_size]
151
+
152
+ # Process batch in parallel
153
+ with concurrent.futures.ThreadPoolExecutor() as executor:
154
+ future_to_file = {
155
+ executor.submit(self._process_file, file_content): file_content
156
+ for file_content in batch
157
+ }
158
+
159
+ for future in concurrent.futures.as_completed(future_to_file):
160
+ file_content = future_to_file[future]
161
+ try:
162
+ result = future.result()
163
+ if result:
164
+ self.file_contents[file_content.path] = result
165
+ files_loaded += 1
166
+ except Exception as e:
167
+ print(f"Error processing file {file_content.path}: {e}")
168
 
169
  return self.file_contents
170
  except Exception as e:
171
  print(f"Error loading files: {e}")
172
  return {}
173
 
174
+ def _process_file(self, file_content) -> Optional[Dict]:
175
+ """Process a single file (for parallel execution)"""
176
+ try:
177
+ # Check if in cache
178
+ if file_content.path in self.file_cache:
179
+ return self.file_cache[file_content.path]
180
+
181
+ _, ext = os.path.splitext(file_content.path)
182
+
183
+ # Only process text files with specified extensions
184
+ if ext not in self.config.code_extensions + self.config.doc_extensions:
185
+ return None
186
+
187
+ try:
188
+ # Decode content
189
+ decoded_content = file_content.decoded_content.decode('utf-8')
190
+ result = {
191
+ 'content': decoded_content,
192
+ 'type': 'code' if ext in self.config.code_extensions else 'document',
193
+ 'size': file_content.size,
194
+ 'ext': ext
195
+ }
196
+
197
+ # Update cache
198
+ self.file_cache[file_content.path] = result
199
+ return result
200
+ except UnicodeDecodeError:
201
+ # Skip binary files
202
+ return None
203
+
204
+ except Exception as e:
205
+ print(f"Error processing file {file_content.path}: {e}")
206
+ return None
207
+
208
  def load_contributors(self) -> List[Dict]:
209
+ """Load repository contributors with improved performance"""
210
  if not self.current_repo:
211
  return []
212
 
 
214
  contributors = self.current_repo.get_contributors()
215
  self.contributors_data = {}
216
 
217
+ # Collect basic contributor info
218
+ contributor_list = list(contributors) # Convert from PaginatedList to list
219
+
220
+ # Process in parallel
221
+ with concurrent.futures.ThreadPoolExecutor() as executor:
222
+ future_to_contributor = {
223
+ executor.submit(self._process_contributor, contributor): contributor
224
+ for contributor in contributor_list
225
  }
226
+
227
+ for future in concurrent.futures.as_completed(future_to_contributor):
228
+ contributor = future_to_contributor[future]
229
+ try:
230
+ contributor_data = future.result()
231
+ if contributor_data:
232
+ self.contributors_data[contributor.login] = contributor_data
233
+ except Exception as e:
234
+ print(f"Error processing contributor {contributor.login}: {e}")
235
 
236
  return list(self.contributors_data.values())
237
  except Exception as e:
238
  print(f"Error loading contributors: {e}")
239
  return []
240
+
241
+ def _process_contributor(self, contributor) -> Dict:
242
+ """Process a single contributor (for parallel execution)"""
243
+ try:
244
+ return {
245
+ 'login': contributor.login,
246
+ 'id': contributor.id,
247
+ 'contributions': contributor.contributions,
248
+ 'avatar_url': contributor.avatar_url,
249
+ 'html_url': contributor.html_url,
250
+ 'type': contributor.type,
251
+ 'files_modified': [],
252
+ 'commit_messages': [],
253
+ 'activity_dates': []
254
+ }
255
+ except Exception as e:
256
+ print(f"Error processing contributor {contributor.login}: {e}")
257
+ return None
258
 
259
  def load_commits(self, limit: int = 100) -> List[Dict]:
260
+ """Load repository commits with improved performance"""
261
  if not self.current_repo:
262
  return []
263
 
264
  try:
265
  commits = self.current_repo.get_commits()[:limit]
266
  self.commit_history = []
267
+ commits_list = list(commits) # Convert from PaginatedList to list
268
+
269
+ # Process commits in parallel
270
+ with concurrent.futures.ThreadPoolExecutor() as executor:
271
+ future_to_commit = {
272
+ executor.submit(self._process_commit, commit): commit
273
+ for commit in commits_list
 
274
  }
275
+
276
+ for future in concurrent.futures.as_completed(future_to_commit):
277
+ commit = future_to_commit[future]
278
+ try:
279
+ commit_data = future.result()
280
+ if commit_data:
281
+ self.commit_history.append(commit_data)
282
+ except Exception as e:
283
+ print(f"Error processing commit {commit.sha}: {e}")
284
+
285
+ # Process contributor file statistics
286
+ self._update_contributor_file_stats()
287
+
288
+ return self.commit_history
289
+ except Exception as e:
290
+ print(f"Error loading commits: {e}")
291
+ return []
292
+
293
+ def _process_commit(self, commit) -> Optional[Dict]:
294
+ """Process a single commit (for parallel execution)"""
295
+ try:
296
+ # Make sure the commit date is timezone-naive
297
+ commit_date = commit.commit.author.date
298
+ if hasattr(commit_date, 'tzinfo') and commit_date.tzinfo:
299
+ commit_date = commit_date.replace(tzinfo=None)
300
+
301
+ commit_data = {
302
+ 'sha': commit.sha,
303
+ 'author': commit.author.login if commit.author else 'Unknown',
304
+ 'date': commit_date,
305
+ 'message': commit.commit.message,
306
+ 'files': []
307
+ }
308
+
309
+ # Get files changed in this commit
310
+ try:
311
+ commit_files = commit.files
312
+ for file in commit_files:
313
+ file_data = {
314
+ 'filename': file.filename,
315
+ 'additions': file.additions,
316
+ 'deletions': file.deletions,
317
+ 'changes': file.changes,
318
+ 'status': file.status
319
+ }
320
+ commit_data['files'].append(file_data)
321
+
322
+ # Add this file to the contributor's file list
323
+ if commit.author and commit.author.login in self.contributors_data:
324
+ self.contributors_data[commit.author.login]['files_modified'].append(file.filename)
325
+ self.contributors_data[commit.author.login]['commit_messages'].append(commit.commit.message)
326
+ self.contributors_data[commit.author.login]['activity_dates'].append(commit_date)
327
+ except Exception as e:
328
+ print(f"Error processing files for commit {commit.sha}: {e}")
329
 
330
+ return commit_data
331
+ except Exception as e:
332
+ print(f"Error processing commit {commit.sha}: {e}")
333
+ return None
334
+
335
+ def _update_contributor_file_stats(self):
336
+ """Update contributor file statistics"""
337
+ for login, contributor in self.contributors_data.items():
338
+ if 'files_modified' in contributor:
339
  # Count occurrences of each file
340
  file_counts = Counter(contributor['files_modified'])
341
  # Replace list with a list of (filename, count) tuples
 
344
  for filename, count in file_counts.most_common(10)
345
  ]
346
347
  def load_issues(self, limit: int = 30) -> List[Dict]:
348
+ """Load repository issues with improved performance"""
349
  if not self.current_repo:
350
  return []
351
 
352
  try:
353
  issues = self.current_repo.get_issues(state='all')[:limit]
354
  self.issues_data = []
355
+ issues_list = list(issues) # Convert from PaginatedList to list
356
+
357
+ # Process issues in parallel
358
+ with concurrent.futures.ThreadPoolExecutor() as executor:
359
+ future_to_issue = {
360
+ executor.submit(self._process_issue, issue): issue
361
+ for issue in issues_list
362
  }
363
+
364
+ for future in concurrent.futures.as_completed(future_to_issue):
365
+ issue = future_to_issue[future]
366
+ try:
367
+ issue_data = future.result()
368
+ if issue_data:
369
+ self.issues_data.append(issue_data)
370
+ except Exception as e:
371
+ print(f"Error processing issue #{issue.number}: {e}")
 
 
 
 
 
372
 
373
  return self.issues_data
374
  except Exception as e:
375
  print(f"Error loading issues: {e}")
376
  return []
377
+
378
+ def _process_issue(self, issue) -> Optional[Dict]:
379
+ """Process a single issue (for parallel execution)"""
380
+ try:
381
+ # Normalize datetime objects
382
+ created_at = issue.created_at
383
+ updated_at = issue.updated_at
384
+ closed_at = issue.closed_at
385
+
386
+ if hasattr(created_at, 'tzinfo') and created_at.tzinfo:
387
+ created_at = created_at.replace(tzinfo=None)
388
+ if hasattr(updated_at, 'tzinfo') and updated_at.tzinfo:
389
+ updated_at = updated_at.replace(tzinfo=None)
390
+ if hasattr(closed_at, 'tzinfo') and closed_at and closed_at.tzinfo:
391
+ closed_at = closed_at.replace(tzinfo=None)
392
+
393
+ issue_data = {
394
+ 'number': issue.number,
395
+ 'title': issue.title,
396
+ 'body': issue.body,
397
+ 'user': issue.user.login if issue.user else 'Unknown',
398
+ 'state': issue.state,
399
+ 'created_at': created_at,
400
+ 'updated_at': updated_at,
401
+ 'closed_at': closed_at,
402
+ 'labels': [label.name for label in issue.labels],
403
+ 'comments': []
404
+ }
405
+
406
+ # Get comments for this issue (limited to 10)
407
+ try:
408
+ comments = issue.get_comments()[:10]
409
+ for comment in comments:
410
+ # Normalize datetime
411
+ comment_created_at = comment.created_at
412
+ if hasattr(comment_created_at, 'tzinfo') and comment_created_at.tzinfo:
413
+ comment_created_at = comment_created_at.replace(tzinfo=None)
414
+
415
+ issue_data['comments'].append({
416
+ 'user': comment.user.login if comment.user else 'Unknown',
417
+ 'body': comment.body,
418
+ 'created_at': comment_created_at
419
+ })
420
+ except Exception as e:
421
+ print(f"Error loading comments for issue #{issue.number}: {e}")
422
+
423
+ return issue_data
424
+ except Exception as e:
425
+ print(f"Error processing issue #{issue.number}: {e}")
426
+ return None
427
+
428
 
429
  # Knowledge Base and Vector Storage
430
  class KnowledgeBase:
 
436
  self.index = None
437
  self.knowledge_graph = nx.Graph()
438
  self.insights = {}
439
+ self.insights_cache = {}
440
+ self.cache_timestamp = None
441
 
442
  def initialize_vector_storage(self, file_contents: Dict[str, Dict]) -> None:
443
+ """Initialize vector storage with file contents and batched processing"""
444
  try:
445
  # Clear existing data
446
  self.embeddings = {}
 
450
  texts = []
451
  ids = []
452
 
453
+ # Process files in parallel for large repositories
454
+ if len(file_contents) > 50:
455
+ with concurrent.futures.ThreadPoolExecutor() as executor:
456
+ # Process files in batches
457
+ batch_size = 20
458
+ keys = list(file_contents.keys())
459
+ batches = [keys[i:i + batch_size] for i in range(0, len(keys), batch_size)]
460
+
461
+ # Create a function to process a batch
462
+ def process_batch(batch_keys):
463
+ batch_texts = []
464
+ batch_ids = []
465
+ for path in batch_keys:
466
+ file_data = file_contents[path]
467
+ content = file_data['content']
468
+
469
+ # Skip very large files to avoid embedding issues
470
+ if len(content) > 10000:
471
+ content = content[:10000] + "..."
472
+
473
+ batch_texts.append(content)
474
+ batch_ids.append(path)
475
+ return batch_texts, batch_ids
476
+
477
+ # Submit batch processing tasks
478
+ futures = [executor.submit(process_batch, batch) for batch in batches]
479
+
480
+ # Collect results
481
+ for future in concurrent.futures.as_completed(futures):
482
+ batch_texts, batch_ids = future.result()
483
+ texts.extend(batch_texts)
484
+ ids.extend(batch_ids)
485
+ else:
486
+ # For smaller repositories, process sequentially
487
+ for path, file_data in file_contents.items():
488
+ content = file_data['content']
489
+
490
+ # Skip very large files to avoid embedding issues
491
+ if len(content) > 10000:
492
+ content = content[:10000] + "..."
493
+
494
+ texts.append(content)
495
+ ids.append(path)
496
+
497
+ # Add nodes to knowledge graph
498
  for path, file_data in file_contents.items():
499
  self.knowledge_graph.add_node(
500
  path,
501
  type='file',
502
+ file_type=file_data.get('type', 'unknown'),
503
+ size=file_data.get('size', 0),
504
+ extension=file_data.get('ext', '')
505
  )
506
 
507
  # Create embeddings for all files
508
  if texts:
509
+ # Process embeddings in batches to avoid memory issues
510
+ batch_size = 32
511
+ file_embeddings = []
512
+
513
+ for i in range(0, len(texts), batch_size):
514
+ batch_texts = texts[i:i+batch_size]
515
+ batch_embeddings = self.embedding_model.encode(batch_texts)
516
+ file_embeddings.append(batch_embeddings)
517
+
518
+ file_embeddings = np.vstack(file_embeddings)
519
 
520
  # Initialize FAISS index
521
  dimension = file_embeddings.shape[1]
 
557
  # Create new edge
558
  self.knowledge_graph.add_edge(login, filename, weight=count)
559
 
560
+ # Optimized co-occurrence calculation
561
  file_co_occurrence = defaultdict(int)
562
+
563
+ # Process in batches for large commit histories
564
+ batch_size = 50
565
+ for i in range(0, len(commits), batch_size):
566
+ batch_commits = commits[i:i+batch_size]
567
+
568
+ for commit in batch_commits:
569
+ # Get all files in this commit
570
+ commit_files = [file['filename'] for file in commit['files']]
571
+
572
+ # Add co-occurrence for each pair of files
573
+ from itertools import combinations
574
+ for file1, file2 in combinations(commit_files, 2):
575
+ if file1 in self.knowledge_graph and file2 in self.knowledge_graph:
576
+ file_pair = tuple(sorted([file1, file2]))
577
+ file_co_occurrence[file_pair] += 1
578
 
579
  # Add edges for file co-occurrence
580
  for (file1, file2), count in file_co_occurrence.items():
 
589
  print(f"Error building knowledge graph: {e}")
590
  return nx.Graph()
591
 
592
+ @lru_cache(maxsize=32)
593
  def search_similar_files(self, query: str, top_k: int = 5) -> List[Dict]:
594
+ """Search for files similar to query with caching"""
595
  try:
596
  if not self.index:
597
  return []
 
621
  return []
622
 
623
  def extract_insights(self, repo_data: Dict, commits: List[Dict], contributors: Dict, issues: List[Dict]) -> Dict:
624
+ """Extract insights from repository data with datetime fix and caching"""
625
+ # Check if we have a recent cache (less than 10 minutes old)
626
+ current_time = time.time()
627
+ if self.cache_timestamp and (current_time - self.cache_timestamp < 600) and self.insights_cache:
628
+ return self.insights_cache
629
+
630
  try:
631
  insights = {
632
  'basic_stats': {},
 
636
  'issues': {}
637
  }
638
 
639
+ # Make a deep copy of repo_data to avoid modifying the original
640
+ repo_data_copy = {k: v for k, v in repo_data.items()}
641
+
642
  # Basic statistics
643
  insights['basic_stats'] = {
644
+ 'name': repo_data_copy['name'],
645
+ 'description': repo_data_copy['description'],
646
+ 'stars': repo_data_copy['stars'],
647
+ 'forks': repo_data_copy['forks'],
648
+ 'age_days': None, # Will calculate below
649
+ 'primary_language': repo_data_copy['language'],
650
+ 'topics': repo_data_copy['topics']
651
  }
652
+
653
+ # Fix: Normalize datetime objects to be timezone-naive for consistent comparison
654
+ created_at = repo_data_copy.get('created_at')
655
+ if created_at:
656
+ # Remove timezone info if present
657
+ if hasattr(created_at, 'tzinfo') and created_at.tzinfo:
658
+ created_at = created_at.replace(tzinfo=None)
659
+
660
+ # Calculate age
661
+ now = datetime.datetime.now()
662
+ insights['basic_stats']['age_days'] = (now - created_at).days
663
 
664
  # Activity insights
665
  if commits:
666
+ # Fix: Normalize all datetime objects to be timezone-naive
667
+ commit_dates = []
668
+ for commit in commits:
669
+ date = commit.get('date')
670
+ if date:
671
+ # Remove timezone info if present
672
+ if hasattr(date, 'tzinfo') and date.tzinfo:
673
+ date = date.replace(tzinfo=None)
674
+ commit_dates.append(date)
675
+
676
+ # Sort dates
677
  commit_dates.sort()
678
 
679
  if commit_dates:
 
688
  'last_commit': last_commit,
689
  'days_span': days_span,
690
  'commits_per_day': round(len(commits) / max(days_span, 1), 2),
 
691
  }
692
 
693
+ # Fix: Use Counter for most active day calculation
694
+ date_counter = Counter(d.date() for d in commit_dates)
695
+ if date_counter:
696
+ insights['activity']['most_active_day'] = date_counter.most_common(1)[0][0]
697
+
698
  # Commit activity by month
699
  commit_months = [d.strftime('%Y-%m') for d in commit_dates]
700
  month_counts = Counter(commit_months)
 
777
  close_times = []
778
  for issue in closed_issues:
779
  if issue['created_at'] and issue['closed_at']:
780
+ # Fix: Normalize datetime objects to be timezone-naive
781
+ created_at = issue['created_at']
782
+ closed_at = issue['closed_at']
783
+
784
+ if hasattr(created_at, 'tzinfo') and created_at.tzinfo:
785
+ created_at = created_at.replace(tzinfo=None)
786
+
787
+ if hasattr(closed_at, 'tzinfo') and closed_at.tzinfo:
788
+ closed_at = closed_at.replace(tzinfo=None)
789
+
790
+ close_time = (closed_at - created_at).days
791
  close_times.append(close_time)
792
 
793
  if close_times:
 
800
  {'label': label, 'count': count} for label, count in label_counts.most_common(5)
801
  ]
802
 
803
+ # Update cache
804
+ self.insights_cache = insights
805
+ self.cache_timestamp = current_time
806
  self.insights = insights
807
+
808
  return insights
809
  except Exception as e:
810
+ import traceback
811
  print(f"Error extracting insights: {e}")
812
+ print(traceback.format_exc())
813
  return {}
814
 
815
+
816
  # Main GitHub AI Agent Class
817
  class GitHubAIAgent:
818
  """Main class for GitHub AI Agent"""
819
  def __init__(self):
820
  self.config = Config()
821
+ self.github_manager = None
822
+ self.knowledge_base = None
823
+ self.gemini_client = None
824
+ self.visualization_manager = None
825
 
826
  self.repository_loaded = False
827
  self.repository_url = ""
828
  self.repository_analysis = {}
829
  self.visualizations = {}
830
+
831
+ # Initialize caches
832
+ self.file_cache = {}
833
+ self.contributor_cache = {}
834
+ self.commit_cache = {}
835
+ self.issue_cache = {}
836
+ self.query_cache = {}
837
 
838
  def set_api_keys(self, gemini_api_key: str, github_token: str = None) -> None:
839
  """Set API keys"""
 
846
  self.config.gemini_api_key = gemini_api_key
847
  self.config.github_token = github_token
848
 
849
+ # Initialize clients
850
  self.github_manager = GitHubManager(self.config)
851
+ self.knowledge_base = KnowledgeBase(self.config)
852
+ self.gemini_client = GeminiClient(self.config.gemini_api_key, self.config.gemini_model)
853
+ self.visualization_manager = RepositoryVisualizer(self.config)
854
 
855
  def load_repository(self, repository_url: str) -> Dict:
856
+ """Load and analyze a GitHub repository with improved parallelization"""
857
  result = {
858
  'success': False,
859
  'message': '',
 
869
  self.repository_analysis = {}
870
  self.visualizations = {}
871
 
872
+ # Load repository basic info
873
  print(f"Loading repository: {repository_url}")
874
  repo_loaded = self.github_manager.load_repository(repository_url)
875
 
 
880
  # Store repository URL
881
  self.repository_url = repository_url
882
 
883
+ # Use parallel processing for loading repository data
884
+ with concurrent.futures.ThreadPoolExecutor() as executor:
885
+ # Submit tasks
886
+ files_future = executor.submit(self.github_manager.load_files)
887
+ contributors_future = executor.submit(self.github_manager.load_contributors)
888
+ commits_future = executor.submit(self.github_manager.load_commits)
889
+ issues_future = executor.submit(self.github_manager.load_issues)
890
+
891
+ # Get results
892
+ files = files_future.result()
893
+ contributors = contributors_future.result()
894
+ commits = commits_future.result()
895
+ issues = issues_future.result()
896
+
897
  result['file_count'] = len(files)
898
  result['contributor_count'] = len(contributors)
899
 
900
+ # Initialize vector storage and build knowledge graph
901
+ # (These are kept sequential as they depend on previous steps)
902
+ print("Building knowledge base")
 
 
 
 
 
 
 
903
  self.knowledge_base.initialize_vector_storage(files)
904
  knowledge_graph = self.knowledge_base.build_knowledge_graph(
905
  commits, self.github_manager.contributors_data
906
  )
 
914
  issues
915
  )
916
 
917
+ # Use a separate thread for Gemini analysis which can be slower
918
+ # and doesn't block the main thread
919
+ def analyze_with_gemini():
920
+ print("Analyzing repository with Gemini")
921
+ return self.gemini_client.analyze_repository(
922
+ self.github_manager.repo_data,
923
+ files,
924
+ commits,
925
+ self.github_manager.contributors_data,
926
+ insights
927
+ )
928
+
929
+ # Use another thread pool for visualization generation
930
+ def create_visualizations():
931
+ print("Creating repository visualizations")
932
+ repo_graph_path = self.visualization_manager.create_repository_graph(knowledge_graph)
933
+ activity_chart_path = self.visualization_manager.create_commit_activity_chart(commits)
934
+ contributor_network_path = self.visualization_manager.create_contributor_network(
935
+ self.github_manager.contributors_data, commits
936
+ )
937
+ dependency_graph_path = self.visualization_manager.create_file_dependency_graph(files)
938
+
939
+ return {
940
+ 'repository_graph': repo_graph_path,
941
+ 'activity_chart': activity_chart_path,
942
+ 'contributor_network': contributor_network_path,
943
+ 'dependency_graph': dependency_graph_path,
944
+ }
945
+
946
+ # Run Gemini analysis and visualization generation in parallel
947
+ with concurrent.futures.ThreadPoolExecutor() as executor:
948
+ analysis_future = executor.submit(analyze_with_gemini)
949
+ viz_future = executor.submit(create_visualizations)
950
+
951
+ # Get results
952
+ self.repository_analysis = analysis_future.result()
953
+ self.visualizations = viz_future.result()
954
 
955
  # Update result
956
  result['success'] = True
 
961
 
962
  return result
963
  except Exception as e:
964
+ import traceback
965
+ print(f"Error loading repository: {str(e)}")
966
+ print(traceback.format_exc())
967
  result['message'] = f"Error loading repository: {str(e)}"
968
  return result
969
 
970
+ @lru_cache(maxsize=32)
971
  def answer_query(self, query: str) -> Dict:
972
+ """Answer a natural language query about the repository with caching"""
973
  if not self.repository_loaded:
974
  return {
975
  'success': False,
 
977
  'answer': ""
978
  }
979
 
980
+ # Check cache if enabled
981
+ cache_key = f"query_{hash(query)}"
982
+ if self.config.cache_enabled and cache_key in self.query_cache:
983
+ cached_result = self.query_cache[cache_key]
984
+ # Check if cache is still valid
985
+ if time.time() - cached_result['timestamp'] < self.config.cache_ttl:
986
+ return cached_result['result']
987
+
988
  try:
989
  # Search for relevant files
990
  similar_files = self.knowledge_base.search_similar_files(query)
 
997
  self.knowledge_base.insights
998
  )
999
 
1000
+ result = {
1001
  'success': True,
1002
  'message': "Query answered successfully",
1003
  'answer': answer,
1004
  'relevant_files': [f['file'] for f in similar_files]
1005
  }
1006
+
1007
+ # Update cache
1008
+ if self.config.cache_enabled:
1009
+ self.query_cache[cache_key] = {
1010
+ 'result': result,
1011
+ 'timestamp': time.time()
1012
+ }
1013
+
1014
+ return result
1015
  except Exception as e:
1016
  return {
1017
  'success': False,
 
1020
  }
1021
 
1022
  def analyze_code(self, file_path: str = "", code_snippet: str = "", language: str = "") -> Dict:
1023
+ """Analyze a code file or snippet with improved error handling"""
1024
  if not file_path and not code_snippet:
1025
  return {
1026
  'success': False,
 
1131
  'success': True,
1132
  'message': "Repository visualizations retrieved",
1133
  'visualizations': self.visualizations
1134
+ }
1135
+
1136
+ def clear_caches(self) -> None:
1137
+ """Clear all caches"""
1138
+ self.file_cache.clear()
1139
+ self.contributor_cache.clear()
1140
+ self.commit_cache.clear()
1141
+ self.issue_cache.clear()
1142
+ self.query_cache.clear()
1143
+
1144
+ # Clear LRU caches
1145
+ self.answer_query.cache_clear()
1146
+ if hasattr(self.knowledge_base, 'search_similar_files'):
1147
+ self.knowledge_base.search_similar_files.cache_clear()
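
For reference, a minimal usage sketch of the GitHubAIAgent API touched by this commit (illustrative, not part of the diff): it assumes GEMINI_API_KEY and GITHUB_ACCESS_TOKEN are exported, that the companion modules gemini_integration and visualization_module are importable, and that the repository URL below is only an example.

import os

from github_ai_agent import GitHubAIAgent

agent = GitHubAIAgent()

# set_api_keys() now builds the GitHubManager, KnowledgeBase, GeminiClient and
# RepositoryVisualizer instances, so it must be called before load_repository().
agent.set_api_keys(
    gemini_api_key=os.environ["GEMINI_API_KEY"],
    github_token=os.environ.get("GITHUB_ACCESS_TOKEN"),
)

# Loads files, contributors, commits and issues in parallel, then builds the
# vector index, knowledge graph, Gemini analysis and visualizations.
result = agent.load_repository("https://github.com/octocat/Hello-World")  # example URL
print(result["message"], result.get("file_count"), result.get("contributor_count"))

if result["success"]:
    # answer_query() responses are cached for config.cache_ttl seconds.
    reply = agent.answer_query("Which files implement the main entry point?")
    print(reply["answer"], reply["relevant_files"])

    # Drop the query/file caches and the lru_cache wrappers added in this commit.
    agent.clear_caches()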