"""LLM integration module: local LLM inference through Ollama."""

import json
import logging
from typing import Dict, Iterable, List, Optional

import requests

logger = logging.getLogger(__name__)


class OllamaLLM:
    """Run local LLM inference against an Ollama server over its HTTP API."""

    # Model recommendations suited to a 16GB MacBook Air.
    RECOMMENDED_MODELS = {
        "deepseek-r1:7b": {
            "name": "deepseek-r1:7b",
            "description": "DeepSeek R1 7B - 大模型,高質量",
            "memory_required": "~8GB",
            "quality": "優秀",
        },
        "llama3.2:3b": {
            "name": "llama3.2:3b",
            "description": "Meta Llama 3.2 3B - 輕量級,適合 16GB 內存",
            "memory_required": "~4GB",
            "quality": "良好",
        },
        "llama3.2:1b": {
            "name": "llama3.2:1b",
            "description": "Meta Llama 3.2 1B - 極輕量級,快速響應",
            "memory_required": "~2GB",
            "quality": "基礎",
        },
        "phi3:mini": {
            "name": "phi3:mini",
            "description": "Microsoft Phi-3 Mini - 小模型,高質量",
            "memory_required": "~3GB",
            "quality": "良好",
        },
        "gemma:2b": {
            "name": "gemma:2b",
            "description": "Google Gemma 2B - 輕量級,開源",
            "memory_required": "~3GB",
            "quality": "良好",
        },
        "mistral:7b": {
            "name": "mistral:7b",
            "description": "Mistral 7B - 較大但質量高(如果內存足夠)",
            "memory_required": "~8GB",
            "quality": "優秀",
        },
    }

    def __init__(
        self,
        model_name: str = "llama3.2:3b",
        base_url: str = "http://localhost:11434",
        timeout: int = 120
    ):
        """
        Initialise the Ollama client.

        No network request is made here; the server is only contacted by
        the other methods.

        Args:
            model_name: Ollama model name (default: llama3.2:3b).
            base_url: Base URL of the Ollama server; trailing slashes are
                stripped.
            timeout: Timeout in seconds for generation requests.
        """
        self.model_name = model_name
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.api_url = f"{self.base_url}/api"

        # Unknown models are still accepted; this is only a hint to the user.
        if model_name not in self.RECOMMENDED_MODELS:
            logger.warning(
                "⚠️ 模型 '%s' 不在推薦列表中。推薦的模型: %s",
                model_name,
                ', '.join(self.RECOMMENDED_MODELS),
            )

        logger.info("✅ Ollama LLM 初始化完成 (模型: %s)", model_name)

    def _fetch_model_names(self) -> Optional[List[str]]:
        """
        Query ``/api/tags`` for the names of the locally installed models.

        Returns:
            The list of model names, or None when the server answers with a
            non-200 status.

        Raises:
            Exception: Any error raised by the HTTP request or by parsing
                the response is propagated; callers decide how to handle it.
        """
        response = requests.get(f"{self.api_url}/tags", timeout=5)
        if response.status_code != 200:
            return None
        models = response.json().get('models', [])
        return [m.get('name', '') for m in models]

    @staticmethod
    def _is_model_installed(requested: str, installed: Iterable[str]) -> bool:
        """
        Tell whether ``requested`` names one of the ``installed`` models.

        The comparison is exact. A requested name without a tag is treated
        as ``<name>:latest``, which is how Ollama resolves untagged names.
        """
        if not requested:
            return False
        # Only the last path segment can carry a tag; a colon earlier in the
        # name may be a registry port (e.g. "host:5000/ns/model").
        has_tag = ":" in requested.rsplit("/", 1)[-1]
        wanted = requested if has_tag else f"{requested}:latest"
        return wanted in set(installed)

    @staticmethod
    def _collect_stream(lines: Iterable[bytes]) -> str:
        """
        Assemble the answer from the NDJSON lines of a streaming response.

        Each chunk is echoed to stdout as it arrives. Blank lines and lines
        that are not JSON objects are skipped. Reading stops at the first
        object whose ``done`` field is truthy.

        Raises:
            RuntimeError: If the server reports an error inside the stream.
        """
        parts: List[str] = []
        try:
            for line in lines:
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    continue
                if 'error' in data:
                    raise RuntimeError(f"Ollama API 錯誤: {data['error']}")
                if 'response' in data:
                    chunk = data['response']
                    parts.append(chunk)
                    print(chunk, end='', flush=True)
                if data.get('done', False):
                    break
        finally:
            # Terminate the echoed line, even when the stream failed midway.
            print()
        return "".join(parts)

    def _check_ollama_connection(self) -> bool:
        """
        Check whether the Ollama service is reachable.

        Returns:
            True if ``/api/tags`` answers with HTTP 200, False otherwise
            (request errors are logged, not raised).
        """
        try:
            response = requests.get(f"{self.api_url}/tags", timeout=5)
            return response.status_code == 200
        except Exception as e:
            logger.error("❌ 無法連接到 Ollama: %s", e)
            logger.error(" 請確保 Ollama 正在運行: ollama serve")
            return False

    def _check_model_available(self) -> bool:
        """
        Check whether the configured model has been downloaded.

        Returns:
            True if the model is listed by the server, False if it is not,
            if the server answers non-200, or if the lookup fails (failures
            are logged, not raised).
        """
        try:
            names = self._fetch_model_names()
        except Exception as e:
            logger.error("❌ 檢查模型時出錯: %s", e)
            return False
        if names is None:
            return False
        return self._is_model_installed(self.model_name, names)

    def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stream: bool = False
    ) -> str:
        """
        Generate an answer for ``prompt``.

        Args:
            prompt: The input prompt.
            temperature: Sampling temperature, forwarded to Ollama as
                ``options.temperature``.
            max_tokens: Maximum number of tokens to generate, forwarded as
                ``options.num_predict``. None (or 0) leaves the model
                default in place.
            stream: When True, chunks are printed to stdout as they arrive
                and the concatenated text is returned at the end.

        Returns:
            The generated text (empty string if the reply has no
            ``response`` field).

        Raises:
            ConnectionError: If the Ollama service cannot be reached.
            TimeoutError: If the request exceeds ``self.timeout`` seconds.
            RuntimeError: If the API answers with a non-200 status or
                reports an error inside the stream.
        """
        if not self._check_ollama_connection():
            raise ConnectionError(
                f"無法連接到 Ollama 服務 ({self.base_url})\n"
                f"請確保 Ollama 正在運行:\n"
                f" 1. 安裝 Ollama: https://ollama.ai\n"
                f" 2. 啟動服務: ollama serve\n"
                f" 3. 下載模型: ollama pull {self.model_name}"
            )

        # A missing model is only warned about; the request below is still
        # attempted and the server has the final say.
        if not self._check_model_available():
            logger.warning(
                "⚠️ 模型 '%s' 可能未下載。請運行: ollama pull %s",
                self.model_name,
                self.model_name,
            )

        payload = {
            "model": self.model_name,
            "prompt": prompt,
            "stream": stream,
            "options": {
                "temperature": temperature,
            }
        }
        if max_tokens:
            payload["options"]["num_predict"] = max_tokens

        try:
            # The context manager releases the connection even when the
            # stream is abandoned early (e.g. after the "done" marker).
            with requests.post(
                f"{self.api_url}/generate",
                json=payload,
                timeout=self.timeout,
                stream=stream
            ) as response:
                if response.status_code != 200:
                    raise RuntimeError(f"Ollama API 錯誤: {response.text}")

                if stream:
                    return self._collect_stream(response.iter_lines())

                return response.json().get('response', '')

        except requests.exceptions.Timeout as exc:
            raise TimeoutError(
                f"請求超時({self.timeout}秒)。"
                f"可以嘗試增加 timeout 或使用更小的模型。"
            ) from exc
        except requests.exceptions.ConnectionError as exc:
            raise ConnectionError(
                "無法連接到 Ollama 服務。"
                "請確保 Ollama 正在運行:ollama serve"
            ) from exc
        except Exception as e:
            logger.error("❌ 生成回答時出錯: %s", e)
            raise

    def list_available_models(self) -> List[str]:
        """
        List the models available locally.

        Returns:
            The model names reported by the server, or an empty list when
            the server answers non-200 or the lookup fails (failures are
            logged, not raised).
        """
        try:
            names = self._fetch_model_names()
        except Exception as e:
            logger.error("❌ 獲取模型列表時出錯: %s", e)
            return []
        return names if names is not None else []

    @classmethod
    def print_recommended_models(cls):
        """Print the recommended model list to stdout."""
        print("\n" + "="*60)
        print("適合 16GB MacBook Air 的 Ollama 模型推薦")
        print("="*60)
        print()

        for info in cls.RECOMMENDED_MODELS.values():
            print(f"📦 {info['name']}")
            print(f" 描述: {info['description']}")
            print(f" 內存需求: {info['memory_required']}")
            print(f" 質量: {info['quality']}")
            print(f" 下載命令: ollama pull {info['name']}")
            print()