musictimer committed
Commit ded2bd6 · 1 Parent(s): 7deb5ff
app.py CHANGED
@@ -25,6 +25,14 @@ from omegaconf import DictConfig, OmegaConf
 from PIL import Image
 
 # Import your modules
+import sys
+from pathlib import Path
+
+# Add project root to path for src package imports
+project_root = Path(__file__).parent
+if str(project_root) not in sys.path:
+    sys.path.insert(0, str(project_root))
+
 from src.agent import Agent
 from src.csgo.web_action_processing import WebCSGOAction, web_keys_to_csgo_action_names
 from src.envs import WorldModelEnv
@@ -96,62 +104,24 @@ class WebGameEngine:
     def load_model_weights():
         """Load model weights in thread pool to avoid blocking"""
         try:
-            # Direct download without any caching to avoid permission issues on HF Spaces
-            logger.info("Downloading model directly without caching...")
-            self.loading_status = "Downloading model without caching..."
+            logger.info("Loading model using torch.hub.load_state_dict_from_url...")
+            self.loading_status = "Downloading model..."
             self.download_progress = 10
 
             model_url = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"
 
-            # Use requests to download directly into memory
-            import requests
-            import io
-
-            logger.info(f"Starting direct download from {model_url}")
-            response = requests.get(model_url, stream=True)
-            response.raise_for_status()
-
-            # Get the total file size for progress tracking
-            total_size = int(response.headers.get('content-length', 0))
-            logger.info(f"Model file size: {total_size / (1024*1024):.1f} MB")
-
-            # Download with progress tracking
-            downloaded_data = io.BytesIO()
-            downloaded_size = 0
-
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    downloaded_data.write(chunk)
-                    downloaded_size += len(chunk)
-
-                    # Update progress
-                    if total_size > 0:
-                        progress = min(50, int((downloaded_size / total_size) * 40) + 10)  # 10-50%
-                        if progress != self.download_progress:
-                            self.download_progress = progress
-                            logger.info(f"Download progress: {progress}%")
-
-            self.download_progress = 50
-            self.loading_status = "Download complete, loading model..."
-            logger.info("Download completed, loading state dict...")
-
-            # Reset to beginning of buffer and load
-            downloaded_data.seek(0)
-            state_dict = torch.load(downloaded_data, map_location=device)
-            logger.info("Successfully loaded model using direct download")
+            # Use torch.hub to download and load state dict
+            logger.info(f"Loading state dict from {model_url}")
+            state_dict = torch.hub.load_state_dict_from_url(model_url, map_location=device)
 
-        except Exception as e:
-            logger.error(f"Failed to download model directly: {e}")
-            raise Exception(f"Direct download failed: {str(e)}")
-
-        # Load state dict into agent using the new load_state_dict method
-        try:
-            logger.info("Model download completed, loading weights...")
             self.download_progress = 60
             self.loading_status = "Loading model weights into agent..."
+            logger.info("State dict loaded, applying to agent...")
 
-            # Use the agent's new load_state_dict method
-            agent.load_state_dict(state_dict)
+            # Load state dict into agent, but skip actor_critic if not present
+            has_actor_critic = any(k.startswith('actor_critic.') for k in state_dict.keys())
+            logger.info(f"Model has actor_critic weights: {has_actor_critic}")
+            agent.load_state_dict(state_dict, load_actor_critic=has_actor_critic)
 
             self.download_progress = 100
             self.loading_status = "Model loaded successfully!"
@@ -159,7 +129,7 @@ class WebGameEngine:
             return True
 
         except Exception as e:
-            logger.error(f"Failed to load state dict into agent: {e}")
+            logger.error(f"Failed to load model: {e}")
            import traceback
            traceback.print_exc()
            return False
@@ -210,6 +180,15 @@ class WebGameEngine:
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         logger.info(f"Using device: {device}")
 
+        # Log GPU availability and CUDA info for debugging HF Spaces
+        if torch.cuda.is_available():
+            logger.info(f"CUDA available: {torch.cuda.is_available()}")
+            logger.info(f"GPU device count: {torch.cuda.device_count()}")
+            logger.info(f"Current GPU: {torch.cuda.get_device_name(0)}")
+            logger.info(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+        else:
+            logger.info("CUDA not available, using CPU - this is normal for HF Spaces free tier")
+
         # Initialize agent first
         num_actions = cfg.env.num_actions
         agent = Agent(instantiate(cfg.agent, num_actions=num_actions)).to(device).eval()
@@ -228,7 +207,7 @@ class WebGameEngine:
             logger.info("Successfully loaded checkpoint from HF Hub")
         else:
             # Fallback to local checkpoint if available
-            logger.warning("Failed to load from HF Hub, trying local checkpoint...")
+            logger.error("Failed to load from HF Hub! Check the detailed error above.")
             checkpoint_path = web_config.get_checkpoint_path()
             if checkpoint_path.exists():
                 logger.info(f"Loading local checkpoint: {checkpoint_path}")
@@ -236,6 +215,7 @@ class WebGameEngine:
                 agent.load(checkpoint_path)
                 logger.info(f"Successfully loaded local checkpoint: {checkpoint_path}")
             else:
+                logger.error(f"No local checkpoint found at: {checkpoint_path}")
                 raise FileNotFoundError("No model checkpoint available (local or remote)")
 
         except Exception as e:
@@ -255,6 +235,18 @@ class WebGameEngine:
         # Create play environment
         self.play_env = WebPlayEnv(agent, wm_env, False, False, False)
 
+        # Verify actor-critic is loaded and ready for inference
+        if agent.actor_critic is not None:
+            logger.info(f"Actor-critic model loaded with {agent.actor_critic.lstm_dim} LSTM dimensions")
+            logger.info(f"Actor-critic device: {agent.actor_critic.device}")
+            # Force AI control for web demo
+            self.play_env.is_human_player = False
+            logger.info("WebPlayEnv set to AI control mode")
+        else:
+            logger.warning("No actor-critic model found - AI inference will not work!")
+            self.play_env.is_human_player = True
+            logger.info("WebPlayEnv set to human control mode (fallback)")
+
         # Model compilation causes 10-30s delay on first inference, so make it optional
         # You can enable it by setting ENABLE_TORCH_COMPILE=1 environment variable
         import os
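
Note on the download change above: unlike the removed requests-based in-memory download, torch.hub.load_state_dict_from_url caches the checkpoint on disk (by default under $TORCH_HOME/hub/checkpoints, typically ~/.cache/torch), so a Space restart can reuse the file instead of downloading it again. A minimal sketch of pinning that cache to a known-writable directory; the /tmp path and helper name are assumptions, not part of this commit:

import torch

MODEL_URL = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"

def fetch_state_dict(device: torch.device) -> dict:
    # model_dir overrides the default cache location, which may not be writable
    # on a Hugging Face Space; /tmp/torch_checkpoints is a hypothetical choice.
    return torch.hub.load_state_dict_from_url(
        MODEL_URL,
        model_dir="/tmp/torch_checkpoints",
        map_location=device,
        progress=True,
    )
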
config/agent/csgo.yaml CHANGED
@@ -31,4 +31,10 @@ upsampler:
     attn_depths: [0, 0, 0, 1]
 
 rew_end_model: null
-actor_critic: null
+actor_critic:
+  _target_: src.models.actor_critic.ActorCriticConfig
+  lstm_dim: 512
+  img_channels: 3
+  img_size: 64
+  channels: [32, 64, 128]
+  down: [2, 2, 2]
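
Note on the config change above: `actor_critic: null` previously meant no actor-critic head was built; the new block gives Hydra a `_target_` to instantiate, which the actor-critic checks added in app.py rely on. A minimal sketch of reading this group file on its own with OmegaConf; in app.py the node is built recursively by instantiate(cfg.agent, num_actions=num_actions), and only the fields declared in the YAML are shown here:

from omegaconf import OmegaConf

agent_cfg = OmegaConf.load("config/agent/csgo.yaml")
assert agent_cfg.rew_end_model is None                 # unchanged by this commit
print(agent_cfg.actor_critic["_target_"])              # src.models.actor_critic.ActorCriticConfig
print(agent_cfg.actor_critic.lstm_dim)                 # 512 - must match the checkpoint's LSTM size
print(list(agent_cfg.actor_critic.channels))           # [32, 64, 128]
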
src/__init__.pyc ADDED
Binary file (102 Bytes).
 
src/__pycache__/agent.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/agent.cpython-310.pyc and b/src/__pycache__/agent.cpython-310.pyc differ
 
src/game/__init__.pyc ADDED
Binary file (366 Bytes).
 
src/game/__pycache__/dataset_env.cpython-310.pyc CHANGED
Binary files a/src/game/__pycache__/dataset_env.cpython-310.pyc and b/src/game/__pycache__/dataset_env.cpython-310.pyc differ
 
src/game/__pycache__/game.cpython-310.pyc CHANGED
Binary files a/src/game/__pycache__/game.cpython-310.pyc and b/src/game/__pycache__/game.cpython-310.pyc differ
 
src/game/__pycache__/play_env.cpython-310.pyc CHANGED
Binary files a/src/game/__pycache__/play_env.cpython-310.pyc and b/src/game/__pycache__/play_env.cpython-310.pyc differ
 
src/game/__pycache__/web_play_env.cpython-310.pyc CHANGED
Binary files a/src/game/__pycache__/web_play_env.cpython-310.pyc and b/src/game/__pycache__/web_play_env.cpython-310.pyc differ
 
src/game/play_env.py CHANGED
@@ -7,11 +7,11 @@ import pygame
 import torch
 from torch import Tensor
 
-from agent import Agent
-from csgo.action_processing import CSGOAction, decode_csgo_action, encode_csgo_action, print_csgo_action
-from csgo.keymap import CSGO_KEYMAP
-from data import Dataset, Episode
-from envs import WorldModelEnv
+from ..agent import Agent
+from ..csgo.action_processing import CSGOAction, decode_csgo_action, encode_csgo_action, print_csgo_action
+from ..csgo.keymap import CSGO_KEYMAP
+from ..data import Dataset, Episode
+from ..envs import WorldModelEnv
 
 
 NamedEnv = namedtuple("NamedEnv", "name env")
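
Note on the import change above: these relative imports only resolve when play_env is loaded as part of the src.game package, which is why app.py now puts the project root (the directory containing src/) on sys.path rather than src/ itself. A small sketch of the intended pattern from a hypothetical caller at the project root:

# hypothetical_caller.py, sitting next to the src/ directory
import sys
from pathlib import Path

# Same idea as the app.py change: make src importable as a package so that
# "from ..agent import Agent" inside src/game/play_env.py can resolve.
project_root = Path(__file__).parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.game.play_env import PlayEnv   # package import: the ".." imports resolve
# Importing play_env.py as a top-level module instead would break them.
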
src/game/web_play_env.py CHANGED
@@ -1,82 +1,68 @@
 """
-Web-compatible PlayEnv that works without pygame
+Web-compatible PlayEnv that handles web input and AI inference
 """
 
-from collections import defaultdict, namedtuple
-from pathlib import Path
-from typing import Any, Dict, List, Tuple, Set
-
+from typing import Any, Dict, List, Set, Tuple
 import torch
 from torch import Tensor
+from torch.distributions.categorical import Categorical
 
 from ..agent import Agent
-from ..csgo.web_action_processing import WebCSGOAction, web_keys_to_csgo_action_names, encode_web_csgo_action
-from ..data import Dataset, Episode
 from ..envs import WorldModelEnv
+from ..csgo.web_action_processing import WebCSGOAction, web_keys_to_csgo_action_names, encode_web_csgo_action
+from .play_env import PlayEnv
 
-OneStepData = namedtuple("OneStepData", "obs act rew end trunc")
 
-class WebPlayEnv:
-    """Web-compatible version of PlayEnv without pygame dependencies"""
+class WebPlayEnv(PlayEnv):
+    """Web-compatible version of PlayEnv that handles web input and AI inference"""
 
     def __init__(
         self,
         agent: Agent,
         wm_env: WorldModelEnv,
-        recording_mode: bool = False,
-        store_denoising_trajectory: bool = False,
-        store_original_obs: bool = False,
+        recording_mode: bool,
+        store_denoising_trajectory: bool,
+        store_original_obs: bool,
     ) -> None:
-        self.agent = agent
-        self.recording_mode = recording_mode
-        self.store_denoising_trajectory = store_denoising_trajectory
-        self.store_original_obs = store_original_obs
-        self.is_human_player = True
-        self.env_id = 0
-        self.env_name = "world model"
-        self.env = wm_env
-        self.obs, self.t, self.buffer, self.rec_dataset = (None,) * 4
-
-    def print_controls(self) -> None:
-        """Print available controls for web interface"""
-        print("\nWeb Environment Controls:\n")
-        controls = {
-            "W": "Move Forward",
-            "A": "Move Left",
-            "S": "Move Back",
-            "D": "Move Right",
-            "Space": "Jump",
-            "Ctrl": "Crouch",
-            "Shift": "Walk",
-            "1": "Weapon 1",
-            "2": "Weapon 2",
-            "3": "Weapon 3",
-            "R": "Reload",
-            "Arrow Keys": "Camera Movement",
-            "Left Click": "Primary Fire",
-            "Right Click": "Secondary Fire"
-        }
+        super().__init__(agent, wm_env, recording_mode, store_denoising_trajectory, store_original_obs)
 
-        for key, action in controls.items():
-            print(f"{key}: {action}")
-
-    def step_from_web_input(self, pressed_keys: Set[str], mouse_x: float = 0, mouse_y: float = 0,
-                            l_click: bool = False, r_click: bool = False) -> Tuple[Tensor, Tensor, bool, bool, Dict]:
-        """
-        Step the environment using web input
+        # For web demo, we want AI control by default
+        self.is_human_player = False  # AI controls the actions
+        self.human_input_override = False  # Can be set to True to allow human input
 
-        Args:
-            pressed_keys: Set of currently pressed key codes (e.g., {'KeyW', 'KeyA'})
-            mouse_x, mouse_y: Mouse movement deltas
-            l_click, r_click: Mouse button states
-
-        Returns:
-            Tuple of (observation, reward, done, truncated, info)
+        # Initialize LSTM hidden states for actor-critic
+        self.hx = torch.zeros(1, agent.actor_critic.lstm_dim, device=agent.device)
+        self.cx = torch.zeros(1, agent.actor_critic.lstm_dim, device=agent.device)
+
+    def switch_controller(self) -> None:
+        """Switch between AI and human control"""
+        self.is_human_player = not self.is_human_player
+        print(f"Switched to {'human' if self.is_human_player else 'AI'} control")
+
+    def str_control(self) -> str:
+        """Return control mode string"""
+        if self.human_input_override:
+            return "Human Override"
+        return "Human" if self.is_human_player else "AI"
+
+    @torch.no_grad()
+    def step_from_web_input(
+        self,
+        pressed_keys: Set[str],
+        mouse_x: float,
+        mouse_y: float,
+        l_click: bool,
+        r_click: bool,
+    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Dict[str, Any]]:
+        """
+        Step the environment with web input.
+        If AI mode is enabled, use AI inference. If human mode or override, use human input.
         """
+
         # Convert web keys to action names
         action_names = web_keys_to_csgo_action_names(pressed_keys)
 
-        # Create WebCSGOAction
+        # Create web CSGO action from input
        web_action = WebCSGOAction(
             key_names=action_names,
             mouse_x=mouse_x,
@@ -85,84 +71,52 @@ class WebPlayEnv:
             r_click=r_click
         )
 
-        # Convert to tensor format for the model
-        action_tensor = encode_web_csgo_action(web_action, self.agent.device)
-
-        # Step the environment with the action tensor
-        return self.step_with_tensor(action_tensor)
-
-    def step_with_tensor(self, action_tensor: Tensor) -> Tuple[Tensor, Tensor, bool, bool, Dict]:
-        """Step environment with pre-encoded action tensor"""
-        if self.is_human_player:
-            # Use human action
-            act = action_tensor.unsqueeze(0)  # Add batch dimension
+        # If we have human input override or in human mode, use human input
+        if self.human_input_override or self.is_human_player:
+            # Encode the web action to tensor format
+            action = encode_web_csgo_action(web_action, device=self.agent.device)
+
         else:
-            # Use AI agent action
-            with torch.no_grad():
-                act_logits, _ = self.agent.actor_critic.predict_act_value(self.obs.unsqueeze(0), self.hx_cx)
-                act = torch.distributions.Categorical(logits=act_logits).sample()
+            # AI mode - use the agent's actor-critic to predict the action
+            try:
+                # Get current observation (ensure it has batch dimension)
+                obs = self.obs
+                if obs.ndim == 3:  # CHW -> BCHW
+                    obs = obs.unsqueeze(0)
+
+                # Detach hidden states to prevent gradient tracking
+                self.hx = self.hx.detach()
+                self.cx = self.cx.detach()
+
+                # Get action logits and value from actor-critic
+                logits_act, value, (self.hx, self.cx) = self.agent.actor_critic.predict_act_value(obs, (self.hx, self.cx))
+
+                # Sample action from logits
+                action_dist = Categorical(logits=logits_act)
+                action = action_dist.sample()
+
+                # Convert to proper shape (remove batch dimension if needed)
+                if action.ndim > 0 and action.size(0) == 1:
+                    action = action.squeeze(0)
+
+            except Exception as e:
+                print(f"AI inference failed: {e}")
+                import traceback
+                traceback.print_exc()
+                # Fallback to human input if AI fails
+                action = encode_web_csgo_action(web_action, device=self.agent.device)
 
-        # Step environment
-        next_obs, rew, end, trunc, info = self.env.step(act)
+        # Step the environment with the chosen action
+        next_obs, rew, end, trunc, env_info = self.env.step(action)
 
-        # Handle episode completion
-        if end or trunc:
-            if self.recording_mode and self.rec_dataset is not None:
-                self.rec_dataset.save_episode()
-                print(f"Episode saved! Length: {len(self.buffer)}")
+        # Update internal state
+        self.obs = next_obs
+        self.t += 1
+
+        # Reset hidden states on episode end
+        if end.any() or trunc.any():
+            self.hx.zero_()
+            self.cx.zero_()
 
-        return next_obs[0], rew[0], end[0], trunc[0], info
-
-    def reset(self) -> Tuple[Tensor, Dict]:
-        """Reset the environment"""
-        self.obs = self.env.reset()[0]  # Get first observation from batch
-        self.t = 0
-        self.buffer = []
-
-        # Initialize actor-critic hidden state if using AI player
-        if hasattr(self.agent, 'actor_critic') and self.agent.actor_critic is not None:
-            self.hx_cx = (
-                torch.zeros(1, self.agent.actor_critic.lstm_dim, device=self.agent.device),
-                torch.zeros(1, self.agent.actor_critic.lstm_dim, device=self.agent.device)
-            )
-        else:
-            self.hx_cx = None
-
-        info = {"step": 0, "episode_return": 0}
-        return self.obs, info
-
-    def switch_controller(self) -> bool:
-        """Switch between human and AI control"""
-        self.is_human_player = not self.is_human_player
-        controller = "Human" if self.is_human_player else "AI"
-        print(f"Switched to {controller} control")
-        return True
-
-    def next_mode(self) -> bool:
-        """Switch control mode"""
-        return self.switch_controller()
-
-    def next_axis_1(self) -> bool:
-        """Placeholder for axis control"""
-        return False
-
-    def prev_axis_1(self) -> bool:
-        """Placeholder for axis control"""
-        return False
-
-    def next_axis_2(self) -> bool:
-        """Placeholder for axis control"""
-        return False
-
-    def prev_axis_2(self) -> bool:
-        """Placeholder for axis control"""
-        return False
-
-    def print_env(self) -> None:
-        """Print current environment info"""
-        print(f"> Environment: {self.env_name}")
-        print(f"> Controller: {'Human' if self.is_human_player else 'AI'}")
-
-    def str_control(self) -> str:
-        """Get control mode string"""
-        return "Human" if self.is_human_player else "AI"
+        # Return the step results
+        return next_obs, rew, end, trunc, env_info
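
Usage note for the rewritten class: a sketch of how a web handler might drive WebPlayEnv.step_from_web_input with browser-style key codes; the handler name and payload shape are assumptions, only the method signature and the tensor-valued end/trunc come from the diff above:

# hypothetical websocket/frame handler on the app.py side
def on_client_frame(play_env, payload: dict):
    obs, rew, end, trunc, info = play_env.step_from_web_input(
        pressed_keys=set(payload.get("keys", [])),   # e.g. {"KeyW", "KeyA"}
        mouse_x=float(payload.get("mouse_x", 0.0)),
        mouse_y=float(payload.get("mouse_y", 0.0)),
        l_click=bool(payload.get("l_click", False)),
        r_click=bool(payload.get("r_click", False)),
    )
    # end/trunc are tensors in the new API (hence the .any() checks in the class),
    # so reduce them before reporting episode state back to the client.
    return obs, bool(end.any() or trunc.any())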