Spaces:

Etadingrui
/

PIWM

Sleeping

App Files Files Community

musictimer committed on Sep 8

Commit

b8159f9

1 Parent(s): f1594be

Fix bug 10

Browse files

Files changed (4) hide show

app.py +12 -1
src/game/__pycache__/web_play_env.cpython-310.pyc +0 -0
src/game/dataset_env.py +9 -9
src/game/web_play_env.py +32 -0

app.py CHANGED Viewed

@@ -727,6 +727,10 @@ async def get_homepage():
                 ws.onopen = function(event) {
                     statusEl.textContent = 'Connected';
                     statusEl.style.color = '#00ff00';
                 };
                 ws.onmessage = function(event) {
@@ -857,7 +861,14 @@ async def get_homepage():
             canvas.addEventListener('click', () => {
                 canvas.focus();
                 if (!gameStarted) {
-                    startGame();
                 }
             });

                 ws.onopen = function(event) {
                     statusEl.textContent = 'Connected';
                     statusEl.style.color = '#00ff00';
+                    // If user already clicked to start before WS was ready, send start now
+                    if (gameStarted) {
+                        ws.send(JSON.stringify({ type: 'start' }));
+                    }
                 };
                 ws.onmessage = function(event) {
             canvas.addEventListener('click', () => {
                 canvas.focus();
                 if (!gameStarted) {
+                    // Queue start locally and send immediately if WS is open
+                    gameStarted = true;
+                    gameStatusEl.textContent = 'Starting AI...';
+                    gameStatusEl.style.color = '#ffff00';
+                    loadingEl.style.display = 'block';
+                    if (ws && ws.readyState === WebSocket.OPEN) {
+                        ws.send(JSON.stringify({ type: 'start' }));
+                    }
                 }
             });

src/game/__pycache__/web_play_env.cpython-310.pyc CHANGED Viewed

Binary files a/src/game/__pycache__/web_play_env.cpython-310.pyc and b/src/game/__pycache__/web_play_env.cpython-310.pyc differ

src/game/dataset_env.py CHANGED Viewed

@@ -77,15 +77,15 @@ class DatasetEnv:
     @torch.no_grad()
     def step(self, act: int) -> Tuple[Tensor, Tensor, bool, bool, Dict[str, Any]]:
-        match act:
-            case 1:
-                self.set_timestep(self.t - 1)
-            case 2:
-                self.set_timestep(self.t + 1)
-            case 3:
-                self.set_timestep(self.t - 10)
-            case 4:
-                self.set_timestep(self.t + 10)
         n_digits = len(str(self.ep_length))

     @torch.no_grad()
     def step(self, act: int) -> Tuple[Tensor, Tensor, bool, bool, Dict[str, Any]]:
+        # Replaced Python 3.10 `match` statement with if/elif chain for Python 3.8/3.9 compatibility
+        if act == 1:
+            self.set_timestep(self.t - 1)
+        elif act == 2:
+            self.set_timestep(self.t + 1)
+        elif act == 3:
+            self.set_timestep(self.t - 10)
+        elif act == 4:
+            self.set_timestep(self.t + 10)
         n_digits = len(str(self.ep_length))

src/game/web_play_env.py CHANGED Viewed

@@ -6,6 +6,8 @@ from typing import Any, Dict, List, Set, Tuple
 import torch
 from torch import Tensor
 from torch.distributions.categorical import Categorical
 from ..agent import Agent
 from ..envs import WorldModelEnv
@@ -71,6 +73,14 @@ class WebPlayEnv(PlayEnv):
             r_click=r_click
         )
         # If we have human input override or in human mode, use human input
         if self.human_input_override or self.is_human_player:
             # Encode the web action to tensor format
@@ -83,11 +93,33 @@ class WebPlayEnv(PlayEnv):
                 obs = self.obs
                 if obs.ndim == 3:  # CHW -> BCHW
                     obs = obs.unsqueeze(0)
                 # Detach hidden states to prevent gradient tracking
                 self.hx = self.hx.detach()
                 self.cx = self.cx.detach()
                 # Get action logits and value from actor-critic
                 logits_act, value, (self.hx, self.cx) = self.agent.actor_critic.predict_act_value(obs, (self.hx, self.cx))

 import torch
 from torch import Tensor
 from torch.distributions.categorical import Categorical
+import torch.nn as nn
+import torch.nn.functional as F
 from ..agent import Agent
 from ..envs import WorldModelEnv
             r_click=r_click
         )
+        # Ensure we have a valid observation; if not, reset the environment
+        if self.obs is None:
+            try:
+                self.obs, _ = self.reset()
+            except Exception:
+                # If reset fails, fall back to human input below
+                pass
         # If we have human input override or in human mode, use human input
         if self.human_input_override or self.is_human_player:
             # Encode the web action to tensor format
                 obs = self.obs
                 if obs.ndim == 3:  # CHW -> BCHW
                     obs = obs.unsqueeze(0)
+                # Ensure obs is on the same device as the models
+                if obs.device != self.agent.device:
+                    obs = obs.to(self.agent.device, non_blocking=True)
                 # Detach hidden states to prevent gradient tracking
                 self.hx = self.hx.detach()
                 self.cx = self.cx.detach()
+                # Resize observation to match actor-critic expected encoder/LSTM input
+                # Count how many MaxPool2d layers are in the encoder to infer downsampling factor
+                if hasattr(self.agent, "actor_critic") and self.agent.actor_critic is not None:
+                    try:
+                        n_pools = sum(
+                            1 for m in self.agent.actor_critic.encoder.encoder if isinstance(m, nn.MaxPool2d)
+                        )
+                        # We want the spatial size after the encoder to be 1x1 so that
+                        # flattening matches the LSTM input size configured at init time.
+                        # With n_pools halvings, input size must be 2**n_pools.
+                        target_hw = 2 ** n_pools if n_pools > 0 else min(int(obs.size(-2)), int(obs.size(-1)))
+                        if obs.size(-2) != target_hw or obs.size(-1) != target_hw:
+                            obs = F.interpolate(
+                                obs, size=(target_hw, target_hw), mode="bilinear", align_corners=False
+                            )
+                    except Exception:
+                        # If anything goes wrong in the shape logic, fall back without resizing
+                        pass
                 # Get action logits and value from actor-critic
                 logits_act, value, (self.hx, self.cx) = self.agent.actor_critic.predict_act_value(obs, (self.hx, self.cx))