Spaces:

Etadingrui
/

PIWM

Sleeping

App Files Files Community

musictimer committed on Sep 6

Commit

a29f249

1 Parent(s): b0d3efd

Fix bug 3

Browse files

Files changed (16) hide show

app.py +16 -14
config/agent/csgo.yaml +5 -5
config/world_model_env/fast.yaml +3 -3
requirements.txt +3 -3
src/__pycache__/agent.cpython-310.pyc +0 -0
src/__pycache__/utils.cpython-310.pyc +0 -0
src/coroutines/__pycache__/env_loop.cpython-310.pyc +0 -0
src/csgo/__pycache__/web_action_processing.cpython-310.pyc +0 -0
src/data/__pycache__/dataset.cpython-310.pyc +0 -0
src/envs/__pycache__/world_model_env.cpython-310.pyc +0 -0
src/game/__pycache__/__init__.cpython-310.pyc +0 -0
src/game/__pycache__/web_play_env.cpython-310.pyc +0 -0
src/models/__pycache__/actor_critic.cpython-310.pyc +0 -0
src/models/__pycache__/rew_end_model.cpython-310.pyc +0 -0
src/models/diffusion/__pycache__/denoiser.cpython-310.pyc +0 -0
src/utils.py +17 -5

app.py CHANGED Viewed

@@ -99,12 +99,20 @@ class WebGameEngine:
             def progress_hook(block_num, block_size, total_size):
                 if total_size > 0:
                     progress = min(100, (block_num * block_size * 100) / total_size)
-                    self.download_progress = int(progress)
-                    if progress % 10 == 0:  # Log every 10%
-                        logger.info(f"Download progress: {self.download_progress}%")
             urllib.request.urlretrieve(url, filepath, reporthook=progress_hook)
             self.download_progress = 100
         # Run download in thread pool to avoid blocking
         loop = asyncio.get_event_loop()
@@ -142,21 +150,14 @@ class WebGameEngine:
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
             logger.info(f"Using device: {device}")
-            # Load model checkpoint
-            checkpoint_path = web_config.get_checkpoint_path()
-            if not checkpoint_path.exists():
-                logger.warning(f"No checkpoint found at {checkpoint_path} - using dummy mode")
-                self._init_dummy_mode()
-                return True
             # Get spawn directory
             spawn_dir = web_config.get_spawn_dir()
-            # Initialize agent
-            num_actions = cfg.env.num_actions
-            agent = Agent(instantiate(cfg.agent, num_actions=num_actions)).to(device).eval()
-            # Try to load checkpoint (remote or local)
             try:
                 # First try to download from Hugging Face Hub using direct URL
                 try:
@@ -192,6 +193,7 @@ class WebGameEngine:
                     logger.warning(f"Failed to download from HF Hub: {hub_error}")
                     # Fallback to local checkpoint if available
                     if checkpoint_path.exists():
                         logger.info(f"Falling back to local checkpoint: {checkpoint_path}")
                         agent.load(checkpoint_path)

             def progress_hook(block_num, block_size, total_size):
                 if total_size > 0:
                     progress = min(100, (block_num * block_size * 100) / total_size)
+                    new_progress = int(progress)
+                    # Update progress more frequently for smooth progress bar
+                    if new_progress != self.download_progress:
+                        self.download_progress = new_progress
+                        self.loading_status = f"Downloading AI model ({self.download_progress}%)"
+                        # Log every 5% instead of 10% for better feedback
+                        if self.download_progress % 5 == 0:
+                            logger.info(f"Download progress: {self.download_progress}%")
             urllib.request.urlretrieve(url, filepath, reporthook=progress_hook)
             self.download_progress = 100
+            self.loading_status = "Download complete! Loading model..."
         # Run download in thread pool to avoid blocking
         loop = asyncio.get_event_loop()
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
             logger.info(f"Using device: {device}")
+            # Initialize agent first
+            num_actions = cfg.env.num_actions
+            agent = Agent(instantiate(cfg.agent, num_actions=num_actions)).to(device).eval()
             # Get spawn directory
             spawn_dir = web_config.get_spawn_dir()
+            # Try to load checkpoint (remote first, then local, then dummy mode)
             try:
                 # First try to download from Hugging Face Hub using direct URL
                 try:
                     logger.warning(f"Failed to download from HF Hub: {hub_error}")
                     # Fallback to local checkpoint if available
+                    checkpoint_path = web_config.get_checkpoint_path()
                     if checkpoint_path.exists():
                         logger.info(f"Falling back to local checkpoint: {checkpoint_path}")
                         agent.load(checkpoint_path)

config/agent/csgo.yaml CHANGED Viewed

@@ -1,13 +1,13 @@
-_target_: agent.AgentConfig
 denoiser:
-  _target_: models.diffusion.DenoiserConfig
   sigma_data: 0.5
   sigma_offset_noise: 0.1
   noise_previous_obs: true
   upsampling_factor: null
   inner_model:
-    _target_: models.diffusion.InnerModelConfig
     img_channels: 3
     num_steps_conditioning: 4
     cond_channels: 2048
@@ -16,13 +16,13 @@ denoiser:
     attn_depths: [0, 0, 1, 1]
 upsampler:
-  _target_: models.diffusion.DenoiserConfig
   sigma_data: 0.5
   sigma_offset_noise: 0.1
   noise_previous_obs: false
   upsampling_factor: 5
   inner_model:
-    _target_: models.diffusion.InnerModelConfig
     img_channels: 3
     num_steps_conditioning: 1
     cond_channels: 2048

+_target_: src.agent.AgentConfig
 denoiser:
+  _target_: src.models.diffusion.DenoiserConfig
   sigma_data: 0.5
   sigma_offset_noise: 0.1
   noise_previous_obs: true
   upsampling_factor: null
   inner_model:
+    _target_: src.models.diffusion.InnerModelConfig
     img_channels: 3
     num_steps_conditioning: 4
     cond_channels: 2048
     attn_depths: [0, 0, 1, 1]
 upsampler:
+  _target_: src.models.diffusion.DenoiserConfig
   sigma_data: 0.5
   sigma_offset_noise: 0.1
   noise_previous_obs: false
   upsampling_factor: 5
   inner_model:
+    _target_: src.models.diffusion.InnerModelConfig
     img_channels: 3
     num_steps_conditioning: 1
     cond_channels: 2048

config/world_model_env/fast.yaml CHANGED Viewed

@@ -1,15 +1,15 @@
-_target_: envs.WorldModelEnvConfig
 horizon: 1000
 num_batches_to_preload: 1
 diffusion_sampler_next_obs:
-  _target_: models.diffusion.DiffusionSamplerConfig
   num_steps_denoising: 6  # Balanced: better quality than 3, faster than 10
   sigma_min: 0.002
   sigma_max: 5.0
   rho: 7
   order: 1
 diffusion_sampler_upsampling:
-  _target_: models.diffusion.DiffusionSamplerConfig
   num_steps_denoising: 4  # Balanced: better quality than 2, faster than 5
   sigma_min: 0.002
   sigma_max: 5.0

+_target_: src.envs.WorldModelEnvConfig
 horizon: 1000
 num_batches_to_preload: 1
 diffusion_sampler_next_obs:
+  _target_: src.models.diffusion.DiffusionSamplerConfig
   num_steps_denoising: 6  # Balanced: better quality than 3, faster than 10
   sigma_min: 0.002
   sigma_max: 5.0
   rho: 7
   order: 1
 diffusion_sampler_upsampling:
+  _target_: src.models.diffusion.DiffusionSamplerConfig
   num_steps_denoising: 4  # Balanced: better quality than 2, faster than 5
   sigma_min: 0.002
   sigma_max: 5.0

requirements.txt CHANGED Viewed

@@ -2,7 +2,7 @@
 torch>=1.13.0
 torchvision>=0.14.0
 torchaudio>=0.13.0
-numpy>=1.21.0
 # Configuration management
 hydra-core>=1.2.0
@@ -28,8 +28,8 @@ h5py>=3.7.0
 ale_py>=0.8.0
 gymnasium>=0.28.0
-# Experiment tracking (required by utils.py)
-wandb>=0.13.0
 # Metrics (required by rew_end_model.py)
 torcheval>=0.0.6

 torch>=1.13.0
 torchvision>=0.14.0
 torchaudio>=0.13.0
+numpy>=1.21.0,<2.0.0
 # Configuration management
 hydra-core>=1.2.0
 ale_py>=0.8.0
 gymnasium>=0.28.0
+# Experiment tracking (optional, for training only)
+# wandb>=0.13.0  # Commented out due to NumPy 2.0 compatibility issues
 # Metrics (required by rew_end_model.py)
 torcheval>=0.0.6

src/__pycache__/agent.cpython-310.pyc CHANGED Viewed

Binary files a/src/__pycache__/agent.cpython-310.pyc and b/src/__pycache__/agent.cpython-310.pyc differ

src/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/src/__pycache__/utils.cpython-310.pyc and b/src/__pycache__/utils.cpython-310.pyc differ

src/coroutines/__pycache__/env_loop.cpython-310.pyc CHANGED Viewed

Binary files a/src/coroutines/__pycache__/env_loop.cpython-310.pyc and b/src/coroutines/__pycache__/env_loop.cpython-310.pyc differ

src/csgo/__pycache__/web_action_processing.cpython-310.pyc CHANGED Viewed

Binary files a/src/csgo/__pycache__/web_action_processing.cpython-310.pyc and b/src/csgo/__pycache__/web_action_processing.cpython-310.pyc differ

src/data/__pycache__/dataset.cpython-310.pyc CHANGED Viewed

Binary files a/src/data/__pycache__/dataset.cpython-310.pyc and b/src/data/__pycache__/dataset.cpython-310.pyc differ

src/envs/__pycache__/world_model_env.cpython-310.pyc CHANGED Viewed

Binary files a/src/envs/__pycache__/world_model_env.cpython-310.pyc and b/src/envs/__pycache__/world_model_env.cpython-310.pyc differ

src/game/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/src/game/__pycache__/__init__.cpython-310.pyc and b/src/game/__pycache__/__init__.cpython-310.pyc differ

src/game/__pycache__/web_play_env.cpython-310.pyc CHANGED Viewed

Binary files a/src/game/__pycache__/web_play_env.cpython-310.pyc and b/src/game/__pycache__/web_play_env.cpython-310.pyc differ

src/models/__pycache__/actor_critic.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/__pycache__/actor_critic.cpython-310.pyc and b/src/models/__pycache__/actor_critic.cpython-310.pyc differ

src/models/__pycache__/rew_end_model.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/__pycache__/rew_end_model.cpython-310.pyc and b/src/models/__pycache__/rew_end_model.cpython-310.pyc differ

src/models/diffusion/__pycache__/denoiser.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/diffusion/__pycache__/denoiser.cpython-310.pyc and b/src/models/diffusion/__pycache__/denoiser.cpython-310.pyc differ

src/utils.py CHANGED Viewed

@@ -17,7 +17,13 @@ from torch.optim.lr_scheduler import LambdaLR
 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import AdamW
-import wandb
 ATARI_100K_GAMES = [
@@ -275,8 +281,12 @@ def prompt_atari_game():
 def prompt_run_name(game):
     cfg_file = Path("config/trainer.yaml")
-    cfg_name = OmegaConf.load(cfg_file).wandb.name
-    suffix = f"-{cfg_name}" if cfg_name is not None else ""
     name = game + suffix
     name_ = input(f"Confirm run name by pressing Enter (or enter a new name): {name}\n")
     if name_ != "":
@@ -329,5 +339,7 @@ def try_until_no_except(func: Callable) -> None:
 def wandb_log(logs: Logs, epoch: int):
-    for d in logs:
-        wandb.log({"epoch": epoch, **d})

 import torch.nn as nn
 from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.optim import AdamW
+try:
+    import wandb
+    WANDB_AVAILABLE = True
+except ImportError:
+    # wandb is not available; set it to None for a graceful fallback
+    wandb = None
+    WANDB_AVAILABLE = False
 ATARI_100K_GAMES = [
 def prompt_run_name(game):
     cfg_file = Path("config/trainer.yaml")
+    try:
+        cfg_name = OmegaConf.load(cfg_file).wandb.name
+        suffix = f"-{cfg_name}" if cfg_name is not None else ""
+    except:
+        # If the wandb config is not available, use an empty suffix
+        suffix = ""
     name = game + suffix
     name_ = input(f"Confirm run name by pressing Enter (or enter a new name): {name}\n")
     if name_ != "":
 def wandb_log(logs: Logs, epoch: int):
+    if WANDB_AVAILABLE and wandb is not None:
+        for d in logs:
+            wandb.log({"epoch": epoch, **d})
+    # If wandb is not available, silently skip logging