musictimer committed
Commit 7deb5ff · 1 Parent(s): bbfa773
app.py CHANGED
@@ -95,82 +95,63 @@ class WebGameEngine:

         def load_model_weights():
             """Load model weights in thread pool to avoid blocking"""
-            state_dict = None
-
-            # Try torch.hub method first
             try:
-                logger.info("Trying to load model using torch.hub...")
-                self.loading_status = "Downloading model with torch.hub..."
+                # Direct download without any caching to avoid permission issues on HF Spaces
+                logger.info("Downloading model directly without caching...")
+                self.loading_status = "Downloading model without caching..."
                 self.download_progress = 10

                 model_url = "https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt"
-                state_dict = torch.hub.load_state_dict_from_url(
-                    model_url,
-                    map_location=device,
-                    progress=False,
-                    check_hash=False
-                )
-                logger.info("Successfully loaded model using torch.hub")

-            except Exception as e:
-                logger.warning(f"Failed to load model with torch.hub: {e}")
+                # Use requests to download directly into memory
+                import requests
+                import io

-                # Try huggingface_hub method as fallback
-                try:
-                    logger.info("Trying to load model using huggingface_hub...")
-                    self.loading_status = "Downloading model with huggingface_hub..."
-                    self.download_progress = 10
-
-                    from huggingface_hub import hf_hub_download
-
-                    # Download the file
-                    model_path = hf_hub_download(
-                        repo_id="Etadingrui/diamond-1B",
-                        filename="agent_epoch_00003.pt",
-                        cache_dir=None  # Use default cache
-                    )
-                    self.download_progress = 40
-                    self.loading_status = "Loading downloaded model..."
-
-                    # Load the state dict
-                    state_dict = torch.load(model_path, map_location=device)
-                    logger.info("Successfully loaded model using huggingface_hub")
-
-                except Exception as e2:
-                    logger.error(f"Failed to load model with huggingface_hub: {e2}")
-                    raise Exception("All model loading methods failed")
-
-            if state_dict is None:
-                raise Exception("Failed to load model state dict")
+                logger.info(f"Starting direct download from {model_url}")
+                response = requests.get(model_url, stream=True)
+                response.raise_for_status()
+
+                # Get the total file size for progress tracking
+                total_size = int(response.headers.get('content-length', 0))
+                logger.info(f"Model file size: {total_size / (1024*1024):.1f} MB")

-            # Load state dict into agent
+                # Download with progress tracking
+                downloaded_data = io.BytesIO()
+                downloaded_size = 0
+
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        downloaded_data.write(chunk)
+                        downloaded_size += len(chunk)
+
+                        # Update progress
+                        if total_size > 0:
+                            progress = min(50, int((downloaded_size / total_size) * 40) + 10)  # 10-50%
+                            if progress != self.download_progress:
+                                self.download_progress = progress
+                                logger.info(f"Download progress: {progress}%")
+
+                self.download_progress = 50
+                self.loading_status = "Download complete, loading model..."
+                logger.info("Download completed, loading state dict...")
+
+                # Reset to beginning of buffer and load
+                downloaded_data.seek(0)
+                state_dict = torch.load(downloaded_data, map_location=device)
+                logger.info("Successfully loaded model using direct download")
+
+            except Exception as e:
+                logger.error(f"Failed to download model directly: {e}")
+                raise Exception(f"Direct download failed: {str(e)}")
+
+            # Load state dict into agent using the new load_state_dict method
             try:
                 logger.info("Model download completed, loading weights...")
                 self.download_progress = 60
-                self.loading_status = "Model downloaded, loading weights..."
+                self.loading_status = "Loading model weights into agent..."

-                # Load each component of the agent using extract_state_dict (same as agent.load method)
-                if any(k.startswith("denoiser") for k in state_dict.keys()):
-                    agent.denoiser.load_state_dict(extract_state_dict(state_dict, "denoiser"))
-                    logger.info("Loaded denoiser weights")
-
-                self.download_progress = 70
-                self.loading_status = "Loading upsampler..."
-                if any(k.startswith("upsampler") for k in state_dict.keys()) and agent.upsampler is not None:
-                    agent.upsampler.load_state_dict(extract_state_dict(state_dict, "upsampler"))
-                    logger.info("Loaded upsampler weights")
-
-                self.download_progress = 80
-                self.loading_status = "Loading reward model..."
-                if any(k.startswith("rew_end_model") for k in state_dict.keys()) and agent.rew_end_model is not None:
-                    agent.rew_end_model.load_state_dict(extract_state_dict(state_dict, "rew_end_model"))
-                    logger.info("Loaded reward model weights")
-
-                self.download_progress = 90
-                self.loading_status = "Loading actor critic..."
-                if any(k.startswith("actor_critic") for k in state_dict.keys()) and agent.actor_critic is not None:
-                    agent.actor_critic.load_state_dict(extract_state_dict(state_dict, "actor_critic"))
-                    logger.info("Loaded actor critic weights")
+                # Use the agent's new load_state_dict method
+                agent.load_state_dict(state_dict)

                 self.download_progress = 100
                 self.loading_status = "Model loaded successfully!"
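For reference, the core of the change in load_model_weights is a stream-to-memory pattern: fetch the checkpoint over HTTP with requests, accumulate the chunks in an io.BytesIO buffer, and hand that buffer straight to torch.load, so nothing is ever written to a (possibly read-only) cache directory. A minimal standalone sketch of that pattern, assuming only that requests and torch are installed; the helper name download_state_dict and the CPU map_location are illustrative, not part of the commit:

import io
import logging

import requests
import torch

logger = logging.getLogger(__name__)


def download_state_dict(url: str, chunk_size: int = 8192) -> dict:
    """Stream a checkpoint over HTTP into memory and deserialize it, skipping any on-disk cache."""
    response = requests.get(url, stream=True)
    response.raise_for_status()

    total = int(response.headers.get("content-length", 0))
    buffer = io.BytesIO()
    done = 0
    for chunk in response.iter_content(chunk_size=chunk_size):
        if chunk:
            buffer.write(chunk)
            done += len(chunk)
            if total:
                logger.debug("downloaded %.1f%%", 100 * done / total)

    buffer.seek(0)  # rewind before handing the buffer to torch.load
    return torch.load(buffer, map_location="cpu")


# Hypothetical usage with the checkpoint URL referenced in app.py:
# sd = download_state_dict("https://huggingface.co/Etadingrui/diamond-1B/resolve/main/agent_epoch_00003.pt")

Note that the whole file lives in RAM for the duration of the load, so peak memory is roughly the checkpoint size plus the deserialized tensors; that is the trade-off accepted in exchange for avoiding cache permission issues on HF Spaces.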
requirements.txt CHANGED
@@ -13,6 +13,7 @@ fastapi>=0.68.0
 uvicorn>=0.15.0
 websockets>=10.0
 python-multipart>=0.0.5
+requests>=2.25.0

 # Image processing
 opencv-python-headless>=4.5.0
src/__pycache__/agent.cpython-310.pyc CHANGED
Binary files a/src/__pycache__/agent.cpython-310.pyc and b/src/__pycache__/agent.cpython-310.pyc differ
 
src/agent.py CHANGED
@@ -64,11 +64,22 @@ class Agent(nn.Module):
         load_actor_critic: bool = True,
     ) -> None:
         sd = torch.load(Path(path_to_ckpt), map_location=self.device)
+        self.load_state_dict(sd, load_denoiser, load_upsampler, load_rew_end_model, load_actor_critic)
+
+    def load_state_dict(
+        self,
+        state_dict: dict,
+        load_denoiser: bool = True,
+        load_upsampler: bool = True,
+        load_rew_end_model: bool = True,
+        load_actor_critic: bool = True,
+    ) -> None:
+        """Load state dict directly without file I/O"""
         if load_denoiser:
-            self.denoiser.load_state_dict(extract_state_dict(sd, "denoiser"))
-        if load_upsampler:
-            self.upsampler.load_state_dict(extract_state_dict(sd, "upsampler"))
+            self.denoiser.load_state_dict(extract_state_dict(state_dict, "denoiser"))
+        if load_upsampler and self.upsampler is not None:
+            self.upsampler.load_state_dict(extract_state_dict(state_dict, "upsampler"))
         if load_rew_end_model and self.rew_end_model is not None:
-            self.rew_end_model.load_state_dict(extract_state_dict(sd, "rew_end_model"))
+            self.rew_end_model.load_state_dict(extract_state_dict(state_dict, "rew_end_model"))
         if load_actor_critic and self.actor_critic is not None:
-            self.actor_critic.load_state_dict(extract_state_dict(sd, "actor_critic"))
+            self.actor_critic.load_state_dict(extract_state_dict(state_dict, "actor_critic"))
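With this refactor, Agent.load becomes a thin wrapper that reads the checkpoint from disk and delegates to the new load_state_dict, so callers that already hold a state dict in memory (as app.py now does) can skip the file round-trip. A hypothetical usage sketch, assuming an already-constructed agent and a state_dict obtained as in app.py:

# In-memory path (what app.py now uses); no temporary file required
agent.load_state_dict(state_dict)

# The per-component flags are still available, e.g. skip the actor-critic:
agent.load_state_dict(state_dict, load_actor_critic=False)

# File-based path, unchanged for existing callers (the path here is illustrative):
agent.load("checkpoints/agent_epoch_00003.pt")

One design note: because Agent subclasses nn.Module, this override shadows the standard nn.Module.load_state_dict(state_dict, strict=True) signature; here it instead partitions the flat checkpoint by prefix via extract_state_dict and forwards each slice to the corresponding sub-module.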