""" DrivAerML Dataset with Memory-Efficient Presampling Support and Cloud Loading This dataset implements presampling functionality for validation and test data while maintaining random sampling for training data. The presampling feature ensures consistent validation and test results across different runs, with memory-efficient on-demand loading. The dataset can load data directly from Hugging Face cloud or from local .npy files. Presampling Workflow: 1. Set presampled=False in config to create presampled validation and test data 2. The system creates fixed samples for validation and test splits 3. Each run's presampled data is saved as individual files in a directory structure 4. Set presampled=True in config to use the saved presampled data for future runs 5. Training data always uses random sampling regardless of presampled setting Directory Structure: presampled_data_path/ ├── validation/ │ ├── run_1.npy │ ├── run_2.npy │ └── ... └── test/ ├── run_1.npy ├── run_2.npy └── ... Configuration Parameters: - presampled: Boolean flag to control whether to use presampled data - presampled_data_path: Base path where presampled data directory is created - use_cloud: Boolean flag to control whether to load from Hugging Face cloud - hf_dataset_name: Hugging Face dataset name (default: "neashton/drivaerml") Usage: - First run: Set presampled=False to create presampled data - Subsequent runs: Set presampled=True to use existing presampled data - Set use_cloud=True to load from Hugging Face cloud instead of local files """ import os import numpy as np import torch from torch.utils.data import Dataset, DataLoader from torch.utils.data import default_collate import json import re import requests import tempfile from pathlib import Path # Optional imports for cloud loading try: import vtk from vtk.util import numpy_support import pyvista as pv CLOUD_LOADING_AVAILABLE = True except ImportError as e: print(f"Warning: Cloud loading dependencies not available: {e}") print("Install with: pip install vtk pyvista") CLOUD_LOADING_AVAILABLE = False # Problematic runs as mentioned in the DrivAerML dataset documentation PROBLEMATIC_RUNS = {167, 211, 218, 221, 248, 282, 291, 295, 316, 325, 329, 364, 370, 376, 403, 473} def download_file_from_hf(url, temp_dir): """Download a file from Hugging Face and return the local path""" try: response = requests.get(url, stream=True) response.raise_for_status() # Extract filename from URL filename = url.split('/')[-1] local_path = os.path.join(temp_dir, filename) with open(local_path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): f.write(chunk) return local_path except Exception as e: print(f"Error downloading {url}: {e}") return None def download_vtu_file_from_hf(run_number, hf_dataset_name, temp_dir): """Download VTU file parts and concatenate them""" try: # Download part 1 url_part1 = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/volume_{run_number}.vtu.00.part" part1_path = download_file_from_hf(url_part1, temp_dir) if part1_path is None: print(f"Failed to download part 1 for run {run_number}") return None # Download part 2 url_part2 = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/volume_{run_number}.vtu.01.part" part2_path = download_file_from_hf(url_part2, temp_dir) if part2_path is None: print(f"Failed to download part 2 for run {run_number}") return None # Concatenate the parts final_path = os.path.join(temp_dir, f"volume_{run_number}.vtu") with open(final_path, 'wb') as 
def process_vtp_file(vtp_path, surface_variables=["pMeanTrim", "wallShearStressMeanTrim"]):
    """Process a VTP file and extract surface mesh centers and fields."""
    if not CLOUD_LOADING_AVAILABLE:
        raise ImportError(
            "Cloud loading dependencies (vtk, pyvista) are not available. "
            "Install with: pip install vtk pyvista"
        )

    try:
        reader = vtk.vtkXMLPolyDataReader()
        reader.SetFileName(vtp_path)
        reader.Update()
        polydata = reader.GetOutput()

        # Convert point (node) data to cell data
        p2c = vtk.vtkPointDataToCellData()
        p2c.SetInputData(polydata)
        p2c.Update()
        celldata_all = p2c.GetOutput()
        celldata = celldata_all.GetCellData()

        # Extract fields
        fields = []
        for array_name in surface_variables:
            try:
                array = celldata.GetArray(array_name)
                if array is not None:
                    array_data = numpy_support.vtk_to_numpy(array).reshape(
                        array.GetNumberOfTuples(), array.GetNumberOfComponents()
                    )
                    fields.append(array_data)
                else:
                    # If the field doesn't exist, create zeros
                    num_cells = celldata_all.GetNumberOfCells()
                    array_data = np.zeros((num_cells, 1))
                    fields.append(array_data)
            except Exception as e:
                print(f"Warning: Could not extract field {array_name}: {e}")
                # Create zeros for the missing field
                num_cells = celldata_all.GetNumberOfCells()
                array_data = np.zeros((num_cells, 1))
                fields.append(array_data)

        surface_fields = np.concatenate(fields, axis=-1)

        # Get cell centers
        mesh = pv.PolyData(polydata)
        surface_coordinates = np.array(mesh.cell_centers().points)

        return {
            "surface_mesh_centers": np.float32(surface_coordinates),
            "surface_fields": np.float32(surface_fields),
        }
    except Exception as e:
        print(f"Error processing VTP file {vtp_path}: {e}")
        return None


def process_vtu_file(vtu_path, volume_variables=["UMean", "pMean"]):
    """Process a VTU file and extract volume vertices and point fields."""
    if not CLOUD_LOADING_AVAILABLE:
        raise ImportError(
            "Cloud loading dependencies (vtk, pyvista) are not available. "
            "Install with: pip install vtk pyvista"
        )

    try:
        reader = vtk.vtkXMLUnstructuredGridReader()
        reader.SetFileName(vtu_path)
        reader.Update()

        # Get the unstructured grid data
        polydata = reader.GetOutput()

        # Get vertices
        points = polydata.GetPoints()
        vertices = numpy_support.vtk_to_numpy(points.GetData())

        # Get point data
        point_data = polydata.GetPointData()

        # Extract fields
        fields = []
        for array_name in volume_variables:
            try:
                array = point_data.GetArray(array_name)
                if array is not None:
                    array_data = numpy_support.vtk_to_numpy(array).reshape(
                        array.GetNumberOfTuples(), array.GetNumberOfComponents()
                    )
                    fields.append(array_data)
                else:
                    # If the field doesn't exist, create zeros
                    num_points = polydata.GetNumberOfPoints()
                    array_data = np.zeros((num_points, 3))  # Assume 3D vector field
                    fields.append(array_data)
            except Exception as e:
                print(f"Warning: Could not extract field {array_name}: {e}")
                # Create zeros for the missing field
                num_points = polydata.GetNumberOfPoints()
                array_data = np.zeros((num_points, 3))  # Assume 3D vector field
                fields.append(array_data)

        volume_fields = np.concatenate(fields, axis=-1)

        return {
            "volume_mesh_centers": np.float32(vertices),
            "volume_fields": np.float32(volume_fields),
        }
    except Exception as e:
        print(f"Error processing VTU file {vtu_path}: {e}")
        return None
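
# --- Illustrative sketch of the returned data structure ---
# process_vtp_file()/process_vtu_file() return plain dicts of float32 numpy arrays. The helper
# below only prints array shapes; the file paths are hypothetical and the exact row counts and
# field widths depend on the mesh and requested variables, so treat the numbers in the comments
# as examples rather than a specification.
def _example_inspect_processed_run(vtp_path, vtu_path=None):
    """Sketch: print the array shapes produced by the VTP/VTU processing helpers."""
    surface = process_vtp_file(vtp_path)
    if surface is not None:
        # e.g. surface_mesh_centers: (num_cells, 3), surface_fields: (num_cells, 4)
        # for the default variables pMeanTrim (1 component) + wallShearStressMeanTrim (3 components)
        for key, value in surface.items():
            print(f"{key}: {value.shape}")
    if vtu_path is not None:
        volume = process_vtu_file(vtu_path)
        if volume is not None:
            # e.g. volume_mesh_centers: (num_points, 3), volume_fields: (num_points, 4)
            # for the default variables UMean (3 components) + pMean (1 component)
            for key, value in volume.items():
                print(f"{key}: {value.shape}")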
def create_presampled_data(cfg, splits, save_path):
    """
    Create presampled validation and test data with fixed random sampling.
    Saves individual files for each run to enable on-demand loading.

    Args:
        cfg: Configuration object
        splits: Dictionary containing train/validation/test splits
        save_path: Base path for saving presampled data (directory will be created)
    """
    print("Creating presampled validation and test data...")

    # Check cloud loading availability if needed
    use_cloud = getattr(cfg, 'use_cloud', False)
    if use_cloud and not CLOUD_LOADING_AVAILABLE:
        raise ImportError(
            "Cloud loading is enabled but dependencies (vtk, pyvista) are not available. "
            "Install with: pip install vtk pyvista, or set use_cloud=False to use local files."
        )

    # Create directory structure for presampled data
    base_dir = os.path.splitext(save_path)[0]  # Remove .npy extension if present
    os.makedirs(base_dir, exist_ok=True)

    # Set seed for reproducible sampling
    np.random.seed(0)

    # Create temporary directory for downloading files
    with tempfile.TemporaryDirectory() as temp_dir:
        for split_type in ['validation', 'test']:
            print(f"Processing {split_type} split...")
            split_runs = splits[split_type]

            # Create subdirectory for this split
            split_dir = os.path.join(base_dir, split_type)
            os.makedirs(split_dir, exist_ok=True)

            for run_number in split_runs:
                if run_number in PROBLEMATIC_RUNS:
                    print(f"Skipping problematic run {run_number}")
                    continue

                print(f"Processing run {run_number} for {split_type}...")

                if use_cloud:
                    # Download from Hugging Face cloud
                    hf_dataset_name = getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')
                    vtp_url = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/boundary_{run_number}.vtp"
                    vtp_path = download_file_from_hf(vtp_url, temp_dir)
                    if vtp_path is None:
                        print(f"Failed to download VTP file for run {run_number}")
                        continue

                    # Process VTP file
                    data = process_vtp_file(vtp_path)
                    if data is None:
                        print(f"Failed to process VTP file for run {run_number}")
                        continue

                    coordinates = data['surface_mesh_centers']
                    field = data['surface_fields']
                else:
                    # Load from local .npy file
                    found_file = False
                    for f in os.listdir(cfg.data_dir):
                        if f.endswith('.npy'):
                            match = re.search(r'run_(\d+)', f)
                            if match and int(match.group(1)) == run_number:
                                npy_file_path = os.path.join(cfg.data_dir, f)
                                data = np.load(npy_file_path, allow_pickle=True).item()
                                coordinates = data['surface_mesh_centers']
                                field = data['surface_fields']
                                found_file = True
                                break
                    if not found_file:
                        print(f"No .npy file found for run {run_number}")
                        continue

                # Sample points with fixed seed for reproducibility
                sample_indices = np.random.choice(coordinates.shape[0], cfg.num_points, replace=False)
                sampled_coordinates = coordinates[sample_indices, :]
                sampled_field = field[sample_indices, :]

                # Save an individual presampled file for this run
                presampled_run_data = {
                    'surface_mesh_centers': sampled_coordinates,
                    'surface_fields': sampled_field
                }
                run_file_path = os.path.join(split_dir, f'run_{run_number}.npy')
                np.save(run_file_path, presampled_run_data)

    print(f"Presampled data saved to directory: {base_dir}")
    print(f"Structure: {base_dir}/{{validation,test}}/run_{{number}}.npy")
    return base_dir
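
# --- Illustrative presampling sketch (hypothetical config and paths) ---
# create_presampled_data() only needs cfg.data_dir, cfg.num_points and, optionally,
# cfg.use_cloud / cfg.hf_dataset_name. The SimpleNamespace and every path and run number below
# are assumptions for demonstration; a real config would come from the project's config system.
def _example_create_presampled_data():
    """Sketch: build fixed validation/test samples once, then reuse them via presampled_exists=True."""
    from types import SimpleNamespace
    cfg = SimpleNamespace(
        data_dir="./data/drivaerml_npy",   # hypothetical local .npy directory
        num_points=10000,                  # points sampled per run
        use_cloud=False,
    )
    splits = {"train": [1, 2], "validation": [3], "test": [4]}  # hypothetical run splits
    base_dir = create_presampled_data(cfg, splits, "./data/presampled_val_test_data.npy")
    # Afterwards, base_dir would contain validation/run_3.npy and test/run_4.npy.
    return base_dir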
class DrivAerMLDataset(Dataset):
    def __init__(self, cfg, splits=None, split_type='train', presampled=False, save_presampled_data_path=None):
        """
        Initializes the DrivAerMLDataset instance.

        Args:
            cfg: Configuration object containing data directory and number of points
            splits: List of run numbers to include, if None includes all files
            split_type: Type of split ('train', 'validation', 'test')
            presampled: Whether to use presampled data for validation/test
            save_presampled_data_path: Base path to the presampled data directory
        """
        self.data_dir = cfg.data_dir
        self.splits = splits
        self.use_cloud = getattr(cfg, 'use_cloud', False)
        self.hf_dataset_name = getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')

        # Volume data configuration
        self.include_volume = getattr(cfg, 'include_volume', False)
        self.volume_variables = getattr(cfg, 'volume_variables', ["UMean", "pMean"])
        self.volume_num_points = getattr(cfg, 'volume_num_points', 50000)  # Default for volume sampling

        # Check cloud loading availability
        if self.use_cloud and not CLOUD_LOADING_AVAILABLE:
            raise ImportError(
                "Cloud loading is enabled but dependencies (vtk, pyvista) are not available. "
                "Install with: pip install vtk pyvista, or set use_cloud=False to use local files."
            )

        # Store only run numbers and create a filename mapping for efficiency
        self.run_numbers = []
        self.original_filenames = {}  # run_number -> original filename

        if self.use_cloud:
            # For cloud loading, we don't need to scan a local directory;
            # just filter out problematic runs
            for run_number in splits:
                if run_number not in PROBLEMATIC_RUNS:
                    self.run_numbers.append(run_number)
        else:
            # Local file loading - scan the directory for .npy files
            for f in os.listdir(cfg.data_dir):
                if f.endswith('.npy'):
                    match = re.search(r'run_(\d+)', f)
                    if match:
                        run_number = int(match.group(1))
                        if run_number in splits and run_number not in PROBLEMATIC_RUNS:
                            self.run_numbers.append(run_number)
                            self.original_filenames[run_number] = f

        if len(self.run_numbers) == 0:
            raise ValueError(f"No valid runs found for splits: {splits}")

        self.num_points = cfg.num_points
        self.split_type = split_type
        self.presampled = presampled

        # Set up the presampled data directory path (but don't load data yet)
        if self.split_type != 'train' and self.presampled and save_presampled_data_path:
            self.presampled_base_dir = os.path.splitext(save_presampled_data_path)[0]
            self.presampled_split_dir = os.path.join(self.presampled_base_dir, self.split_type)
            if not os.path.exists(self.presampled_split_dir):
                raise FileNotFoundError(f"Presampled data directory not found: {self.presampled_split_dir}")
    def __len__(self):
        return len(self.run_numbers)

    def __getitem__(self, idx):
        run_number = self.run_numbers[idx]

        # For cloud loading, always use random sampling (no presampling).
        # For local loading, use presampling for validation/test if enabled.
        if self.use_cloud or (self.split_type == 'train') or not self.presampled:
            # Load original data (either from cloud or local) with random sampling
            if self.use_cloud:
                # Load from Hugging Face cloud
                with tempfile.TemporaryDirectory() as temp_dir:
                    # Download and process surface data
                    vtp_url = f"https://huggingface.co/datasets/{self.hf_dataset_name}/resolve/main/run_{run_number}/boundary_{run_number}.vtp"
                    vtp_path = download_file_from_hf(vtp_url, temp_dir)
                    if vtp_path is None:
                        raise RuntimeError(f"Failed to download VTP file for run {run_number}")

                    surface_data = process_vtp_file(vtp_path)
                    if surface_data is None:
                        raise RuntimeError(f"Failed to process VTP file for run {run_number}")

                    coordinates = surface_data['surface_mesh_centers']
                    field = surface_data['surface_fields'][:, 0:1]

                    # Load volume data if requested
                    volume_coordinates = None
                    volume_field = None
                    if self.include_volume:
                        vtu_path = download_vtu_file_from_hf(run_number, self.hf_dataset_name, temp_dir)
                        if vtu_path is not None:
                            volume_data = process_vtu_file(vtu_path, self.volume_variables)
                            if volume_data is not None:
                                volume_coordinates = volume_data['volume_mesh_centers']
                                volume_field = volume_data['volume_fields']
                                print(f"Successfully loaded volume data for run {run_number}")
                            else:
                                print(f"Failed to process VTU file for run {run_number}")
                        else:
                            print(f"Failed to download VTU file for run {run_number}")
            else:
                # Load from local .npy file
                original_filename = self.original_filenames[run_number]
                original_file_path = os.path.join(self.data_dir, original_filename)
                data = np.load(original_file_path, allow_pickle=True).item()
                coordinates = data['surface_mesh_centers']
                field = data['surface_fields'][:, 0:1]

                # Load volume data if available and requested
                volume_coordinates = None
                volume_field = None
                if self.include_volume and 'volume_mesh_centers' in data and 'volume_fields' in data:
                    volume_coordinates = data['volume_mesh_centers']
                    volume_field = data['volume_fields']

            # Random sampling for surface data
            sample_indices = np.random.choice(coordinates.shape[0], self.num_points, replace=False)
            coordinates = coordinates[sample_indices, :]
            field = field[sample_indices, 0:1]

            # Random sampling for volume data if available
            if volume_coordinates is not None and volume_field is not None:
                volume_sample_indices = np.random.choice(volume_coordinates.shape[0], self.volume_num_points, replace=False)
                volume_coordinates = volume_coordinates[volume_sample_indices, :]
                volume_field = volume_field[volume_sample_indices, :]
        else:
            # Load presampled data on demand for validation/test (local loading only)
            presampled_file_path = os.path.join(self.presampled_split_dir, f'run_{run_number}.npy')
            if os.path.exists(presampled_file_path):
                data_dict = np.load(presampled_file_path, allow_pickle=True).item()
                coordinates = data_dict['surface_mesh_centers']
                field = data_dict['surface_fields'][:, 0:1]

                # Load volume data if available and requested
                volume_coordinates = None
                volume_field = None
                if self.include_volume and 'volume_mesh_centers' in data_dict and 'volume_fields' in data_dict:
                    volume_coordinates = data_dict['volume_mesh_centers']
                    volume_field = data_dict['volume_fields']
            else:
                raise FileNotFoundError(f"Presampled file not found: {presampled_file_path}")

        coordinates_tensor = torch.tensor(coordinates, dtype=torch.float32)
        field_tensor = torch.tensor(field, dtype=torch.float32)

        # Min-max normalize coordinates and standardize the pressure field
        coordinates_tensor = (coordinates_tensor - input_pos_mins) / (input_pos_maxs - input_pos_mins)
        field_tensor = (field_tensor - PRESSURE_MEAN) / PRESSURE_STD

        data = {'input_pos': coordinates_tensor, 'output_feat': field_tensor, 'output_pos': coordinates_tensor}

        # Add volume data if available
        if volume_coordinates is not None and volume_field is not None:
            volume_coordinates_tensor = torch.tensor(volume_coordinates, dtype=torch.float32)
            volume_field_tensor = torch.tensor(volume_field, dtype=torch.float32)

            # Normalize volume coordinates using the same normalization as the surface
            volume_coordinates_tensor = (volume_coordinates_tensor - input_pos_mins) / (input_pos_maxs - input_pos_mins)

            data['volume_input_pos'] = volume_coordinates_tensor
            data['volume_output_feat'] = volume_field_tensor
            data['volume_output_pos'] = volume_coordinates_tensor

        return data
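
# --- Illustrative dataset usage sketch (hypothetical config, local loading) ---
# A minimal sketch of constructing the dataset directly and inspecting one sample. The config
# fields mirror the attributes read in __init__/__getitem__ (data_dir, num_points, use_cloud,
# include_volume); the directory path and run numbers are assumptions for demonstration.
def _example_dataset_sample():
    """Sketch: build a training-split dataset and look at the tensors one item provides."""
    from types import SimpleNamespace
    cfg = SimpleNamespace(
        data_dir="./data/drivaerml_npy",  # hypothetical directory of run_*.npy files
        num_points=10000,
        use_cloud=False,
        include_volume=False,
    )
    dataset = DrivAerMLDataset(cfg, splits=[1, 2, 5], split_type='train')
    sample = dataset[0]
    # Surface-only samples contain three tensors:
    #   input_pos / output_pos: (num_points, 3) coordinates scaled to [0, 1]
    #   output_feat:            (num_points, 1) standardized pressure
    for key, value in sample.items():
        print(f"{key}: {tuple(value.shape)}")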
def calculate_normalization_constants(dataloader):
    """
    Calculate normalization constants for both pressure values and coordinate ranges
    across the entire training dataset.

    Args:
        dataloader: Training DataLoader

    Returns:
        tuple: (pressure_mean, pressure_std, coord_ranges) where
               coord_ranges = {'min_x', 'max_x', 'min_y', 'max_y', 'min_z', 'max_z'}
    """
    all_pressures = []

    # Initialize coordinate extremes
    max_x = float('-inf')
    max_y = float('-inf')
    max_z = float('-inf')
    min_x = float('inf')
    min_y = float('inf')
    min_z = float('inf')

    print("Calculating normalization constants...")

    for batch_idx, batch in enumerate(dataloader):
        # Process pressure values
        output_feat = batch['output_feat']
        pressures = output_feat.flatten().numpy()
        all_pressures.extend(pressures)
        # print('pressures', pressures.shape)

        # Process coordinate ranges
        input_pos = batch['input_pos']
        # Convert tensor to numpy for coordinate calculations
        input_pos_np = input_pos.numpy()
        max_x = max(max_x, np.max(input_pos_np[:, :, 0]))
        max_y = max(max_y, np.max(input_pos_np[:, :, 1]))
        max_z = max(max_z, np.max(input_pos_np[:, :, 2]))
        min_x = min(min_x, np.min(input_pos_np[:, :, 0]))
        min_y = min(min_y, np.min(input_pos_np[:, :, 1]))
        min_z = min(min_z, np.min(input_pos_np[:, :, 2]))

        if batch_idx % 10 == 0:  # Print progress every 10 batches
            print(f"Processed {batch_idx + 1} batches...")

    # Convert to a numpy array for efficient computation
    all_pressures = np.array(all_pressures)

    # Calculate pressure statistics
    pressure_mean = np.mean(all_pressures)
    pressure_std = np.std(all_pressures)

    # Store coordinate ranges
    coord_ranges = {
        'min_x': min_x, 'max_x': max_x,
        'min_y': min_y, 'max_y': max_y,
        'min_z': min_z, 'max_z': max_z
    }

    # Print comprehensive statistics
    print(f"\nPressure statistics from {len(all_pressures)} data points:")
    print(f"Mean: {pressure_mean:.6f}")
    print(f"Std: {pressure_std:.6f}")
    print(f"Min: {np.min(all_pressures):.6f}")
    print(f"Max: {np.max(all_pressures):.6f}")
    print(f"\nCoordinate ranges:")
    print(f"X: [{min_x:.6f}, {max_x:.6f}]")
    print(f"Y: [{min_y:.6f}, {max_y:.6f}]")
    print(f"Z: [{min_z:.6f}, {max_z:.6f}]")

    return pressure_mean, pressure_std, coord_ranges
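
# --- Illustrative sketch: relating this pass to the module constants ---
# The statistics commented near the bottom of this module appear to correspond to the outputs
# of a pass like the one below over raw (unnormalized) training batches; that provenance is an
# assumption, and this sketch only shows which returned values would map onto which constants.
def _example_derive_constants(train_dataloader):
    """Sketch: run one statistics pass and show how its outputs would feed the constants."""
    pressure_mean, pressure_std, coord_ranges = calculate_normalization_constants(train_dataloader)
    print("PRESSURE_MEAN  <-", pressure_mean)
    print("PRESSURE_STD   <-", pressure_std)
    print("input_pos_mins <-", [coord_ranges['min_x'], coord_ranges['min_y'], coord_ranges['min_z']])
    print("input_pos_maxs <-", [coord_ranges['max_x'], coord_ranges['max_y'], coord_ranges['max_z']])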
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)


g = torch.Generator()
g.manual_seed(0)


def get_dataloaders(cfg):
    splits = json.load(open(cfg.splits_file))

    # Check if cloud loading is enabled
    use_cloud = getattr(cfg, 'use_cloud', False)

    if use_cloud:
        print("=" * 60)
        print("CLOUD LOADING MODE: Loading data directly from Hugging Face")
        print(f"Dataset: {getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')}")
        print("Note: Presampling is disabled in cloud loading mode")
        print("=" * 60)
    else:
        print("=" * 60)
        print("LOCAL LOADING MODE: Loading data from local .npy files")
        print(f"Data directory: {cfg.data_dir}")
        print("=" * 60)

    # Handle presampling logic - disable presampling for cloud loading
    if use_cloud:
        # For cloud loading, always use random sampling (no presampling)
        use_presampled = False
        print("Cloud loading mode: Using random sampling for all splits (presampling disabled)")
    else:
        # For local loading, handle presampling as before
        presampled_data_path = getattr(cfg, 'presampled_data_path', os.path.join(cfg.data_dir, 'presampled_val_test_data.npy'))
        presampled_base_dir = os.path.splitext(presampled_data_path)[0]

        if not cfg.presampled_exists:
            # Create presampled data if it doesn't exist or if presampled_exists=False
            if not os.path.exists(presampled_base_dir):
                print("=" * 60)
                print("PRESAMPLING MODE: Creating presampled validation and test data...")
                print(f"Presampled data will be saved to: {presampled_base_dir}")
                create_presampled_data(cfg, splits, presampled_data_path)
                print("Presampled data created successfully!")
                print("You can now set presampled_exists=True in config for future runs to use this presampled data.")
                print("=" * 60)
            else:
                print(f"Presampled data directory already exists at: {presampled_base_dir}")
                print("Using existing presampled data. Set presampled_exists=True to use it in future runs.")

        if cfg.presampled_exists and not os.path.exists(presampled_base_dir):
            print(f"Warning: presampled_exists=True but presampled data directory not found at {presampled_base_dir}")
            print("Creating presampled data...")
            create_presampled_data(cfg, splits, presampled_data_path)

        use_presampled = True
        print(f"Using presampled validation and test data from: {presampled_base_dir}")

    train_dataset = DrivAerMLDataset(cfg, splits=splits['train'], split_type='train')
    val_dataset = DrivAerMLDataset(cfg, splits=splits['validation'], split_type='validation',
                                   presampled=use_presampled,
                                   save_presampled_data_path=None if use_cloud else presampled_data_path)
    test_dataset = DrivAerMLDataset(cfg, splits=splits['test'], split_type='test',
                                    presampled=use_presampled,
                                    save_presampled_data_path=None if use_cloud else presampled_data_path)

    if cfg.model == 'NeuralCFD' or cfg.model == 'NeuralCFDTransolver':
        collate_fn = DrivAerMLSimulationCollator(num_supernodes=cfg.num_supernodes)
    else:
        collate_fn = None

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=cfg.num_workers,
        collate_fn=collate_fn,
        worker_init_fn=seed_worker,
        generator=g
    )
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=cfg.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=cfg.num_workers,
        collate_fn=collate_fn,
        worker_init_fn=seed_worker,
        generator=g
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        drop_last=False,
        num_workers=cfg.num_workers,
        collate_fn=collate_fn,
        worker_init_fn=seed_worker,
        generator=g
    )

    # # Calculate normalization constants
    # print('Calculating normalization constants...')
    # pressure_mean, pressure_std, coord_ranges = calculate_normalization_constants(train_dataloader)
    # exit()

    return train_dataloader, val_dataloader, test_dataloader
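
# --- Illustrative end-to-end sketch (hypothetical config values) ---
# get_dataloaders() expects a config exposing the attributes used above. Everything in the
# SimpleNamespace below (paths, batch size, model name, worker count) is an assumption for
# demonstration; real values come from the project's configuration files.
def _example_get_dataloaders():
    """Sketch: build the three dataloaders and inspect the first training batch."""
    from types import SimpleNamespace
    cfg = SimpleNamespace(
        data_dir="./data/drivaerml_npy",                       # hypothetical local data dir
        splits_file="./data/splits.json",                      # hypothetical train/val/test splits
        presampled_exists=True,                                # reuse previously created presampled data
        presampled_data_path="./data/presampled_val_test_data.npy",
        num_points=10000,
        use_cloud=False,
        model="Transolver",                                    # any value except the NeuralCFD variants
        batch_size=2,
        num_workers=0,
    )
    train_loader, val_loader, test_loader = get_dataloaders(cfg)
    batch = next(iter(train_loader))
    # With the default collate_fn, tensors are batched: input_pos is (batch_size, num_points, 3)
    print({key: tuple(value.shape) for key, value in batch.items()})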
print("You can now set presampled=True in config for future runs to use this presampled data.") print("=" * 60) else: print(f"Presampled data directory already exists at: {presampled_base_dir}") print("Using existing presampled data. Set presampled=True to use it in future runs.") if cfg.presampled_exists and not os.path.exists(presampled_base_dir): print(f"Warning: presampled=True but presampled data directory not found at {presampled_base_dir}") print("Creating presampled data...") create_presampled_data(cfg, splits, presampled_data_path) use_presampled = True print(f"Using presampled validation and test data from: {presampled_base_dir}") train_dataset = DrivAerMLDataset(cfg, splits = splits['train'], split_type = 'train') val_dataset = DrivAerMLDataset(cfg, splits = splits['validation'], split_type = 'validation', presampled = use_presampled, save_presampled_data_path = None if use_cloud else presampled_data_path) test_dataset = DrivAerMLDataset(cfg, splits = splits['test'], split_type = 'test', presampled = use_presampled, save_presampled_data_path = None if use_cloud else presampled_data_path) if cfg.model == 'NeuralCFD' or cfg.model == 'NeuralCFDTransolver': collate_fn = DrivAerMLSimulationCollator(num_supernodes = cfg.num_supernodes) else: collate_fn = None train_dataloader = DataLoader( train_dataset, batch_size=cfg.batch_size, shuffle=True, drop_last=True, num_workers=cfg.num_workers, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g ) val_dataloader = DataLoader( val_dataset, batch_size=cfg.batch_size, shuffle=True, drop_last=True, num_workers=cfg.num_workers, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g ) test_dataloader = DataLoader( test_dataset, batch_size=1, shuffle=False, drop_last=False, num_workers=cfg.num_workers, collate_fn=collate_fn, worker_init_fn=seed_worker, generator=g ) # # Calculate normalization constants # print('Calculating normalization constants...') # pressure_mean, pressure_std, coord_ranges = calculate_normalization_constants(train_dataloader) # exit() return train_dataloader, val_dataloader, test_dataloader # For NeuralCFD class DrivAerMLSimulationCollator: def __init__(self, num_supernodes): self.num_supernodes = num_supernodes # Create a generator for reproducible random permutations # This is needed for multiprocessing environments where workers have independent random states self.generator = torch.Generator() self.generator.manual_seed(0) def __call__(self, batch): collated_batch = {} # inputs to sparse tensors # position: batch_size * (num_inputs, ndim) -> (batch_size * num_inputs, ndim) # features: batch_size * (num_inputs, dim) -> (batch_size * num_inputs, dim) input_pos = [] input_lens = [] for i in range(len(batch)): pos = batch[i]["input_pos"] input_pos.append(pos) input_lens.append(len(pos)) collated_batch["input_pos"] = torch.concat(input_pos) # select supernodes with seeded generator for reproducibility supernodes_offset = 0 supernode_idxs = [] for i in range(len(input_lens)): # Use the seeded generator for reproducible permutations in multiprocessing perm = torch.randperm(len(input_pos[i]), generator=self.generator)[:self.num_supernodes] + supernodes_offset supernode_idxs.append(perm) supernodes_offset += input_lens[i] collated_batch["supernode_idxs"] = torch.concat(supernode_idxs) # create batch_idx tensor batch_idx = torch.empty(sum(input_lens), dtype=torch.long) start = 0 cur_batch_idx = 0 for i in range(len(input_lens)): end = start + input_lens[i] batch_idx[start:end] = cur_batch_idx start = end 
# Pressure statistics from the OpenFOAM surface train dataset (10k points sampled):
# Mean: -229.845718
# Std: 269.598572
# Min: -3651.057861
# Max: 859.160034
# Coordinate ranges:
# X: [-0.941836, 4.131968]
# Y: [-1.129535, 1.125530]
# Z: [-0.317549, 1.244577]

# Pressure statistics from the full OpenFOAM surface train dataset (3323811346 data points):
# Mean: -229.266983
# Std: 269.226807
# Min: -111492.804688
# Max: 6382.190918
# Coordinate ranges:
# X: [-0.942579, 4.132785]
# Y: [-1.131676, 1.131676]
# Z: [-0.317577, 1.244584]

# With 10k points sampled
# PRESSURE_MEAN = -229.845718
# PRESSURE_STD = 269.598572
# input_pos_mins = torch.tensor([-0.941836, -1.129535, -0.317549])
# input_pos_maxs = torch.tensor([4.131968, 1.125530, 1.244577])

# With the full dataset
PRESSURE_MEAN = -229.266983
PRESSURE_STD = 269.226807
input_pos_mins = torch.tensor([-0.942579, -1.131676, -0.317577])
input_pos_maxs = torch.tensor([4.132785, 1.131676, 1.244584])
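

# --- Illustrative normalization round-trip (uses the constants above) ---
# A short sketch of the exact transforms applied in DrivAerMLDataset.__getitem__ and how a
# prediction in normalized space would be mapped back to physical values. The sample numbers
# are made up for demonstration; only the formulas mirror the code above.
def _example_normalization_roundtrip():
    """Sketch: normalize a coordinate/pressure pair and invert the pressure transform."""
    raw_coord = torch.tensor([[1.0, 0.0, 0.5]])    # hypothetical surface point
    raw_pressure = torch.tensor([[-500.0]])        # hypothetical surface pressure value

    norm_coord = (raw_coord - input_pos_mins) / (input_pos_maxs - input_pos_mins)  # min-max scaling to [0, 1]
    norm_pressure = (raw_pressure - PRESSURE_MEAN) / PRESSURE_STD                  # z-score standardization

    # Inverse transform, e.g. for converting model predictions back to physical pressure values
    recovered_pressure = norm_pressure * PRESSURE_STD + PRESSURE_MEAN
    print(norm_coord, norm_pressure, recovered_pressure)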