"""
DrivAerML Dataset with Memory-Efficient Presampling Support and Cloud Loading

This dataset implements presampling functionality for validation and test data while maintaining
random sampling for training data. The presampling feature ensures consistent validation and 
test results across different runs, with memory-efficient on-demand loading.

The dataset can load data directly from Hugging Face cloud or from local .npy files.

Presampling Workflow:
1. Set presampled=False in config to create presampled validation and test data
2. The system creates fixed samples for validation and test splits
3. Each run's presampled data is saved as individual files in a directory structure
4. Set presampled=True in config to use the saved presampled data for future runs
5. Training data always uses random sampling regardless of presampled setting

Directory Structure:
presampled_data_path/
├── validation/
│   ├── run_1.npy
│   ├── run_2.npy
│   └── ...
└── test/
    ├── run_1.npy
    ├── run_2.npy
    └── ...

Configuration Parameters:
- presampled: Boolean flag to control whether to use presampled data
- presampled_data_path: Base path where presampled data directory is created
- use_cloud: Boolean flag to control whether to load from Hugging Face cloud
- hf_dataset_name: Hugging Face dataset name (default: "neashton/drivaerml")

Usage:
- First run: Set presampled=False to create presampled data
- Subsequent runs: Set presampled=True to use existing presampled data
- Set use_cloud=True to load from Hugging Face cloud instead of local files
"""

import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import default_collate
import json
import re
import requests
import tempfile
from pathlib import Path

# Optional imports for cloud loading
try:
    import vtk
    from vtk.util import numpy_support
    import pyvista as pv
    CLOUD_LOADING_AVAILABLE = True
except ImportError as e:
    print(f"Warning: Cloud loading dependencies not available: {e}")
    print("Install with: pip install vtk pyvista")
    CLOUD_LOADING_AVAILABLE = False

# Problematic runs as mentioned in the DrivAerML dataset documentation
PROBLEMATIC_RUNS = {167, 211, 218, 221, 248, 282, 291, 295, 316, 325, 329, 364, 370, 376, 403, 473}

def download_file_from_hf(url, temp_dir):
    """Download a file from Hugging Face and return the local path"""
    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        
        # Extract filename from URL
        filename = url.split('/')[-1]
        local_path = os.path.join(temp_dir, filename)
        
        with open(local_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        
        return local_path
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        return None

def download_vtu_file_from_hf(run_number, hf_dataset_name, temp_dir):
    """Download VTU file parts and concatenate them"""
    try:
        # Download part 1
        url_part1 = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/volume_{run_number}.vtu.00.part"
        part1_path = download_file_from_hf(url_part1, temp_dir)
        
        if part1_path is None:
            print(f"Failed to download part 1 for run {run_number}")
            return None
        
        # Download part 2
        url_part2 = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/volume_{run_number}.vtu.01.part"
        part2_path = download_file_from_hf(url_part2, temp_dir)
        
        if part2_path is None:
            print(f"Failed to download part 2 for run {run_number}")
            return None
        
        # Concatenate the parts
        final_path = os.path.join(temp_dir, f"volume_{run_number}.vtu")
        with open(final_path, 'wb') as outfile:
            # Write part 1
            with open(part1_path, 'rb') as infile:
                outfile.write(infile.read())
            # Write part 2
            with open(part2_path, 'rb') as infile:
                outfile.write(infile.read())
        
        print(f"Successfully downloaded and concatenated VTU file for run {run_number}")
        return final_path
        
    except Exception as e:
        print(f"Error downloading VTU file for run {run_number}: {e}")
        return None

def process_vtp_file(vtp_path, surface_variables=["pMeanTrim", "wallShearStressMeanTrim"]):
    """Process a VTP file and extract surface mesh centers and fields"""
    if not CLOUD_LOADING_AVAILABLE:
        raise ImportError("Cloud loading dependencies (vtk, pyvista) are not available. Install with: pip install vtk pyvista")
    
    try:
        reader = vtk.vtkXMLPolyDataReader()
        reader.SetFileName(vtp_path)
        reader.Update()
        polydata = reader.GetOutput()

        # Convert node data to cell data
        c2p = vtk.vtkPointDataToCellData()
        c2p.SetInputData(polydata)
        c2p.Update()
        celldata_all = c2p.GetOutput()
        celldata = celldata_all.GetCellData()
        
        # Extract fields
        fields = []
        for array_name in surface_variables:
            try:
                array = celldata.GetArray(array_name)
                if array is not None:
                    array_data = numpy_support.vtk_to_numpy(array).reshape(
                        array.GetNumberOfTuples(), array.GetNumberOfComponents()
                    )
                    fields.append(array_data)
                else:
                    # If field doesn't exist, create zeros
                    num_cells = celldata_all.GetNumberOfCells()
                    array_data = np.zeros((num_cells, 1))
                    fields.append(array_data)
            except Exception as e:
                print(f"Warning: Could not extract field {array_name}: {e}")
                # Create zeros for missing field
                num_cells = celldata_all.GetNumberOfCells()
                array_data = np.zeros((num_cells, 1))
                fields.append(array_data)
        
        surface_fields = np.concatenate(fields, axis=-1)

        # Get cell centers
        mesh = pv.PolyData(polydata)
        surface_coordinates = np.array(mesh.cell_centers().points)

        return {
            "surface_mesh_centers": np.float32(surface_coordinates),
            "surface_fields": np.float32(surface_fields),
        }
    except Exception as e:
        print(f"Error processing VTP file {vtp_path}: {e}")
        return None

def process_vtu_file(vtu_path, volume_variables=["UMean", "pMean"]):
    """Process a VTU file and extract volume data"""
    if not CLOUD_LOADING_AVAILABLE:
        raise ImportError("Cloud loading dependencies (vtk, pyvista) are not available. Install with: pip install vtk pyvista")
    
    try:
        reader = vtk.vtkXMLUnstructuredGridReader()
        reader.SetFileName(vtu_path)
        reader.Update()
        
        # Get the unstructured grid data
        polydata = reader.GetOutput()
        
        # Get vertices
        points = polydata.GetPoints()
        vertices = numpy_support.vtk_to_numpy(points.GetData())
        
        # Get point data
        point_data = polydata.GetPointData()
        
        # Extract fields
        fields = []
        for array_name in volume_variables:
            try:
                array = point_data.GetArray(array_name)
                if array is not None:
                    array_data = numpy_support.vtk_to_numpy(array).reshape(
                        array.GetNumberOfTuples(), array.GetNumberOfComponents()
                    )
                    fields.append(array_data)
                else:
                    # If field doesn't exist, create zeros
                    num_points = polydata.GetNumberOfPoints()
                    array_data = np.zeros((num_points, 3))  # Assume 3D vector field
                    fields.append(array_data)
            except Exception as e:
                print(f"Warning: Could not extract field {array_name}: {e}")
                # Create zeros for missing field
                num_points = polydata.GetNumberOfPoints()
                array_data = np.zeros((num_points, 3))  # Assume 3D vector field
                fields.append(array_data)
        
        volume_fields = np.concatenate(fields, axis=-1)
        
        return {
            "volume_mesh_centers": np.float32(vertices),
            "volume_fields": np.float32(volume_fields),
        }
    except Exception as e:
        print(f"Error processing VTU file {vtu_path}: {e}")
        return None

def create_presampled_data(cfg, splits, save_path):
    """
    Create presampled validation and test data with fixed random sampling.
    Saves individual files for each run to enable on-demand loading.
    
    Args:
        cfg: Configuration object
        splits: Dictionary containing train/validation/test splits
        save_path: Base path for saving presampled data (directory will be created)
    """
    print("Creating presampled validation and test data...")
    
    # Check cloud loading availability if needed
    use_cloud = getattr(cfg, 'use_cloud', False)
    if use_cloud and not CLOUD_LOADING_AVAILABLE:
        raise ImportError(
            "Cloud loading is enabled but dependencies (vtk, pyvista) are not available. "
            "Install with: pip install vtk pyvista, or set use_cloud=False to use local files."
        )
    
    # Create directory structure for presampled data
    base_dir = os.path.splitext(save_path)[0]  # Remove .npy extension if present
    os.makedirs(base_dir, exist_ok=True)
    
    # Set seed for reproducible sampling
    np.random.seed(0)
    
    # Create temporary directory for downloading files
    with tempfile.TemporaryDirectory() as temp_dir:
        for split_type in ['validation', 'test']:
            print(f"Processing {split_type} split...")
            split_runs = splits[split_type]
            
            # Create subdirectory for this split
            split_dir = os.path.join(base_dir, split_type)
            os.makedirs(split_dir, exist_ok=True)
            
            for run_number in split_runs:
                if run_number in PROBLEMATIC_RUNS:
                    print(f"Skipping problematic run {run_number}")
                    continue
                    
                print(f"Processing run {run_number} for {split_type}...")
                
                if use_cloud:
                    # Download from Hugging Face cloud
                    hf_dataset_name = getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')
                    vtp_url = f"https://huggingface.co/datasets/{hf_dataset_name}/resolve/main/run_{run_number}/boundary_{run_number}.vtp"
                    
                    vtp_path = download_file_from_hf(vtp_url, temp_dir)
                    if vtp_path is None:
                        print(f"Failed to download VTP file for run {run_number}")
                        continue
                    
                    # Process VTP file
                    data = process_vtp_file(vtp_path)
                    if data is None:
                        print(f"Failed to process VTP file for run {run_number}")
                        continue
                        
                    coordinates = data['surface_mesh_centers']
                    field = data['surface_fields']
                else:
                    # Load from local .npy file
                    found_file = False
                    for f in os.listdir(cfg.data_dir):
                        if f.endswith('.npy'):
                            match = re.search(r'run_(\d+)', f)
                            if match and int(match.group(1)) == run_number:
                                npy_file_path = os.path.join(cfg.data_dir, f)
                                data = np.load(npy_file_path, allow_pickle=True).item()
                                coordinates = data['surface_mesh_centers']
                                field = data['surface_fields']
                                found_file = True
                                break
                    
                    if not found_file:
                        print(f"No .npy file found for run {run_number}")
                        continue
                
                # Sample points with fixed seed for reproducibility
                sample_indices = np.random.choice(coordinates.shape[0], cfg.num_points, replace=False)
                sampled_coordinates = coordinates[sample_indices, :]
                sampled_field = field[sample_indices, :]
                
                # Save individual presampled file for this run
                presampled_run_data = {
                    'surface_mesh_centers': sampled_coordinates,
                    'surface_fields': sampled_field
                }
                
                run_file_path = os.path.join(split_dir, f'run_{run_number}.npy')
                np.save(run_file_path, presampled_run_data)
    
    print(f"Presampled data saved to directory: {base_dir}")
    print(f"Structure: {base_dir}/{{validation,test}}/run_{{number}}.npy")
    
    return base_dir

class DrivAerMLDataset(Dataset):
    def __init__(self, cfg, splits = None, split_type = 'train', presampled = False, save_presampled_data_path = None):
        """
        Initializes the DrivAerMLDataset instance.

        Args:
            cfg: Configuration object containing data directory and number of points
            splits: List of run numbers to include, if None includes all files
            split_type: Type of split ('train', 'validation', 'test')
            presampled: Whether to use presampled data for validation/test
            save_presampled_data_path: Base path to the presampled data directory
        """
        self.data_dir = cfg.data_dir
        self.splits = splits
        self.use_cloud = getattr(cfg, 'use_cloud', False)
        self.hf_dataset_name = getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')
        
        # Volume data configuration
        self.include_volume = getattr(cfg, 'include_volume', False)
        self.volume_variables = getattr(cfg, 'volume_variables', ["UMean", "pMean"])
        self.volume_num_points = getattr(cfg, 'volume_num_points', 50000)  # Default for volume sampling
        
        # Check cloud loading availability
        if self.use_cloud and not CLOUD_LOADING_AVAILABLE:
            raise ImportError(
                "Cloud loading is enabled but dependencies (vtk, pyvista) are not available. "
                "Install with: pip install vtk pyvista, or set use_cloud=False to use local files."
            )
        
        # Store only run numbers and create filename mapping for efficiency
        self.run_numbers = []
        self.original_filenames = {}  # run_number -> original filename
        
        if self.use_cloud:
            # For cloud loading, we don't need to scan local directory
            # Just filter out problematic runs
            for run_number in splits:
                if run_number not in PROBLEMATIC_RUNS:
                    self.run_numbers.append(run_number)
        else:
            # Local file loading - scan directory for .npy files
            for f in os.listdir(cfg.data_dir):
                if f.endswith('.npy'):
                    match = re.search(r'run_(\d+)', f)
                    if match:
                        run_number = int(match.group(1))
                        if run_number in splits and run_number not in PROBLEMATIC_RUNS:
                            self.run_numbers.append(run_number)
                            self.original_filenames[run_number] = f
        
        if len(self.run_numbers) == 0:
            raise ValueError(f"No valid runs found for splits: {splits}")
            
        self.num_points = cfg.num_points
        self.split_type = split_type
        self.presampled = presampled
        
        # Set up presampled data directory path (but don't load data yet)
        if self.split_type != 'train' and self.presampled and save_presampled_data_path:
            self.presampled_base_dir = os.path.splitext(save_presampled_data_path)[0]
            self.presampled_split_dir = os.path.join(self.presampled_base_dir, self.split_type)
            if not os.path.exists(self.presampled_split_dir):
                raise FileNotFoundError(f"Presampled data directory not found: {self.presampled_split_dir}")

    def __len__(self):
        return len(self.run_numbers)

    def __getitem__(self, idx):
        run_number = self.run_numbers[idx]
        
        # For cloud loading, always use random sampling (no presampling)
        # For local loading, use presampling for validation/test if enabled
        if self.use_cloud or (self.split_type == 'train') or not self.presampled:
            # Load original data (either from cloud or local) with random sampling
            if self.use_cloud:
                # Load from Hugging Face cloud
                with tempfile.TemporaryDirectory() as temp_dir:
                    # Download and process surface data
                    vtp_url = f"https://huggingface.co/datasets/{self.hf_dataset_name}/resolve/main/run_{run_number}/boundary_{run_number}.vtp"
                    vtp_path = download_file_from_hf(vtp_url, temp_dir)
                    
                    if vtp_path is None:
                        raise RuntimeError(f"Failed to download VTP file for run {run_number}")
                    
                    surface_data = process_vtp_file(vtp_path)
                    if surface_data is None:
                        raise RuntimeError(f"Failed to process VTP file for run {run_number}")
                    
                    coordinates = surface_data['surface_mesh_centers']
                    field = surface_data['surface_fields'][:,0:1]
                    
                    # Load volume data if requested
                    volume_coordinates = None
                    volume_field = None
                    if self.include_volume:
                        vtu_path = download_vtu_file_from_hf(run_number, self.hf_dataset_name, temp_dir)
                        if vtu_path is not None:
                            volume_data = process_vtu_file(vtu_path, self.volume_variables)
                            if volume_data is not None:
                                volume_coordinates = volume_data['volume_mesh_centers']
                                volume_field = volume_data['volume_fields']
                                print(f"Successfully loaded volume data for run {run_number}")
                            else:
                                print(f"Failed to process VTU file for run {run_number}")
                        else:
                            print(f"Failed to download VTU file for run {run_number}")
            else:
                # Load from local .npy file
                original_filename = self.original_filenames[run_number]
                original_file_path = os.path.join(self.data_dir, original_filename)
                data = np.load(original_file_path, allow_pickle=True).item()
                coordinates = data['surface_mesh_centers']
                field = data['surface_fields'][:,0:1]
                
                # Load volume data if available and requested
                volume_coordinates = None
                volume_field = None
                if self.include_volume and 'volume_mesh_centers' in data and 'volume_fields' in data:
                    volume_coordinates = data['volume_mesh_centers']
                    volume_field = data['volume_fields']
            
            # Random sampling for surface data
            sample_indices = np.random.choice(coordinates.shape[0], self.num_points, replace=False)
            coordinates = coordinates[sample_indices,:]
            field = field[sample_indices,0:1]
            
            # Random sampling for volume data if available
            if volume_coordinates is not None and volume_field is not None:
                volume_sample_indices = np.random.choice(volume_coordinates.shape[0], self.volume_num_points, replace=False)
                volume_coordinates = volume_coordinates[volume_sample_indices,:]
                volume_field = volume_field[volume_sample_indices,:]
        else:
            # Load presampled data on-demand for validation/test (local loading only)
            presampled_file_path = os.path.join(self.presampled_split_dir, f'run_{run_number}.npy')
            if os.path.exists(presampled_file_path):
                data_dict = np.load(presampled_file_path, allow_pickle=True).item()
                coordinates = data_dict['surface_mesh_centers']
                field = data_dict['surface_fields'][:,0:1]
                
                # Load volume data if available and requested
                volume_coordinates = None
                volume_field = None
                if self.include_volume and 'volume_mesh_centers' in data_dict and 'volume_fields' in data_dict:
                    volume_coordinates = data_dict['volume_mesh_centers']
                    volume_field = data_dict['volume_fields']
            else:
                raise FileNotFoundError(f"Presampled file not found: {presampled_file_path}")
        
        coordinates_tensor = torch.tensor(coordinates, dtype=torch.float32)
        field_tensor = torch.tensor(field, dtype=torch.float32)

        coordinates_tensor = (coordinates_tensor - input_pos_mins) / (input_pos_maxs - input_pos_mins)
        field_tensor = (field_tensor - PRESSURE_MEAN) / PRESSURE_STD

        data = {'input_pos': coordinates_tensor, 'output_feat': field_tensor, 'output_pos': coordinates_tensor}
        
        # Add volume data if available
        if volume_coordinates is not None and volume_field is not None:
            volume_coordinates_tensor = torch.tensor(volume_coordinates, dtype=torch.float32)
            volume_field_tensor = torch.tensor(volume_field, dtype=torch.float32)
            
            # Normalize volume coordinates using same normalization as surface
            volume_coordinates_tensor = (volume_coordinates_tensor - input_pos_mins) / (input_pos_maxs - input_pos_mins)
            
            data['volume_input_pos'] = volume_coordinates_tensor
            data['volume_output_feat'] = volume_field_tensor
            data['volume_output_pos'] = volume_coordinates_tensor

        return data


def calculate_normalization_constants(dataloader):
    """
    Calculate normalization constants for both pressure values and coordinate ranges
    across the entire training dataset.
    
    Args:
        dataloader: Training DataLoader
        
    Returns:
        tuple: (pressure_mean, pressure_std, coord_ranges)
               where coord_ranges = {'min_x', 'max_x', 'min_y', 'max_y', 'min_z', 'max_z'}
    """
    all_pressures = []
    
    # Initialize coordinate extremes
    max_x = float('-inf')
    max_y = float('-inf')
    max_z = float('-inf')
    min_x = float('inf')
    min_y = float('inf')
    min_z = float('inf')
    
    print("Calculating normalization constants...")
    for batch_idx, batch in enumerate(dataloader):
        # Process pressure values
        output_feat = batch['output_feat']
        pressures = output_feat.flatten().numpy()
        all_pressures.extend(pressures)
        # print('pressures', pressures.shape)
        
        # Process coordinate ranges
        input_pos = batch['input_pos']
        # Convert tensor to numpy for coordinate calculations
        input_pos_np = input_pos.numpy()
        max_x = max(max_x, np.max(input_pos_np[:,:,0]))
        max_y = max(max_y, np.max(input_pos_np[:,:,1]))
        max_z = max(max_z, np.max(input_pos_np[:,:,2]))
        min_x = min(min_x, np.min(input_pos_np[:,:,0]))
        min_y = min(min_y, np.min(input_pos_np[:,:,1]))
        min_z = min(min_z, np.min(input_pos_np[:,:,2]))
        
        if batch_idx % 10 == 0:  # Print progress every 10 batches
            print(f"Processed {batch_idx + 1} batches...")
    
    # Convert to numpy array for efficient computation
    all_pressures = np.array(all_pressures)
    
    # Calculate pressure statistics
    pressure_mean = np.mean(all_pressures)
    pressure_std = np.std(all_pressures)
    
    # Store coordinate ranges
    coord_ranges = {
        'min_x': min_x, 'max_x': max_x,
        'min_y': min_y, 'max_y': max_y,
        'min_z': min_z, 'max_z': max_z
    }
    
    # Print comprehensive statistics
    print(f"\nPressure statistics from {len(all_pressures)} data points:")
    print(f"Mean: {pressure_mean:.6f}")
    print(f"Std:  {pressure_std:.6f}")
    print(f"Min:  {np.min(all_pressures):.6f}")
    print(f"Max:  {np.max(all_pressures):.6f}")
    
    print(f"\nCoordinate ranges:")
    print(f"X: [{min_x:.6f}, {max_x:.6f}]")
    print(f"Y: [{min_y:.6f}, {max_y:.6f}]")
    print(f"Z: [{min_z:.6f}, {max_z:.6f}]")
    
    return pressure_mean, pressure_std, coord_ranges


def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

def get_dataloaders(cfg):
    splits = json.load(open(cfg.splits_file))
    
    # Check if cloud loading is enabled
    use_cloud = getattr(cfg, 'use_cloud', False)
    if use_cloud:
        print("=" * 60)
        print("CLOUD LOADING MODE: Loading data directly from Hugging Face")
        print(f"Dataset: {getattr(cfg, 'hf_dataset_name', 'neashton/drivaerml')}")
        print("Note: Presampling is disabled in cloud loading mode")
        print("=" * 60)
    else:
        print("=" * 60)
        print("LOCAL LOADING MODE: Loading data from local .npy files")
        print(f"Data directory: {cfg.data_dir}")
        print("=" * 60)
    
    # Handle presampling logic - disable presampling for cloud loading
    if use_cloud:
        # For cloud loading, always use random sampling (no presampling)
        use_presampled = False
        print("Cloud loading mode: Using random sampling for all splits (presampling disabled)")
    else:
        # For local loading, handle presampling as before
        presampled_data_path = getattr(cfg, 'presampled_data_path', os.path.join(cfg.data_dir, 'presampled_val_test_data.npy'))
        presampled_base_dir = os.path.splitext(presampled_data_path)[0]
        
        if not cfg.presampled_exists:
            # Create presampled data if it doesn't exist or if presampled=False
            if not os.path.exists(presampled_base_dir):
                print("=" * 60)
                print("PRESAMPLING MODE: Creating presampled validation and test data...")
                print(f"Presampled data will be saved to: {presampled_base_dir}")
                create_presampled_data(cfg, splits, presampled_data_path)
                print("Presampled data created successfully!")
                print("You can now set presampled=True in config for future runs to use this presampled data.")
                print("=" * 60)
            else:
                print(f"Presampled data directory already exists at: {presampled_base_dir}")
                print("Using existing presampled data. Set presampled=True to use it in future runs.")
        
        if cfg.presampled_exists and not os.path.exists(presampled_base_dir):
            print(f"Warning: presampled=True but presampled data directory not found at {presampled_base_dir}")
            print("Creating presampled data...")
            create_presampled_data(cfg, splits, presampled_data_path)

        use_presampled = True
        print(f"Using presampled validation and test data from: {presampled_base_dir}")

    
    train_dataset = DrivAerMLDataset(cfg, splits = splits['train'], split_type = 'train')
    val_dataset = DrivAerMLDataset(cfg, splits = splits['validation'], split_type = 'validation',
                                   presampled = use_presampled, save_presampled_data_path = None if use_cloud else presampled_data_path)
    test_dataset = DrivAerMLDataset(cfg, splits = splits['test'], split_type = 'test',
                                    presampled = use_presampled, save_presampled_data_path = None if use_cloud else presampled_data_path)

    if cfg.model == 'NeuralCFD' or cfg.model == 'NeuralCFDTransolver':
        collate_fn = DrivAerMLSimulationCollator(num_supernodes = cfg.num_supernodes)
    else:
        collate_fn = None

    train_dataloader = DataLoader(
        train_dataset, batch_size=cfg.batch_size, shuffle=True,
        drop_last=True, num_workers=cfg.num_workers, collate_fn=collate_fn, 
        worker_init_fn=seed_worker, generator=g
    )
    val_dataloader = DataLoader(
        val_dataset, batch_size=cfg.batch_size, shuffle=True,
        drop_last=True, num_workers=cfg.num_workers, collate_fn=collate_fn,
        worker_init_fn=seed_worker, generator=g
    )
    test_dataloader = DataLoader(
        test_dataset, batch_size=1, shuffle=False,
        drop_last=False, num_workers=cfg.num_workers, collate_fn=collate_fn,
        worker_init_fn=seed_worker, generator=g
    )

    # # Calculate normalization constants
    # print('Calculating normalization constants...')
    # pressure_mean, pressure_std, coord_ranges = calculate_normalization_constants(train_dataloader)
    # exit()

    return train_dataloader, val_dataloader, test_dataloader


# For NeuralCFD
class DrivAerMLSimulationCollator:
    def __init__(self, num_supernodes):
        self.num_supernodes = num_supernodes
        # Create a generator for reproducible random permutations
        # This is needed for multiprocessing environments where workers have independent random states
        self.generator = torch.Generator()
        self.generator.manual_seed(0)

    def __call__(self, batch):
        collated_batch = {}

        # inputs to sparse tensors
        # position: batch_size * (num_inputs, ndim) -> (batch_size * num_inputs, ndim)
        # features: batch_size * (num_inputs, dim) -> (batch_size * num_inputs, dim)
        input_pos = []
        input_lens = []
        for i in range(len(batch)):
            pos = batch[i]["input_pos"]
            input_pos.append(pos)
            input_lens.append(len(pos))
        collated_batch["input_pos"] = torch.concat(input_pos)

        # select supernodes with seeded generator for reproducibility
        supernodes_offset = 0
        supernode_idxs = []
        for i in range(len(input_lens)):
            # Use the seeded generator for reproducible permutations in multiprocessing
            perm = torch.randperm(len(input_pos[i]), generator=self.generator)[:self.num_supernodes] + supernodes_offset
            supernode_idxs.append(perm)
            supernodes_offset += input_lens[i]
        collated_batch["supernode_idxs"] = torch.concat(supernode_idxs)

        # create batch_idx tensor
        batch_idx = torch.empty(sum(input_lens), dtype=torch.long)
        start = 0
        cur_batch_idx = 0
        for i in range(len(input_lens)):
            end = start + input_lens[i]
            batch_idx[start:end] = cur_batch_idx
            start = end
            cur_batch_idx += 1
        collated_batch["batch_idx"] = batch_idx
        
        
        # output_feat to sparse tensor
        output_feat = []
        for i in range(len(batch)):
            feat = batch[i]["output_feat"]
            output_feat.append(feat)
        # output_feat is either list of tensors (for training) or list of list of tensors (for rollout)
        if torch.is_tensor(output_feat[0]):
            collated_batch["output_feat"] = torch.concat(output_feat)
        else:
            collated_batch["output_feat"] = output_feat

        # collate dense tensors
        collated_batch["output_pos"] = default_collate([batch[i]["output_pos"] for i in range(len(batch))])

        return collated_batch


# Pressure statistics from openfoam surface train dataset (10k points sampled):
# Mean: -229.845718
# Std:  269.598572
# Min:  -3651.057861
# Max:  859.160034

# Coordinate ranges:
# X: [-0.941836, 4.131968]
# Y: [-1.129535, 1.125530]
# Z: [-0.317549, 1.244577]

# Pressure statistics from full openfoam surface train dataset (3323811346 data points):
# Mean: -229.266983
# Std:  269.226807
# Min:  -111492.804688
# Max:  6382.190918

# Coordinate ranges:
# X: [-0.942579, 4.132785]
# Y: [-1.131676, 1.131676]
# Z: [-0.317577, 1.244584]

# With 10k points sampled
# PRESSURE_MEAN = -229.845718
# PRESSURE_STD = 269.598572
# input_pos_mins = torch.tensor([-0.941836, -1.129535, -0.317549])
# input_pos_maxs = torch.tensor([4.131968, 1.125530, 1.244577])

# With full dataset
PRESSURE_MEAN = -229.266983
PRESSURE_STD = 269.226807
input_pos_mins = torch.tensor([-0.942579, -1.131676, -0.317577])
input_pos_maxs = torch.tensor([4.132785, 1.131676, 1.244584])