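"""Dataset and preprocessing utilities for ShapeNet-Car pressure/velocity fields.

Reads surface (quad) and volume (hex) VTK meshes, caches them as .npy arrays,
computes normalization statistics over the training folds, and builds
train/validation DataLoaders with per-sample point subsampling.
"""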
import os
import random

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

import vtk
from vtk.util.numpy_support import vtk_to_numpy
import meshio
def load_unstructured_grid_data(file_name):
    """Read a legacy-format VTK unstructured grid from disk."""
    reader = vtk.vtkUnstructuredGridReader()
    reader.SetFileName(file_name)
    reader.Update()
    return reader.GetOutput()
def preprocess_data(root, samples, savedir=None):
    """Extract surface/volume point clouds from the raw VTK files and cache them as .npy."""
    for s in samples:
        file_name_press_vtk = os.path.join(root, s, 'quadpress_smpl.vtk')
        file_name_velo_vtk = os.path.join(root, s, 'hexvelo_smpl.vtk')
        file_name_press = os.path.join(root, s, 'press.npy')
        if not os.path.exists(file_name_press_vtk) or not os.path.exists(file_name_velo_vtk):
            continue
        surface_mesh = meshio.read(file_name_press_vtk)
        assert len(surface_mesh.cells) == 1
        cell_block = surface_mesh.cells[0]
        assert cell_block.type == "quad"
        # Surface nodes are the unique vertices referenced by the quad cells
        unique = np.unique(cell_block.data)
        surface_points = surface_mesh.points[unique].astype(np.float32)
        surface_pressure = np.load(file_name_press)[unique].astype(np.float32)
        unstructured_grid_data_press = load_unstructured_grid_data(file_name_press_vtk)
        unstructured_grid_data_velo = load_unstructured_grid_data(file_name_velo_vtk)
        velo = vtk_to_numpy(unstructured_grid_data_velo.GetPointData().GetVectors())
        points_velo = vtk_to_numpy(unstructured_grid_data_velo.GetPoints().GetData())
        points_press = vtk_to_numpy(unstructured_grid_data_press.GetPoints().GetData())
        # Volume points are velocity-grid points that do not coincide with the surface mesh
        surface = {tuple(p) for p in points_press}
        exterior_indices = [i for i, p in enumerate(points_velo) if tuple(p) not in surface]
        volume_points = points_velo[exterior_indices]
        volume_velocities = velo[exterior_indices]
        save_path = os.path.join(savedir, s)
        os.makedirs(save_path, exist_ok=True)
        np.save(os.path.join(save_path, 'volume_points.npy'), volume_points)
        np.save(os.path.join(save_path, 'volume_velocities.npy'), volume_velocities)
        np.save(os.path.join(save_path, 'surface_points.npy'), surface_points)
        np.save(os.path.join(save_path, 'surface_pressure.npy'), surface_pressure)
def get_coef_norm(samples, savedir):
    """Compute normalization coefficients over all preprocessed training samples."""
    all_volume_points = []
    all_surface_points = []
    all_volume_velocities = []
    all_surface_pressure = []
    for s in samples:
        save_path = os.path.join(savedir, s)
        if not os.path.exists(save_path):
            continue
        # Load and collect the cached arrays for each sample
        all_volume_points.append(np.load(os.path.join(save_path, 'volume_points.npy')))
        all_volume_velocities.append(np.load(os.path.join(save_path, 'volume_velocities.npy')))
        all_surface_points.append(np.load(os.path.join(save_path, 'surface_points.npy')))
        all_surface_pressure.append(np.load(os.path.join(save_path, 'surface_pressure.npy')))
    # Concatenate all samples along the point dimension
    all_volume_points = np.concatenate(all_volume_points, axis=0)
    all_surface_points = np.concatenate(all_surface_points, axis=0)
    all_volume_velocities = np.concatenate(all_volume_velocities, axis=0)
    all_surface_pressure = np.concatenate(all_surface_pressure, axis=0)
    # Points: per-coordinate min/max (for min-max normalization)
    volume_points_min = np.min(all_volume_points, axis=0)
    volume_points_max = np.max(all_volume_points, axis=0)
    surface_points_min = np.min(all_surface_points, axis=0)
    surface_points_max = np.max(all_surface_points, axis=0)
    # Velocities: per-component mean/std (for z-score normalization)
    volume_velocities_mean = np.mean(all_volume_velocities, axis=0)
    volume_velocities_std = np.std(all_volume_velocities, axis=0)
    # Pressure: scalar mean/std (1D array)
    surface_pressure_mean = np.mean(all_surface_pressure)
    surface_pressure_std = np.std(all_surface_pressure)
    # Return float32 tensors so normalization does not promote float32 data to float64.
    # Format: (volume_points_min, volume_points_max, surface_points_min, surface_points_max,
    #          volume_velocities_mean, volume_velocities_std, surface_pressure_mean, surface_pressure_std)
    coef_norm = (
        torch.tensor(volume_points_min, dtype=torch.float32), torch.tensor(volume_points_max, dtype=torch.float32),
        torch.tensor(surface_points_min, dtype=torch.float32), torch.tensor(surface_points_max, dtype=torch.float32),
        torch.tensor(volume_velocities_mean, dtype=torch.float32), torch.tensor(volume_velocities_std, dtype=torch.float32),
        torch.tensor(surface_pressure_mean, dtype=torch.float32), torch.tensor(surface_pressure_std, dtype=torch.float32),
    )
    return coef_norm
class ShapenetCarPVDataset(Dataset):
    """Per-sample volume (velocity) and surface (pressure) point clouds for ShapeNet-Car."""

    def __init__(self, cfg, data, split='train', coef_norm=None):
        self.coef_norm = coef_norm
        self.data = data
        self.save_dir = cfg.save_dir
        self.split = split
        self.train_num_points_volume = cfg.train_num_points_volume
        self.train_num_points_surface = cfg.train_num_points_surface

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        save_path = os.path.join(self.save_dir, sample)
        volume_points = torch.from_numpy(np.load(os.path.join(save_path, 'volume_points.npy'))).float()
        volume_velocities = torch.from_numpy(np.load(os.path.join(save_path, 'volume_velocities.npy'))).float()
        surface_points = torch.from_numpy(np.load(os.path.join(save_path, 'surface_points.npy'))).float()
        surface_pressure = torch.from_numpy(np.load(os.path.join(save_path, 'surface_pressure.npy'))).float()
        # Keep coef_norm on the same device as the freshly loaded tensors
        c = tuple(t.to(volume_points.device) for t in self.coef_norm)
        # Normalize: (x - min) / (max - min) for points, (x - mean) / std for velocities and pressure
        volume_points = (volume_points - c[0]) / (c[1] - c[0])
        surface_points = (surface_points - c[2]) / (c[3] - c[2])
        volume_velocities = (volume_velocities - c[4]) / (c[5] + 1e-8)
        surface_pressure = ((surface_pressure - c[6]) / (c[7] + 1e-8)).view(-1, 1)
        if self.split == 'train':
            # Subsample a fixed number of volume and surface points per sample
            indices_volume = torch.randperm(volume_points.shape[0])[:self.train_num_points_volume]
            indices_surface = torch.randperm(surface_points.shape[0])[:self.train_num_points_surface]
            volume_points = volume_points[indices_volume]
            volume_velocities = volume_velocities[indices_volume]
            surface_points = surface_points[indices_surface]
            surface_pressure = surface_pressure[indices_surface]
            vol_kv_indices = torch.arange(volume_points.shape[0])
            surf_kv_indices = torch.arange(surface_points.shape[0])
        else:
            # Validation keeps every point; only the key/value indices are subsampled
            vol_kv_indices = torch.randperm(volume_points.shape[0])[:self.train_num_points_volume]
            surf_kv_indices = torch.randperm(surface_points.shape[0])[:self.train_num_points_surface]
        num_surface_points = surface_points.shape[0]
        num_volume_points = volume_points.shape[0]
        # Concatenate volume then surface points; each feature row is (vx, vy, vz, p),
        # zero-padded where the quantity is not defined
        points = torch.cat([volume_points, surface_points], dim=0)
        features = torch.cat([volume_velocities, torch.zeros((num_surface_points, 3))], dim=0)
        features = torch.cat([features, torch.cat([torch.zeros((num_volume_points, 1)), surface_pressure], dim=0)], dim=1)
        surf = torch.cat([torch.zeros(num_volume_points), torch.ones(num_surface_points)]).bool()
        return {'input_pos': points, 'output_feat': features, 'output_pos': points, 'surf': surf,
                'input_pos_volume': volume_points, 'input_pos_surface': surface_points,
                'output_feat_volume': volume_velocities, 'output_feat_surface': surface_pressure,
                'output_pos_volume': volume_points, 'output_pos_surface': surface_points,
                'vol_kv_indices': vol_kv_indices, 'surf_kv_indices': surf_kv_indices}
def seed_worker(worker_id):
    # Re-seed numpy and Python's random in each DataLoader worker (PyTorch reproducibility recipe)
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Module-level generator shared by the DataLoaders below; it must live at module
# scope, not inside seed_worker, for `generator=g` to resolve.
g = torch.Generator()
g.manual_seed(0)
def get_samples(root):
    """List sample directories, grouped by the param0..param8 folds."""
    folds = [f'param{i}' for i in range(9)]
    samples = []
    for fold in folds:
        fold_samples = []
        for file in os.listdir(os.path.join(root, fold)):
            path = os.path.join(root, fold, file)
            if os.path.isdir(path):
                fold_samples.append(os.path.join(fold, file))
        samples.append(fold_samples)
    return samples  # 100 + 99 + 97 + 100 + 100 + 96 + 100 + 98 + 99 = 889 samples
def get_dataloaders(cfg):
    """Build train/val DataLoaders, holding out fold cfg.fold_id for validation."""
    samples = get_samples(cfg.data_dir)
    assert 0 <= cfg.fold_id < len(samples), f'fold_id must be in [0, {len(samples) - 1}]'
    trainlst = []
    for i in range(len(samples)):
        if i == cfg.fold_id:
            continue
        trainlst += samples[i]
    vallst = samples[cfg.fold_id]
    if not cfg.preprocessed:
        preprocess_data(cfg.data_dir, trainlst, savedir=cfg.save_dir)
        preprocess_data(cfg.data_dir, vallst, savedir=cfg.save_dir)
    coef_norm = get_coef_norm(trainlst, savedir=cfg.save_dir)
    train_dataset = ShapenetCarPVDataset(cfg, trainlst, split='train', coef_norm=coef_norm)
    val_dataset = ShapenetCarPVDataset(cfg, vallst, split='val', coef_norm=coef_norm)
    print('len(train_dataset)', len(train_dataset))
    print('len(val_dataset)', len(val_dataset))
    train_dataloader = DataLoader(
        train_dataset, batch_size=cfg.batch_size, shuffle=True,
        drop_last=True, num_workers=cfg.num_workers,
        worker_init_fn=seed_worker, generator=g
    )
    val_dataloader = DataLoader(
        val_dataset, batch_size=1, shuffle=False,
        drop_last=False, num_workers=cfg.num_workers,
        worker_init_fn=seed_worker, generator=g
    )
    return train_dataloader, val_dataloader
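
# Minimal usage sketch, not part of the original pipeline: it builds a config
# exposing the fields this module reads and pulls one training batch. The paths
# and hyperparameter values below are placeholder assumptions.
if __name__ == '__main__':
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        data_dir='data/shapenet_car',       # hypothetical raw-data root containing param0..param8
        save_dir='data/shapenet_car_prep',  # hypothetical cache dir for the preprocessed .npy files
        preprocessed=False,                 # run preprocess_data on first use
        fold_id=0,                          # hold out param0 for validation
        train_num_points_volume=8192,       # assumed subsampling sizes
        train_num_points_surface=2048,
        batch_size=1,
        num_workers=0,
    )
    train_loader, val_loader = get_dataloaders(cfg)
    batch = next(iter(train_loader))
    print(batch['input_pos'].shape, batch['output_feat'].shape, batch['surf'].shape)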