# datasets/shapenet_car_pv/dataset_shapenet_car_pv.py
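"""Dataset utilities for the ShapeNet-Car CFD benchmark (surface pressure and
volume velocity fields).

Converts the raw VTK surface and volume meshes into cached .npy arrays,
computes normalization statistics over the training folds, and exposes a
PyTorch Dataset plus train/val DataLoaders with one fold held out.
"""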
import os

import meshio
import numpy as np
import torch
import vtk
from torch.utils.data import DataLoader, Dataset
from vtk.util.numpy_support import vtk_to_numpy

def load_unstructured_grid_data(file_name):
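    """Read a legacy-format .vtk file and return its vtkUnstructuredGrid output."""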
reader = vtk.vtkUnstructuredGridReader()
reader.SetFileName(file_name)
reader.Update()
output = reader.GetOutput()
return output
def preprocess_data(root, samples, savedir=None):
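    """Convert raw VTK meshes under `root` into cached .npy arrays in `savedir`.

    For each sample this saves volume_points / volume_velocities (grid points
    exterior to the car surface) and surface_points / surface_pressure (the
    quad surface mesh vertices and their pressures).
    """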
    for s in samples:
        file_name_press_vtk = os.path.join(root, s, 'quadpress_smpl.vtk')
        file_name_velo_vtk = os.path.join(root, s, 'hexvelo_smpl.vtk')
        file_name_press = os.path.join(root, s, 'press.npy')
        # Skip samples whose raw files are missing (press.npy is loaded
        # unconditionally below, so it must be checked here as well).
        if not (os.path.exists(file_name_press_vtk) and os.path.exists(file_name_velo_vtk)
                and os.path.exists(file_name_press)):
            continue
        # The surface mesh is a single block of quad cells; keep only the
        # vertices actually referenced by those cells.
        surface_mesh = meshio.read(file_name_press_vtk)
        assert len(surface_mesh.cells) == 1
        cell_block = surface_mesh.cells[0]
        assert cell_block.type == "quad"
        unique = np.unique(cell_block.data)
        # Keep these as float32 numpy arrays; they are saved with np.save below.
        surface_points = surface_mesh.points[unique].astype(np.float32)
        surface_pressure = np.load(file_name_press)[unique].astype(np.float32)
        unstructured_grid_data_press = load_unstructured_grid_data(file_name_press_vtk)
        unstructured_grid_data_velo = load_unstructured_grid_data(file_name_velo_vtk)
        velo = vtk_to_numpy(unstructured_grid_data_velo.GetPointData().GetVectors())
        points_velo = vtk_to_numpy(unstructured_grid_data_velo.GetPoints().GetData())
        points_press = vtk_to_numpy(unstructured_grid_data_press.GetPoints().GetData())
        # Volume (exterior) points are the velocity-grid points that do not
        # coincide with any surface point; this relies on the two grids sharing
        # bitwise-identical float coordinates at the surface.
        surface = {tuple(p) for p in points_press}
        exterior_indices = [i for i, p in enumerate(points_velo) if tuple(p) not in surface]
        volume_points = points_velo[exterior_indices]
        volume_velocities = velo[exterior_indices]
        save_path = os.path.join(savedir, s)
        os.makedirs(save_path, exist_ok=True)
np.save(os.path.join(save_path, 'volume_points.npy'), volume_points)
np.save(os.path.join(save_path, 'volume_velocities.npy'), volume_velocities)
np.save(os.path.join(save_path, 'surface_points.npy'), surface_points)
np.save(os.path.join(save_path, 'surface_pressure.npy'), surface_pressure)
def get_coef_norm(samples, savedir):
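    """Compute normalization statistics over the preprocessed samples.

    Returns a tuple of float32 tensors:
    (volume_points_min, volume_points_max,
     surface_points_min, surface_points_max,
     volume_velocities_mean, volume_velocities_std,
     surface_pressure_mean, surface_pressure_std).
    """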
# Initialize arrays to collect all data
all_volume_points = []
all_surface_points = []
all_volume_velocities = []
all_surface_pressure = []
for s in samples:
save_path = os.path.join(savedir, s)
if not os.path.exists(save_path):
continue
# Load data for each sample
volume_points = np.load(os.path.join(save_path, 'volume_points.npy'))
volume_velocities = np.load(os.path.join(save_path, 'volume_velocities.npy'))
surface_points = np.load(os.path.join(save_path, 'surface_points.npy'))
surface_pressure = np.load(os.path.join(save_path, 'surface_pressure.npy'))
# Collect data
all_volume_points.append(volume_points)
all_surface_points.append(surface_points)
all_volume_velocities.append(volume_velocities)
all_surface_pressure.append(surface_pressure)
# Concatenate all data
all_volume_points = np.concatenate(all_volume_points, axis=0)
all_surface_points = np.concatenate(all_surface_points, axis=0)
all_volume_velocities = np.concatenate(all_volume_velocities, axis=0)
all_surface_pressure = np.concatenate(all_surface_pressure, axis=0)
# Calculate normalization coefficients
# Volume points: max and min for each column
volume_points_min = np.min(all_volume_points, axis=0)
volume_points_max = np.max(all_volume_points, axis=0)
# Surface points: max and min for each column
surface_points_min = np.min(all_surface_points, axis=0)
surface_points_max = np.max(all_surface_points, axis=0)
# Volume velocities: mean and std for each column
volume_velocities_mean = np.mean(all_volume_velocities, axis=0)
volume_velocities_std = np.std(all_volume_velocities, axis=0)
# Surface pressure: mean and std (assuming it's a 1D array)
surface_pressure_mean = np.mean(all_surface_pressure)
surface_pressure_std = np.std(all_surface_pressure)
    # Pack as float32 tensors so normalization in __getitem__ stays in float32
    # (the numpy statistics above are float64 by default and would otherwise
    # silently upcast the data).
    coef_norm = (
        torch.tensor(volume_points_min, dtype=torch.float32), torch.tensor(volume_points_max, dtype=torch.float32),
        torch.tensor(surface_points_min, dtype=torch.float32), torch.tensor(surface_points_max, dtype=torch.float32),
        torch.tensor(volume_velocities_mean, dtype=torch.float32), torch.tensor(volume_velocities_std, dtype=torch.float32),
        torch.tensor(surface_pressure_mean, dtype=torch.float32), torch.tensor(surface_pressure_std, dtype=torch.float32),
    )
return coef_norm
class ShapenetCarPVDataset(Dataset):
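    """Preprocessed ShapeNet-Car samples as normalized point clouds.

    Points are min-max normalized; velocities and pressures are standardized.
    Training samples are randomly subsampled to a fixed number of volume and
    surface points; validation samples are returned in full.
    """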
def __init__(self, cfg, data, split='train', coef_norm=None):
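        """`coef_norm` is the tuple produced by get_coef_norm; although it
        defaults to None, __getitem__ requires it to normalize the data."""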
self.coef_norm = coef_norm
self.data = data
self.save_dir = cfg.save_dir
self.split = split
self.train_num_points_volume = cfg.train_num_points_volume
self.train_num_points_surface = cfg.train_num_points_surface
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
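        """Load one cached sample, normalize it, and assemble the model dict."""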
data = self.data[idx]
save_path = os.path.join(self.save_dir, data)
volume_points = torch.from_numpy(np.load(os.path.join(save_path, 'volume_points.npy'))).float()
volume_velocities = torch.from_numpy(np.load(os.path.join(save_path, 'volume_velocities.npy'))).float()
surface_points = torch.from_numpy(np.load(os.path.join(save_path, 'surface_points.npy'))).float()
surface_pressure = torch.from_numpy(np.load(os.path.join(save_path, 'surface_pressure.npy'))).float()
        # Everything loaded above is on the CPU, so moving coef_norm to the
        # data's device is a no-op here; it just keeps the code device-agnostic.
        device = volume_points.device
        coef_norm_device = tuple(t.to(device) for t in self.coef_norm)
# Normalize data: (x - min) / (max - min) for points, (x - mean) / std for velocities and pressure
volume_points = ((volume_points - coef_norm_device[0]) / (coef_norm_device[1] - coef_norm_device[0]))
surface_points = ((surface_points - coef_norm_device[2]) / (coef_norm_device[3] - coef_norm_device[2]))
volume_velocities = ((volume_velocities - coef_norm_device[4]) / (coef_norm_device[5] + 1e-8))
surface_pressure = ((surface_pressure - coef_norm_device[6]) / (coef_norm_device[7] + 1e-8)).view(-1, 1)
if self.split == 'train':
indices_volume = torch.randperm(volume_points.shape[0])[:self.train_num_points_volume]
indices_surface = torch.randperm(surface_points.shape[0])[:self.train_num_points_surface]
volume_points = volume_points[indices_volume]
volume_velocities = volume_velocities[indices_volume]
surface_points = surface_points[indices_surface]
surface_pressure = surface_pressure[indices_surface]
vol_kv_indices = torch.arange(volume_points.shape[0])
surf_kv_indices = torch.arange(surface_points.shape[0])
        else:
            # Validation keeps every point; the kv indices still expose a
            # fixed-size random subset for downstream use.
            vol_kv_indices = torch.randperm(volume_points.shape[0])[:self.train_num_points_volume]
            surf_kv_indices = torch.randperm(surface_points.shape[0])[:self.train_num_points_surface]
num_pressure_points = surface_points.shape[0]
num_volume_points = volume_points.shape[0]
        points = torch.cat([volume_points, surface_points], dim=0)
        # Features are 4-channel (vx, vy, vz, p): volume rows carry velocity
        # with a zero pressure channel; surface rows carry pressure with zero
        # velocity channels.
        features = torch.cat([volume_velocities, torch.zeros((num_pressure_points, 3))], dim=0)
        features = torch.cat([features, torch.cat([torch.zeros((num_volume_points, 1)), surface_pressure], dim=0)], dim=1)
        # Boolean mask: True for surface points, False for volume points.
        surf = torch.cat([torch.zeros(num_volume_points), torch.ones(num_pressure_points)]).bool()
        data = {'input_pos': points, 'output_feat': features, 'output_pos': points, 'surf': surf,
                'input_pos_volume': volume_points, 'input_pos_surface': surface_points,
                'output_feat_volume': volume_velocities, 'output_feat_surface': surface_pressure,
                'output_pos_volume': volume_points, 'output_pos_surface': surface_points,
                'vol_kv_indices': vol_kv_indices, 'surf_kv_indices': surf_kv_indices}
return data
def seed_worker(worker_id):
    # Give each DataLoader worker its own numpy seed derived from the torch
    # seed, so workers do not produce identical random augmentations.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)

# Fixed-seed generator shared by the DataLoaders for reproducible shuffling.
g = torch.Generator()
g.manual_seed(0)
def get_samples(root):
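    """Collect sample subdirectories from the nine param* folds under `root`.

    Returns one list per fold with entries of the form 'param{i}/<sample>'.
    """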
folds = [f'param{i}' for i in range(9)]
samples = []
for fold in folds:
fold_samples = []
files = os.listdir(os.path.join(root, fold))
for file in files:
            path = os.path.join(root, fold, file)
if os.path.isdir(path):
fold_samples.append(os.path.join(fold, file))
samples.append(fold_samples)
return samples # 100 + 99 + 97 + 100 + 100 + 96 + 100 + 98 + 99 = 889 samples
def get_dataloaders(cfg):
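    """Build train/val DataLoaders, holding out fold `cfg.fold_id` for validation."""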
samples = get_samples(cfg.data_dir)
    if not 0 <= cfg.fold_id < len(samples):
        raise ValueError(f'fold_id must be in [0, {len(samples)}), got {cfg.fold_id}')
    # Hold out one fold for validation and train on the rest. (The original
    # code set vallst to None for an out-of-range fold_id, which only failed
    # later with a confusing error inside preprocess_data / the Dataset.)
    trainlst = []
    for i in range(len(samples)):
        if i == cfg.fold_id:
            continue
        trainlst += samples[i]
    vallst = samples[cfg.fold_id]
if not cfg.preprocessed:
preprocess_data(cfg.data_dir, trainlst, savedir=cfg.save_dir)
preprocess_data(cfg.data_dir, vallst, savedir=cfg.save_dir)
coef_norm = get_coef_norm(trainlst, savedir=cfg.save_dir)
train_dataset = ShapenetCarPVDataset(cfg, trainlst, split='train', coef_norm=coef_norm)
val_dataset = ShapenetCarPVDataset(cfg, vallst, split='val', coef_norm=coef_norm)
    print('len(train_dataset)', len(train_dataset))
    print('len(val_dataset)', len(val_dataset))
train_dataloader = DataLoader(
train_dataset, batch_size=cfg.batch_size, shuffle=True,
drop_last=True, num_workers=cfg.num_workers,
worker_init_fn=seed_worker, generator=g
)
val_dataloader = DataLoader(
val_dataset, batch_size=1, shuffle=False,
drop_last=False, num_workers=cfg.num_workers,
worker_init_fn=seed_worker, generator=g
)
return train_dataloader, val_dataloader
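

# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hypothetical example of driving get_dataloaders. The cfg object
# and every attribute value below are assumptions for illustration; any config
# object exposing these attributes (e.g. a Hydra/OmegaConf config) would work.
if __name__ == '__main__':
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        data_dir='data/shapenet_car',        # hypothetical raw-data root containing param0..param8
        save_dir='data/shapenet_car_prep',   # hypothetical cache directory for the .npy files
        preprocessed=False,                  # run preprocessing on first use
        fold_id=0,                           # hold out param0 for validation
        batch_size=1,
        num_workers=0,
        train_num_points_volume=16384,       # illustrative subsample sizes
        train_num_points_surface=16384,
    )
    train_loader, val_loader = get_dataloaders(cfg)
    batch = next(iter(train_loader))
    print({k: tuple(v.shape) for k, v in batch.items()})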