Spaces:
Sleeping
Sleeping
File size: 5,614 Bytes
67fb03c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import pyvista as pv
import numpy as np
import matplotlib.pyplot as plt
import os
from umap.umap_ import UMAP # Correct import for umap-learn
# Configuration
# Dataset name; interpolated into both the data path and the metrics path below.
dataset = "plane_transonic"
# Root folder of the dataset (hard-coded absolute path).
train_folder = f'/raid/ansysai/pkakka/6-Transformers/comparePhysicsLM/Data/{dataset}/'
# Output directory for saved embeddings, scores and plots, expressed relative
# to the dataset folder (two levels up, then metrics/<dataset>/).
train_save_dir = os.path.join(train_folder, f"../../metrics/{dataset}/")
# Runs at module level, so the directory is created as soon as this file is
# executed or imported; exist_ok=True makes repeated runs harmless.
os.makedirs(train_save_dir, exist_ok=True)
def normalize_points(points):
    """Center a point cloud on its centroid and scale it into the unit sphere.

    The points are translated so that their mean sits at the origin, then
    divided by the largest distance of any point from that origin, so the
    farthest point ends up at distance 1. Note that this bounds the cloud by
    a unit *sphere*, not by a unit bounding box.

    Args:
        points: Array-like of shape (n_points, n_dims).

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``points``; the input
        array is not modified.

    Raises:
        ValueError: If ``points`` contains no elements.
    """
    points = np.asarray(points)
    if points.size == 0:
        # Fail early with a clear message instead of NumPy's
        # "mean of empty slice" warning followed by a reduction error.
        raise ValueError("Cannot normalize an empty point cloud")
    centered = points - points.mean(axis=0)
    max_dist = np.linalg.norm(centered, axis=1).max()
    # When all points coincide max_dist is 0; keep the centered cloud as-is
    # rather than dividing by zero.
    if max_dist > 0:
        centered = centered / max_dist
    return centered
def get_points(file_path, max_points=1000, rng=None):
    """Load a mesh and return a fixed-length, normalized point feature vector.

    The mesh is read with ``pv.read``. If it has more than ``max_points``
    points, a random subset of ``max_points`` points is drawn without
    replacement. The (sub)sampled cloud is normalized with
    ``normalize_points``. If the mesh has fewer than ``max_points`` points,
    the normalized cloud is padded with randomly chosen duplicates of its own
    points, so every call returns a vector of the same length and the
    per-geometry vectors can be stacked into one feature matrix.

    Args:
        file_path: Path of the mesh file to read.
        max_points: Number of points in the returned feature vector.
        rng: Optional seed or ``numpy.random.Generator`` for reproducible
            sampling. When ``None`` (the default) the global ``np.random``
            state is used.

    Returns:
        1-D ``numpy.ndarray`` holding ``max_points`` flattened points.

    Raises:
        ValueError: If the file cannot be read or processed, or the mesh
            contains no points. The underlying exception is chained as the
            cause.
    """
    try:
        mesh = pv.read(file_path)
        points = np.asarray(mesh.points)
        if len(points) == 0:
            raise ValueError("mesh contains no points")

        # np.random (module) and Generator both expose choice(a, size, replace).
        sampler = np.random if rng is None else np.random.default_rng(rng)

        if len(points) > max_points:
            indices = sampler.choice(len(points), max_points, replace=False)
            points = points[indices]

        points = normalize_points(points)

        # Pad AFTER normalizing so the duplicates do not shift the centroid
        # or the scale of the geometry.
        shortfall = max_points - len(points)
        if shortfall > 0:
            extra = sampler.choice(len(points), shortfall, replace=True)
            points = np.concatenate([points, points[extra]])

        return points.flatten()  # Flatten to 1D for UMAP
    except Exception as e:
        raise ValueError(f"Error reading {file_path}: {e}") from e
def _read_case_names(list_path):
    """Return the stripped, non-blank lines of a case-list text file."""
    with open(list_path, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def _find_vtp_files(data_folder, names, label):
    """Return (name, path) pairs for cases whose VTP exists; warn about the rest."""
    found = []
    for name in names:
        vtp_file = os.path.join(data_folder, '1_VTK_surface', name, f'{name}.vtp')
        if os.path.exists(vtp_file):
            found.append((name, vtp_file))
        else:
            print(f"Warning: {label} VTP not found: {vtp_file}")
    return found


def compute_umap_embeddings(train_folder, output_file='umap_embeddings.npz'):
    """Compute joint 2-D UMAP embeddings for the training and test geometries.

    Case names are read from ``1_VTK_surface/train.txt`` and
    ``1_VTK_surface/test.txt`` under ``train_folder``; each case is expected
    at ``1_VTK_surface/<name>/<name>.vtp``. Missing VTP files are reported
    with a warning and skipped. A single UMAP model is fitted on the train
    and test feature vectors together, and the result is split back into the
    two groups. The embeddings and case names are saved with ``np.savez`` to
    ``output_file`` inside the module-level ``train_save_dir``.

    Args:
        train_folder: Dataset root containing the ``1_VTK_surface`` folder.
        output_file: File name of the ``.npz`` archive to write.

    Returns:
        Tuple ``(train_embeddings, test_embeddings, train_names, test_names)``
        where the name lists contain only the cases whose VTP file was found.

    Raises:
        ValueError: If either list file is missing, if no training or no test
            VTP is found, or if the per-geometry feature vectors do not all
            have the same length.
    """
    train_txt_path = os.path.join(train_folder, '1_VTK_surface/train.txt')
    test_txt_path = os.path.join(train_folder, '1_VTK_surface/test.txt')
    for path in (train_txt_path, test_txt_path):
        if not os.path.exists(path):
            raise ValueError(f"{path} not found")

    train_files = _find_vtp_files(train_folder, _read_case_names(train_txt_path), "Training")
    test_files = _find_vtp_files(train_folder, _read_case_names(test_txt_path), "Test")
    if not train_files or not test_files:
        raise ValueError("No valid training or test VTPs found")

    train_names = [name for name, _ in train_files]
    test_names = [name for name, _ in test_files]

    # Training cases first, then test cases, so the embedding rows can be
    # split back by position below.
    features = [get_points(vtp) for _, vtp in train_files + test_files]

    # UMAP needs a rectangular (n_samples, n_features) matrix; report ragged
    # input explicitly instead of failing deep inside array conversion.
    lengths = {len(feature) for feature in features}
    if len(lengths) != 1:
        raise ValueError(
            f"Inconsistent feature lengths {sorted(lengths)}; "
            "every geometry must yield the same number of sampled points"
        )
    all_features = np.vstack(features)

    reducer = UMAP(n_components=2, random_state=42, n_neighbors=15, min_dist=0.1)
    embeddings = reducer.fit_transform(all_features)
    train_embeddings = embeddings[:len(train_files)]
    test_embeddings = embeddings[len(train_files):]

    output_path = os.path.join(train_save_dir, output_file)
    np.savez(output_path, train_emb=train_embeddings, test_emb=test_embeddings,
             train_names=train_names, test_names=test_names)
    print(f"UMAP embeddings saved: {output_path}")
    return train_embeddings, test_embeddings, train_names, test_names
def compute_umap_scores(train_embeddings, test_embeddings, test_names, output_file='test_umap_scores.txt'):
    """Score each test embedding by its closeness to the training embeddings.

    For every test point the Euclidean distances to all training points are
    computed and their median ``m`` is converted to a score ``1 / (1 + m)``
    (higher means closer). Scores are printed, written as a tab-separated
    table to ``output_file`` inside the module-level ``train_save_dir``, and
    summarized (mean, standard deviation, min, max) when at least one score
    exists. A scatter plot of both embedding sets is saved as
    ``umap_plot.png`` in the same directory.

    Args:
        train_embeddings: 2-D array of training embeddings (one row each).
        test_embeddings: 2-D array of test embeddings (one row each).
        test_names: Names paired positionally with ``test_embeddings``.
        output_file: File name of the score table to write.

    Returns:
        List of scores, one per test embedding, in input order.
    """
    report_path = os.path.join(train_save_dir, output_file)
    scores = []

    with open(report_path, 'w') as report:
        report.write("Test_File\tUMAP_Score\n")
        for name, point in zip(test_names, test_embeddings):
            offsets = train_embeddings - point
            typical_dist = np.median(np.linalg.norm(offsets, axis=1))
            similarity = 1 / (1 + typical_dist)  # 0-1, higher = closer
            scores.append(similarity)
            print(f"{name}: {similarity:.4f}")
            report.write(f"{name}\t{similarity:.6f}\n")

    if scores:
        print(f"\nAverage Score: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
        print(f"Min/Max: {np.min(scores):.4f} / {np.max(scores):.4f}")

    # Training points are drawn first so the test points are layered on top.
    plt.figure(figsize=(8, 6))
    for cloud, color, label in (
        (train_embeddings, 'blue', 'Train'),
        (test_embeddings, 'red', 'Test'),
    ):
        plt.scatter(cloud[:, 0], cloud[:, 1], c=color, label=label, alpha=0.6)
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.title('UMAP Embeddings of Geometries')
    plt.legend()
    plt.savefig(os.path.join(train_save_dir, 'umap_plot.png'))
    plt.close()

    return scores
def visualize_sample(test_folder):
    """Display the first geometry listed in the test split.

    Reads ``1_VTK_surface/test.txt`` under ``test_folder``, takes the first
    non-blank entry and, if ``1_VTK_surface/<name>/<name>.vtp`` exists, shows
    that mesh in a pyvista plotter. Does nothing when the list has no
    entries or the VTP file is missing.

    Args:
        test_folder: Dataset root containing the ``1_VTK_surface`` folder.
    """
    listing = os.path.join(test_folder, '1_VTK_surface/test.txt')
    with open(listing, 'r') as handle:
        stripped = (raw.strip() for raw in handle)
        folder_names = [entry for entry in stripped if entry]

    if not folder_names:
        return

    name = folder_names[0]
    vtp_file = os.path.join(test_folder, '1_VTK_surface', name, f'{name}.vtp')
    if not os.path.exists(vtp_file):
        return

    mesh = pv.read(vtp_file)
    plotter = pv.Plotter()
    plotter.add_mesh(mesh, color='blue', show_edges=True)
    plotter.add_title(f'Sample Geometry: {name}')
    plotter.show()
# Run analysis
if __name__ == "__main__":
    # Embed the train and test geometries together, score every test case
    # against the training set, then show the first test geometry.
    train_emb, test_emb, train_names, test_names = compute_umap_embeddings(train_folder)
    scores = compute_umap_scores(train_emb, test_emb, test_names)
    visualize_sample(train_folder)