# udbhav
# Recreate Trame_app branch with clean history
# 67fb03c
import pyvista as pv
import numpy as np
import matplotlib.pyplot as plt
import os
from umap.umap_ import UMAP # Correct import for umap-learn
# Configuration
# Dataset name; it selects both the input data folder and the metrics output folder.
dataset = "plane_transonic"
# Root folder of the dataset. The functions below look for a '1_VTK_surface'
# subfolder (split lists and per-case .vtp files) inside it.
train_folder = f'/raid/ansysai/pkakka/6-Transformers/comparePhysicsLM/Data/{dataset}/'
# Output folder for embeddings, scores and the plot: two levels above the
# dataset folder, under metrics/<dataset>/.
train_save_dir = os.path.join(train_folder, f"../../metrics/{dataset}/")
# NOTE: this runs at import time, so importing the module creates the folder.
os.makedirs(train_save_dir, exist_ok=True)
def normalize_points(points):
    """Center a point set on its centroid and scale it into the unit sphere.

    The mean over axis 0 is subtracted from every point, then all points are
    divided by the largest Euclidean norm (taken along axis 1) so that the
    farthest point ends up at distance 1 from the origin. If that largest
    norm is not positive (e.g. every point coincides), the centered points
    are returned without scaling.

    Args:
        points: Array of points, one point per row.

    Returns:
        A new array with the same shape; the input is not modified.
    """
    centered = points - np.mean(points, axis=0)
    radius = np.max(np.linalg.norm(centered, axis=1))
    # Skip the division when there is no spread, to avoid dividing by zero.
    return centered / radius if radius > 0 else centered
def get_points(file_path, max_points=1000, seed=None):
    """Load a mesh file and return its point cloud as a normalized flat vector.

    The mesh is read with ``pv.read``. When it has more than ``max_points``
    points, exactly ``max_points`` of them are drawn at random without
    replacement; smaller meshes are used whole, so the length of the result
    then depends on the mesh. The selected points are passed through
    ``normalize_points`` and flattened to 1D.

    Args:
        file_path: Path of the mesh file to read.
        max_points: Maximum number of points kept from the mesh.
        seed: Optional seed for the subsampling. ``None`` (the default) draws
            from NumPy's global random state; any other value makes the
            subsample reproducible for that file.

    Returns:
        1D array holding the normalized point coordinates.

    Raises:
        ValueError: If the file cannot be read, the mesh has no points, or
            any later processing step fails. The underlying exception is
            chained as ``__cause__``.
    """
    try:
        mesh = pv.read(file_path)
        points = mesh.points
        if len(points) == 0:
            # Fail with a readable message instead of a numpy reduction error.
            raise ValueError("mesh contains no points")
        if len(points) > max_points:
            if seed is None:
                indices = np.random.choice(len(points), max_points, replace=False)
            else:
                rng = np.random.default_rng(seed)
                indices = rng.choice(len(points), max_points, replace=False)
            points = points[indices]
        return normalize_points(points).flatten()  # Flatten to 1D for UMAP
    except Exception as e:
        # Callers rely on ValueError; keep the original error as the cause.
        raise ValueError(f"Error reading {file_path}: {e}") from e
def _read_split_names(list_path):
    """Return the stripped, non-blank lines of a split listing file."""
    with open(list_path, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def _collect_vtp_files(data_folder, names, label):
    """Pair each case name with its existing VTP path, warning about missing ones."""
    found = []
    for name in names:
        vtp_file = os.path.join(data_folder, '1_VTK_surface', name, f'{name}.vtp')
        if os.path.exists(vtp_file):
            found.append((name, vtp_file))
        else:
            print(f"Warning: {label} VTP not found: {vtp_file}")
    return found


def compute_umap_embeddings(train_folder, output_file='umap_embeddings.npz'):
    """Compute 2D UMAP embeddings for the training and test geometries.

    Case names are read from ``1_VTK_surface/train.txt`` and
    ``1_VTK_surface/test.txt`` under ``train_folder``. Each case is expected
    at ``1_VTK_surface/<name>/<name>.vtp``; cases whose file is missing are
    skipped with a printed warning. Every remaining file is converted to a
    feature vector with ``get_points`` and a single UMAP model is fitted on
    the training and test vectors together.

    The embeddings and case names are saved with ``np.savez`` under the keys
    ``train_emb``, ``test_emb``, ``train_names`` and ``test_names``. The file
    is written to the module-level ``train_save_dir``, not to
    ``train_folder``.

    Args:
        train_folder: Dataset root containing the ``1_VTK_surface`` folder.
        output_file: File name of the ``.npz`` archive inside
            ``train_save_dir``.

    Returns:
        Tuple ``(train_embeddings, test_embeddings, train_names, test_names)``
        where the name lists only contain cases whose VTP file was found.

    Raises:
        ValueError: If a split file is missing, if either split has no
            existing VTP file, if the feature vectors do not all have the
            same length, or if ``get_points`` fails for a file.
    """
    train_txt_path = os.path.join(train_folder, '1_VTK_surface/train.txt')
    test_txt_path = os.path.join(train_folder, '1_VTK_surface/test.txt')
    for path in (train_txt_path, test_txt_path):
        if not os.path.exists(path):
            raise ValueError(f"{path} not found")

    listed_train = _read_split_names(train_txt_path)
    listed_test = _read_split_names(test_txt_path)

    train_files = _collect_vtp_files(train_folder, listed_train, "Training")
    test_files = _collect_vtp_files(train_folder, listed_test, "Test")
    if not train_files or not test_files:
        raise ValueError("No valid training or test VTPs found")

    train_names = [name for name, _ in train_files]
    test_names = [name for name, _ in test_files]

    # Training features first, then test features, so the embedding rows can
    # be split back by position below.
    features = [get_points(vtp) for _, vtp in train_files + test_files]

    # UMAP needs one fixed-length vector per geometry. Meshes with fewer
    # points than the subsample size yield shorter vectors, so check here
    # instead of letting the reducer fail with an unrelated-looking error.
    feature_lengths = {len(vec) for vec in features}
    if len(feature_lengths) > 1:
        raise ValueError(
            "Point-cloud feature vectors have differing lengths "
            f"{sorted(feature_lengths)}; every geometry must produce a vector "
            "of the same length for UMAP"
        )
    feature_matrix = np.vstack(features)

    reducer = UMAP(n_components=2, random_state=42, n_neighbors=15, min_dist=0.1)
    embeddings = reducer.fit_transform(feature_matrix)

    n_train = len(train_files)
    train_embeddings = embeddings[:n_train]
    test_embeddings = embeddings[n_train:]

    output_path = os.path.join(train_save_dir, output_file)
    np.savez(output_path, train_emb=train_embeddings, test_emb=test_embeddings,
             train_names=train_names, test_names=test_names)
    print(f"UMAP embeddings saved: {output_path}")
    return train_embeddings, test_embeddings, train_names, test_names
def compute_umap_scores(train_embeddings, test_embeddings, test_names, output_file='test_umap_scores.txt'):
    """Score each test geometry by its distance to the training set in UMAP space.

    For every test embedding, the Euclidean distance to each training
    embedding is computed and the median distance ``d`` is mapped to
    ``1 / (1 + d)``, so a smaller median distance gives a higher score.

    Each score is printed and written as a tab-separated row to
    ``output_file`` inside the module-level ``train_save_dir``. When at least
    one score exists, summary statistics are printed as well. A scatter plot
    of both embedding sets is saved as ``umap_plot.png`` in the same folder.

    Args:
        train_embeddings: 2D array of training embeddings, one row per case.
        test_embeddings: 2D array of test embeddings, one row per case.
        test_names: Names paired with the rows of ``test_embeddings``.
        output_file: File name of the score table inside ``train_save_dir``.

    Returns:
        List of scores in the order of ``test_names``.
    """
    results = []
    report_path = os.path.join(train_save_dir, output_file)
    with open(report_path, 'w') as report:
        report.write("Test_File\tUMAP_Score\n")
        for name, point in zip(test_names, test_embeddings):
            # Median distance from this test point to all training points.
            median_dist = np.median(np.linalg.norm(train_embeddings - point, axis=1))
            value = 1 / (1 + median_dist)  # 0-1, higher = closer
            results.append(value)
            print(f"{name}: {value:.4f}")
            report.write(f"{name}\t{value:.6f}\n")

    if results:
        print(f"\nAverage Score: {np.mean(results):.4f} ± {np.std(results):.4f}")
        print(f"Min/Max: {np.min(results):.4f} / {np.max(results):.4f}")

    # Scatter plot of both splits in the 2D embedding space.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(train_embeddings[:, 0], train_embeddings[:, 1], c='blue', label='Train', alpha=0.6)
    ax.scatter(test_embeddings[:, 0], test_embeddings[:, 1], c='red', label='Test', alpha=0.6)
    ax.set_xlabel('UMAP 1')
    ax.set_ylabel('UMAP 2')
    ax.set_title('UMAP Embeddings of Geometries')
    ax.legend()
    fig.savefig(os.path.join(train_save_dir, 'umap_plot.png'))
    plt.close(fig)
    return results
def visualize_sample(test_folder):
    """Show an interactive plot of the first geometry listed in ``test.txt``.

    Reads ``1_VTK_surface/test.txt`` under ``test_folder``, takes the first
    non-blank entry and, if ``1_VTK_surface/<name>/<name>.vtp`` exists, opens
    it in a PyVista plotter. Nothing is shown, and no message is printed,
    when the list is empty or the file is missing.

    Args:
        test_folder: Dataset root containing the ``1_VTK_surface`` folder.
    """
    listing_path = os.path.join(test_folder, '1_VTK_surface/test.txt')
    with open(listing_path, 'r') as listing:
        entries = [text for text in (raw.strip() for raw in listing) if text]

    if not entries:
        return

    name = entries[0]
    vtp_file = os.path.join(test_folder, '1_VTK_surface', name, f'{name}.vtp')
    if not os.path.exists(vtp_file):
        return

    plotter = pv.Plotter()
    plotter.add_mesh(pv.read(vtp_file), color='blue', show_edges=True)
    plotter.add_title(f'Sample Geometry: {name}')
    plotter.show()
# Run analysis
if __name__ == "__main__":
    # Embed every listed geometry, then score each test case against the
    # training set in the embedding space.
    embedding_results = compute_umap_embeddings(train_folder)
    train_emb, test_emb, train_names, test_names = embedding_results
    scores = compute_umap_scores(train_emb, test_emb, test_names)
    # Finally open an interactive view of the first test geometry.
    visualize_sample(train_folder)