"""Compare training and test geometries in a shared 2-D UMAP embedding.

For every case listed in ``train.txt`` / ``test.txt`` the surface VTP is read,
reduced to a fixed-size normalized point sample, and embedded with UMAP.  Each
test case is then scored by its median embedding-space distance to the
training cases, and a scatter plot of the embedding is written to disk.
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pyvista as pv
from umap.umap_ import UMAP  # Correct import for umap-learn

# Configuration
dataset = "plane_transonic"
train_folder = f'/raid/ansysai/pkakka/6-Transformers/comparePhysicsLM/Data/{dataset}/'
train_save_dir = os.path.join(train_folder, f"../../metrics/{dataset}/")
os.makedirs(train_save_dir, exist_ok=True)

# Single seed shared by the point subsampling and by UMAP so runs repeat.
_RANDOM_SEED = 42


def normalize_points(points):
    """Center points on their centroid and scale them into the unit ball.

    Args:
        points: Array of shape (n_points, n_dims).

    Returns:
        A new array whose mean is the origin and whose farthest point lies at
        distance 1 from it.  If all points coincide (max distance 0) the
        centered points are returned unscaled.
    """
    points = points - np.mean(points, axis=0)
    max_dist = np.max(np.linalg.norm(points, axis=1))
    if max_dist > 0:
        points = points / max_dist
    return points


def get_points(file_path, max_points=1000, rng=None):
    """Read a VTP file and return a fixed-length, normalized point feature.

    The mesh points are reduced (or padded) to exactly ``max_points`` points
    so that every file yields a vector of the same length, which is required
    to stack the features into one matrix for UMAP.

    Args:
        file_path: Path of the VTP file to read.
        max_points: Number of points in the returned sample.
        rng: Optional ``numpy.random.Generator`` used for sampling.  When
            omitted, the global ``np.random`` state is used.

    Returns:
        1-D array of length ``max_points * n_dims`` holding the flattened,
        normalized point sample.

    Raises:
        ValueError: If the file cannot be read or the mesh has no points.
            The original exception is attached as ``__cause__``.
    """
    sampler = np.random if rng is None else rng
    try:
        mesh = pv.read(file_path)
        points = np.asarray(mesh.points)
        n_points = len(points)
        if n_points == 0:
            raise ValueError("mesh contains no points")
        if n_points > max_points:
            indices = sampler.choice(n_points, max_points, replace=False)
            points = points[indices]
        elif n_points < max_points:
            # Keep every original point and pad with resampled duplicates;
            # otherwise small meshes would produce shorter feature vectors.
            extra = sampler.choice(n_points, max_points - n_points, replace=True)
            points = np.concatenate([points, points[extra]])
        # NOTE(review): the flattened vector depends on point order, and the
        # random subsample does not preserve any canonical ordering.
        return normalize_points(points).flatten()  # Flatten to 1D for UMAP
    except Exception as e:
        raise ValueError(f"Error reading {file_path}: {e}") from e


def _read_names(txt_path):
    """Return the non-blank, stripped lines of a case-list text file."""
    with open(txt_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def _resolve_vtp_files(folder, names, label):
    """Map case names to existing VTP paths, warning about missing files.

    ``label`` ("Training" / "Test") only affects the warning text.
    """
    found = []
    for name in names:
        vtp_file = os.path.join(folder, '1_VTK_surface', name, f'{name}.vtp')
        if os.path.exists(vtp_file):
            found.append((name, vtp_file))
        else:
            print(f"Warning: {label} VTP not found: {vtp_file}")
    return found


def compute_umap_embeddings(train_folder, output_file='umap_embeddings.npz'):
    """Compute UMAP embeddings for training and test VTPs.

    Train and test features are embedded together in a single ``fit_transform``
    call so both sets share one 2-D space.

    Args:
        train_folder: Dataset root containing ``1_VTK_surface/train.txt``,
            ``1_VTK_surface/test.txt`` and one ``<name>/<name>.vtp`` per case.
        output_file: File name (inside ``train_save_dir``) for the saved
            ``.npz`` archive.

    Returns:
        Tuple ``(train_embeddings, test_embeddings, train_names, test_names)``
        where the names are lists restricted to cases whose VTP exists.

    Raises:
        ValueError: If a list file is missing, if no valid training or test
            VTP is found, or if a VTP cannot be read.
    """
    train_txt_path = os.path.join(train_folder, '1_VTK_surface/train.txt')
    test_txt_path = os.path.join(train_folder, '1_VTK_surface/test.txt')

    for path in [train_txt_path, test_txt_path]:
        if not os.path.exists(path):
            raise ValueError(f"{path} not found")

    train_files = _resolve_vtp_files(
        train_folder, _read_names(train_txt_path), "Training")
    test_files = _resolve_vtp_files(
        train_folder, _read_names(test_txt_path), "Test")

    if not train_files or not test_files:
        raise ValueError("No valid training or test VTPs found")

    train_names = [name for name, _ in train_files]
    test_names = [name for name, _ in test_files]

    # Seeded generator: the subsample feeds UMAP, so it must repeat too.
    rng = np.random.default_rng(_RANDOM_SEED)
    train_features = [get_points(vtp, rng=rng) for _, vtp in train_files]
    test_features = [get_points(vtp, rng=rng) for _, vtp in test_files]

    # Explicit (n_samples, n_features) matrix; rows are train first, then test.
    all_features = np.vstack(train_features + test_features)

    reducer = UMAP(n_components=2, random_state=_RANDOM_SEED,
                   n_neighbors=15, min_dist=0.1)
    embeddings = reducer.fit_transform(all_features)

    train_embeddings = embeddings[:len(train_files)]
    test_embeddings = embeddings[len(train_files):]

    output_path = os.path.join(train_save_dir, output_file)
    np.savez(output_path,
             train_emb=train_embeddings,
             test_emb=test_embeddings,
             train_names=train_names,
             test_names=test_names)
    print(f"UMAP embeddings saved: {output_path}")

    return train_embeddings, test_embeddings, train_names, test_names


def compute_umap_scores(train_embeddings, test_embeddings, test_names,
                        output_file='test_umap_scores.txt'):
    """Compute similarity scores based on UMAP space distances.

    For each test embedding the score is ``1 / (1 + median_distance)``, where
    the median is taken over its Euclidean distances to all training
    embeddings.  Scores are printed, written as a tab-separated file, and a
    train/test scatter plot is saved as ``umap_plot.png`` in
    ``train_save_dir``.

    Args:
        train_embeddings: Array-like of shape (n_train, 2).
        test_embeddings: Array-like of shape (n_test, 2).
        test_names: Names matching the rows of ``test_embeddings``.
        output_file: File name (inside ``train_save_dir``) for the score table.

    Returns:
        List of per-test scores, in the order of ``test_names``.
    """
    train_embeddings = np.asarray(train_embeddings)
    test_embeddings = np.asarray(test_embeddings)

    scores = []
    output_path = os.path.join(train_save_dir, output_file)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("Test_File\tUMAP_Score\n")
        for test_emb, name in zip(test_embeddings, test_names):
            distances = np.linalg.norm(train_embeddings - test_emb, axis=1)
            median_dist = np.median(distances)
            score = 1 / (1 + median_dist)  # 0-1, higher = closer
            scores.append(score)
            print(f"{name}: {score:.4f}")
            f.write(f"{name}\t{score:.6f}\n")

    if scores:
        print(f"\nAverage Score: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
        print(f"Min/Max: {np.min(scores):.4f} / {np.max(scores):.4f}")

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(train_embeddings[:, 0], train_embeddings[:, 1],
               c='blue', label='Train', alpha=0.6)
    ax.scatter(test_embeddings[:, 0], test_embeddings[:, 1],
               c='red', label='Test', alpha=0.6)
    ax.set_xlabel('UMAP 1')
    ax.set_ylabel('UMAP 2')
    ax.set_title('UMAP Embeddings of Geometries')
    ax.legend()
    fig.savefig(os.path.join(train_save_dir, 'umap_plot.png'))
    plt.close(fig)  # Close this figure explicitly so repeated calls don't leak.

    return scores


def visualize_sample(test_folder):
    """Plot the first test geometry.

    Reads ``1_VTK_surface/test.txt`` under ``test_folder`` and opens a
    pyvista plot window for the first listed case.  Prints a warning and
    returns without plotting when the list is empty or the VTP is missing.

    Args:
        test_folder: Dataset root containing the ``1_VTK_surface`` directory.
    """
    test_txt_path = os.path.join(test_folder, '1_VTK_surface/test.txt')
    with open(test_txt_path, 'r', encoding='utf-8') as f:
        folder_names = [line.strip() for line in f if line.strip()]

    if not folder_names:
        print(f"Warning: no test cases listed in {test_txt_path}")
        return

    name = folder_names[0]
    vtp_file = os.path.join(test_folder, '1_VTK_surface', name, f'{name}.vtp')
    if not os.path.exists(vtp_file):
        print(f"Warning: Test VTP not found: {vtp_file}")
        return

    mesh = pv.read(vtp_file)
    plotter = pv.Plotter()
    plotter.add_mesh(mesh, color='blue', show_edges=True)
    plotter.add_title(f'Sample Geometry: {name}')
    plotter.show()


# Run analysis
if __name__ == "__main__":
    train_emb, test_emb, train_names, test_names = compute_umap_embeddings(train_folder)
    scores = compute_umap_scores(train_emb, test_emb, test_names)
    visualize_sample(train_folder)