File size: 5,614 Bytes
67fb03c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import pyvista as pv
import numpy as np
import matplotlib.pyplot as plt
import os
from umap.umap_ import UMAP  # Correct import for umap-learn

# Configuration
# Dataset sub-directory name; interpolated into both paths below.
dataset = "plane_transonic"
# Dataset root (hard-coded absolute path). The functions below look for
# '1_VTK_surface/train.txt' and '1_VTK_surface/test.txt' underneath it.
train_folder = f'/raid/ansysai/pkakka/6-Transformers/comparePhysicsLM/Data/{dataset}/'
# Output directory for the saved embeddings, score file and plot: two levels
# above the dataset folder, then metrics/<dataset>/. The '..' segments are
# left unnormalized in the resulting string.
train_save_dir = os.path.join(train_folder, f"../../metrics/{dataset}/")
# NOTE: runs at import time, so merely importing this module creates the
# directory (exist_ok=True makes repeated runs harmless).
os.makedirs(train_save_dir, exist_ok=True)

def normalize_points(points):
    """Center a point cloud on its centroid and scale it by its largest radius.

    The per-column mean is subtracted from every point, then all coordinates
    are divided by the largest Euclidean distance of any point from the
    origin (the former centroid), so the farthest point ends up at distance 1.
    A cloud whose points all coincide is only centered, not scaled, which
    avoids a division by zero.

    Args:
        points: 2D array-like with one point per row.

    Returns:
        Array of the same shape holding the centered, scaled coordinates.
    """
    centered = points - np.mean(points, axis=0)
    radius = np.linalg.norm(centered, axis=1).max()
    if radius > 0:
        centered = centered / radius
    return centered

def get_points(file_path, max_points=1000, rng=None):
    """Load a mesh file and return its point cloud as a flat feature vector.

    The mesh is read with ``pv.read``. When it holds more than ``max_points``
    points, ``max_points`` of them are drawn at random without replacement.
    The kept points are passed through ``normalize_points`` and flattened
    to 1D.

    Args:
        file_path: Path of the mesh file to read.
        max_points: Upper bound on the number of points kept.
        rng: Optional random source exposing ``choice(a, size, replace=...)``,
            e.g. a ``numpy.random.Generator`` or ``RandomState``. Pass a
            seeded one for reproducible subsampling. When ``None`` the global
            ``np.random`` state is used, which matches the previous behavior.

    Returns:
        1D array of the flattened, normalized coordinates. Its length depends
        on how many points were kept, so a mesh with fewer than
        ``max_points`` points yields a shorter vector than a subsampled one.
        Callers that stack several results need meshes giving equal lengths.

    Raises:
        ValueError: If reading or processing the file fails for any reason.
            The underlying exception is attached as ``__cause__``.
    """
    try:
        mesh = pv.read(file_path)
        points = mesh.points
        if len(points) > max_points:
            # Fall back to the module-level RNG so existing callers (and any
            # np.random.seed() they rely on) behave exactly as before.
            sampler = np.random if rng is None else rng
            indices = sampler.choice(len(points), max_points, replace=False)
            points = points[indices]
        return normalize_points(points).flatten()  # Flatten to 1D for UMAP
    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise ValueError(f"Error reading {file_path}: {e}") from e

def _read_case_names(list_path):
    """Return the stripped, non-blank lines of a case-list text file."""
    with open(list_path, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def _collect_vtp_files(train_folder, names, label):
    """Pair each case name with its existing VTP path, warning about missing ones."""
    found = []
    for name in names:
        vtp_file = os.path.join(train_folder, '1_VTK_surface', name, f'{name}.vtp')
        if os.path.exists(vtp_file):
            found.append((name, vtp_file))
        else:
            print(f"Warning: {label} VTP not found: {vtp_file}")
    return found


def compute_umap_embeddings(train_folder, output_file='umap_embeddings.npz'):
    """Compute a joint 2D UMAP embedding of the training and test geometries.

    Case names are read from ``1_VTK_surface/train.txt`` and
    ``1_VTK_surface/test.txt`` under ``train_folder``. Each name maps to
    ``1_VTK_surface/<name>/<name>.vtp``; cases whose file is missing are
    skipped with a printed warning. One feature vector per remaining case is
    built with ``get_points``. A single UMAP model is fitted on the training
    and test vectors together, and the result is split back into the two sets.

    The embeddings and the retained case names are also written with
    ``np.savez`` to ``output_file`` inside the module-level ``train_save_dir``.

    Args:
        train_folder: Dataset root containing the ``1_VTK_surface`` folder.
        output_file: File name of the ``.npz`` archive to write.

    Returns:
        Tuple ``(train_embeddings, test_embeddings, train_names, test_names)``
        where the name lists contain only the cases whose VTP file was found.

    Raises:
        ValueError: If either list file is missing, if no usable VTP exists
            for the training or the test split, if a VTP cannot be read, or
            if the feature vectors do not all have the same length.
    """
    train_txt_path = os.path.join(train_folder, '1_VTK_surface/train.txt')
    test_txt_path = os.path.join(train_folder, '1_VTK_surface/test.txt')

    for path in [train_txt_path, test_txt_path]:
        if not os.path.exists(path):
            raise ValueError(f"{path} not found")

    train_files = _collect_vtp_files(
        train_folder, _read_case_names(train_txt_path), "Training")
    test_files = _collect_vtp_files(
        train_folder, _read_case_names(test_txt_path), "Test")

    if not train_files or not test_files:
        raise ValueError("No valid training or test VTPs found")

    train_names = [name for name, _ in train_files]
    test_names = [name for name, _ in test_files]

    train_features = [get_points(vtp) for _, vtp in train_files]
    test_features = [get_points(vtp) for _, vtp in test_files]
    all_features = train_features + test_features

    # get_points returns shorter vectors for meshes with fewer points than its
    # subsampling cap; UMAP needs a rectangular matrix, so fail early with a
    # message that names the offending cases instead of an opaque array error.
    lengths = [len(vec) for vec in all_features]
    if len(set(lengths)) > 1:
        expected = max(lengths)
        offenders = [name for name, size in zip(train_names + test_names, lengths)
                     if size != expected]
        raise ValueError(
            f"Feature vectors have inconsistent lengths {sorted(set(lengths))}; "
            f"cases differing from length {expected}: {offenders}")

    reducer = UMAP(n_components=2, random_state=42, n_neighbors=15, min_dist=0.1)
    embeddings = reducer.fit_transform(all_features)

    # Rows come back in input order: training cases first, then test cases.
    train_embeddings = embeddings[:len(train_files)]
    test_embeddings = embeddings[len(train_files):]

    output_path = os.path.join(train_save_dir, output_file)
    np.savez(output_path, train_emb=train_embeddings, test_emb=test_embeddings,
             train_names=train_names, test_names=test_names)
    print(f"UMAP embeddings saved: {output_path}")

    return train_embeddings, test_embeddings, train_names, test_names

def compute_umap_scores(train_embeddings, test_embeddings, test_names, output_file='test_umap_scores.txt'):
    """Score each test geometry by its closeness to the training set in UMAP space.

    For every test embedding, the Euclidean distance to each training
    embedding is computed and the median distance ``m`` is turned into a
    score ``1 / (1 + m)``, so larger scores mean the test case sits closer to
    the training cases. Each score is printed and written as a tab-separated
    line to ``output_file`` inside the module-level ``train_save_dir``.

    When at least one score was produced, summary statistics are printed and
    a scatter plot of both embedding sets is saved as ``umap_plot.png`` in
    the same directory.

    Args:
        train_embeddings: 2D array of training embeddings, one row per case.
        test_embeddings: 2D array of test embeddings, one row per case.
        test_names: Names matching the rows of ``test_embeddings``.
        output_file: File name of the tab-separated score report.

    Returns:
        List of scores in the order of ``test_names``.
    """
    report_path = os.path.join(train_save_dir, output_file)
    scores = []

    with open(report_path, 'w') as report:
        report.write("Test_File\tUMAP_Score\n")
        for point, name in zip(test_embeddings, test_names):
            gaps = np.linalg.norm(train_embeddings - point, axis=1)
            # Median keeps a few far-away training cases from dominating.
            score = 1 / (1 + np.median(gaps))
            scores.append(score)
            print(f"{name}: {score:.4f}")
            report.write(f"{name}\t{score:.6f}\n")

    # Nothing to summarize or plot without at least one test case.
    if not scores:
        return scores

    print(f"\nAverage Score: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
    print(f"Min/Max: {np.min(scores):.4f} / {np.max(scores):.4f}")

    plt.figure(figsize=(8, 6))
    series = (
        (train_embeddings, 'blue', 'Train'),
        (test_embeddings, 'red', 'Test'),
    )
    for cloud, colour, tag in series:
        plt.scatter(cloud[:, 0], cloud[:, 1], c=colour, label=tag, alpha=0.6)
    plt.xlabel('UMAP 1')
    plt.ylabel('UMAP 2')
    plt.title('UMAP Embeddings of Geometries')
    plt.legend()
    plt.savefig(os.path.join(train_save_dir, 'umap_plot.png'))
    plt.close()

    return scores

def visualize_sample(test_folder):
    """Render the first geometry listed in the test split.

    Reads ``1_VTK_surface/test.txt`` under ``test_folder``, takes the first
    non-blank name and, if ``1_VTK_surface/<name>/<name>.vtp`` exists, shows
    that mesh in a PyVista plotter. Does nothing when the list is empty or
    the file is missing.

    Args:
        test_folder: Dataset root containing the ``1_VTK_surface`` folder.
    """
    listing = os.path.join(test_folder, '1_VTK_surface/test.txt')
    with open(listing, 'r') as handle:
        cases = [entry for entry in map(str.strip, handle) if entry]

    if not cases:
        return

    name = cases[0]
    vtp_file = os.path.join(test_folder, '1_VTK_surface', name, f'{name}.vtp')
    if not os.path.exists(vtp_file):
        return

    mesh = pv.read(vtp_file)
    plotter = pv.Plotter()
    plotter.add_mesh(mesh, color='blue', show_edges=True)
    plotter.add_title(f'Sample Geometry: {name}')
    plotter.show()

# Run analysis
if __name__ == "__main__":
    # Embed all listed train/test geometries together; the embeddings are
    # also saved as an .npz archive under train_save_dir.
    train_emb, test_emb, train_names, test_names = compute_umap_embeddings(train_folder)
    # Score every test geometry against the training set; writes the score
    # report and the scatter plot. train_names is not used past this point.
    scores = compute_umap_scores(train_emb, test_emb, test_names)
    # test.txt lives under the same dataset root, hence train_folder here.
    visualize_sample(train_folder)