added structured clustering

Markus Friedrich 2019-08-12 19:13:04 +02:00
parent e4cc447f68
commit 557d49fefc
2 changed files with 198 additions and 258 deletions
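The structured clustering added in the prediction script below is a two-stage scheme: the labeled point cloud is first pre-clustered with DBSCAN on position and normals, and every pre-cluster that still mixes primitive types is re-clustered on position plus the one-hot encoded type columns. A minimal sketch of the idea, assuming the x, y, z, nx, ny, nz, type(, one-hot type) column layout used in this repo and calling sklearn's DBSCAN directly instead of the pc.cluster_dbscan wrapper:

import numpy as np
from sklearn.cluster import DBSCAN

def two_level_clustering(points, eps=0.1, min_samples=50):
    # Level 1: spatial pre-clustering on position + normals (columns 0..5).
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(points[:, :6]).labels_
    pre_clusters = [points[labels == l] for l in set(labels) if l != -1]

    total = []
    for c in pre_clusters:
        if len(np.unique(c[:, 6])) == 1:
            total.append(c)  # a single primitive type: keep the cluster as is
        else:
            # Level 2: re-cluster on position + one-hot type columns (7..10) so that
            # points belonging to different primitive types separate.
            feats = np.column_stack((c[:, :3], c[:, 7:11]))
            sub = DBSCAN(eps=eps, min_samples=min_samples).fit(feats).labels_
            total.extend(c[sub == l] for l in set(sub) if l != -1)
    return [c for c in total if c.shape[0] > 100]  # drop tiny clusters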

View File

@@ -37,7 +37,7 @@ def mini_color_table(index, norm=True):
     return color

-def clusterToColor(cluster, cluster_idx):
+def cluster2Color(cluster, cluster_idx):
     colors = np.zeros(shape=(len(cluster), 3))
     point_idx = 0
@@ -48,6 +48,21 @@ def clusterToColor(cluster, cluster_idx):
     return colors

+def label2color(labels):
+    '''
+    labels: np.ndarray with shape (n, )
+    colors(return): np.ndarray with shape (n, 3)
+    '''
+    num = labels.shape[0]
+    colors = np.zeros((num, 3))
+    minl, maxl = np.min(labels), np.max(labels)
+    for l in range(minl, maxl + 1):
+        colors[labels == l, :] = mini_color_table(l)
+    return colors
+
 def read_pointcloud(path, delimiter=' ', hasHeader=True):
     with open(path, 'r') as f:
         if hasHeader:
@@ -174,7 +189,7 @@ def draw_clusters(clusters):
         cloud = o3d.PointCloud()
         cloud.points = o3d.Vector3dVector(cluster[:,:3])
-        cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
+        cloud.colors = o3d.Vector3dVector(cluster2Color(cluster, cluster_idx))
         clouds.append(cloud)

     o3d.draw_geometries(clouds)
@@ -186,14 +201,26 @@ def write_clusters(path, clusters, type_column=6):
         file.write(str(len(clusters)) + "\n")

         for cluster in clusters:
-            print("Types: ", cluster[:, type_column])
-            types = np.unique(cluster[:, type_column], axis=0)
-            np.savetxt(file, types, header='', comments='')
+            # print("Types: ", cluster[:, type_column])
+            types = np.unique(cluster[:, type_column], axis=0).astype(int)
+            print("Types: ", types)
+            np.savetxt(file, types, header='', comments='', fmt='%i')
             np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='')

+def draw_sample_data(sample_data, colored_normals = False):
+    cloud = o3d.PointCloud()
+    cloud.points = o3d.Vector3dVector(sample_data[:, :3])
+    cloud.colors = \
+        o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
+
+    o3d.draw_geometries([cloud])
+
 def normalize_pointcloud(pc, factor=1.0):
     max = pc.max(axis=0)
@@ -221,12 +248,12 @@ def hierarchical_clustering(data, selected_indices_0, selected_indices_1, eps, m
     return total_clusters

-def filter_clusters(clusters, min_size):
+def filter_clusters(clusters, filter):
     filtered_clusters = []
     for c in clusters:
-        if len(c) >= min_size:
+        if filter(c):
            filtered_clusters.append(c)
    return filtered_clusters
@@ -238,4 +265,13 @@ def split_outliers(pc, columns):
     # LOF, kNN
     return pc[clf.labels_ == 0], pc[clf.labels_ == 1]

+def append_onehotencoded_type(data, factor = 1.0):
+    types = data[:, 6].astype(int)
+    res = np.zeros((len(types), 4))
+    res[np.arange(len(types)), types] = factor
+    return np.column_stack((data, res))
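For reference, the helpers added above compose roughly as follows; a self-contained toy sketch (the three-entry palette only stands in for the module's twelve-color mini_color_table):

import numpy as np

_PALETTE = np.array([[0.89, 0.15, 0.21], [0.50, 1.00, 0.00], [0.36, 0.14, 0.43]])

def label2color(labels):
    # Map integer labels of shape (n,) to RGB colors of shape (n, 3).
    colors = np.zeros((labels.shape[0], 3))
    for l in range(labels.min(), labels.max() + 1):
        colors[labels == l, :] = _PALETTE[l % len(_PALETTE)]
    return colors

def append_onehotencoded_type(data, factor=1.0):
    # Append a 4-column one-hot encoding of the type column (index 6).
    types = data[:, 6].astype(int)
    onehot = np.zeros((len(types), 4))
    onehot[np.arange(len(types)), types] = factor
    return np.column_stack((data, onehot))

# Three toy points laid out as x, y, z, nx, ny, nz, type.
data = np.array([[0., 0., 0., 0., 0., 1., 0.],
                 [1., 0., 0., 0., 0., 1., 2.],
                 [0., 1., 0., 0., 0., 1., 1.]])
print(label2color(data[:, 6].astype(int)))    # (3, 3) RGB array, one row per point
print(append_onehotencoded_type(data).shape)  # (3, 11): 7 input columns + 4 one-hot columns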

View File

@@ -2,21 +2,19 @@ import sys
 import os
 import shutil
 import math
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
+
 from dataset.shapenet import ShapeNetPartSegDataset
 from model.pointnet2_part_seg import PointNet2PartSegmentNet
 import torch_geometric.transforms as GT
 import torch
 import argparse
 from distutils.util import strtobool
 import numpy as np
-from sklearn.cluster import DBSCAN
-from sklearn.preprocessing import StandardScaler
-import open3d as o3d
 import pointcloud as pc
-import operator
-
-sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory

 def eval_sample(net, sample):
     '''
@@ -42,78 +40,6 @@ def eval_sample(net, sample):
     return (pred_label, gt_label)

-def mini_color_table(index, norm=True):
-    colors = [
-        [0.5000, 0.5400, 0.5300], [0.8900, 0.1500, 0.2100], [0.6400, 0.5800, 0.5000],
-        [1.0000, 0.3800, 0.0100], [1.0000, 0.6600, 0.1400], [0.4980, 1.0000, 0.0000],
-        [0.4980, 1.0000, 0.8314], [0.9412, 0.9725, 1.0000], [0.5412, 0.1686, 0.8863],
-        [0.5765, 0.4392, 0.8588], [0.3600, 0.1400, 0.4300], [0.5600, 0.3700, 0.6000],
-    ]
-    color = colors[index % len(colors)]
-    if not norm:
-        color[0] *= 255
-        color[1] *= 255
-        color[2] *= 255
-    return color
-
-def label2color(labels):
-    '''
-    labels: np.ndarray with shape (n, )
-    colors(return): np.ndarray with shape (n, 3)
-    '''
-    num = labels.shape[0]
-    colors = np.zeros((num, 3))
-    minl, maxl = np.min(labels), np.max(labels)
-    for l in range(minl, maxl + 1):
-        colors[labels == l, :] = mini_color_table(l)
-    return colors
-
-def clusterToColor(cluster, cluster_idx):
-    colors = np.zeros(shape=(len(cluster), 3))
-    point_idx = 0
-    for point in cluster:
-        colors[point_idx, :] = mini_color_table(cluster_idx)
-        point_idx += 1
-    return colors
-
-def farthest_point_sampling(pts, K):
-    if pts.shape[0] < K:
-        return pts
-
-    def calc_distances(p0, points):
-        return ((p0[:3] - points[:, :3]) ** 2).sum(axis=1)
-
-    farthest_pts = np.zeros((K, pts.shape[1]))
-    farthest_pts[0] = pts[np.random.randint(len(pts))]
-    distances = calc_distances(farthest_pts[0], pts)
-    for i in range(1, K):
-        farthest_pts[i] = pts[np.argmax(distances)]
-        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))
-    return farthest_pts
-
-def append_onehotencoded_type(data, factor = 1.0):
-    types = data[:, 6].astype(int)
-    res = np.zeros((len(types), 4))
-    res[np.arange(len(types)), types] = factor
-    return np.column_stack((data, res))

 def append_normal_angles(data):
     def func(x):
@@ -128,64 +54,6 @@ def append_normal_angles(data):
     return np.column_stack((data, res))

-def extract_clusters(data, selected_indices, eps, min_samples, metric='euclidean', algo='auto'):
-    min_samples = min_samples * len(data)
-    print('Clustering. Min Samples: ' + str(min_samples) + ' EPS: ' + str(eps))
-
-    # 0,1,2 : pos
-    # 3,4,5 : normal
-    # 6: type index
-    # 7,8,9,10: type index one hot encoded
-    # 11,12: normal as angles
-    db_res = DBSCAN(eps=eps, metric=metric, n_jobs=-1, algorithm=algo, min_samples=min_samples).fit(data[:, selected_indices])
-
-    labels = db_res.labels_
-    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
-    n_noise = list(labels).count(-1)
-    print("Noise: " + str(n_noise) + " Clusters: " + str(n_clusters))
-
-    clusters = {}
-    for idx, l in enumerate(labels):
-        if l is -1:
-            continue
-        clusters.setdefault(str(l), []).append(data[idx, :])
-
-    npClusters = []
-    for cluster in clusters.values():
-        npClusters.append(np.array(cluster))
-
-    return npClusters
-
-def draw_clusters(clusters):
-    clouds = []
-
-    cluster_idx = 0
-    for cluster in clusters:
-        cloud = o3d.PointCloud()
-        cloud.points = o3d.Vector3dVector(cluster[:,:3])
-        cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
-        clouds.append(cloud)
-        cluster_idx += 1
-
-    o3d.draw_geometries(clouds)
-
-def draw_sample_data(sample_data, colored_normals = False):
-    cloud = o3d.PointCloud()
-    cloud.points = o3d.Vector3dVector(sample_data[:, :3])
-    cloud.colors = \
-        o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
-
-    o3d.draw_geometries([cloud])

 def recreate_folder(folder):
     if os.path.exists(folder) and os.path.isdir(folder):
         shutil.rmtree(folder)
@@ -194,8 +62,8 @@ def recreate_folder(folder):
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory

 parser = argparse.ArgumentParser()
-parser.add_argument('--npoints', type=int, default=2048, help='resample points number')
-parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_3.pth', help='model path')
+parser.add_argument('--npoints', type=int, default=4096, help='resample points number')
+parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path')
 parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result')
 parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers')
 parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals')
@@ -203,139 +71,175 @@ parser.add_argument('--collate_per_segment', type=strtobool, default=True, help=
 parser.add_argument('--has_variations', type=strtobool, default=False,
                     help='whether a single pointcloud has variations '
                          'named int(id)_pc.(xyz|dat) look at pointclouds or sub')
 opt = parser.parse_args()
 print(opt)

 if __name__ == '__main__':

-    # Create dataset
-    print('Create data set ..')
-
-    dataset_folder = './data/raw/predict/'
-    pointcloud_file = './pointclouds/0_0.xyz'
-
-    # Load and pre-process point cloud
-    pcloud = pc.read_pointcloud(pointcloud_file)
-    pcloud = pc.normalize_pointcloud(pcloud, 1)
-
-    #a, b = pc.split_outliers(pcloud, [3, 4, 5])
-    #draw_sample_data(a, True)
-    #draw_sample_data(b, True)
-    #pcloud = a
-
-    # for 0_0.xyz: pc.hierarchical_clustering(pcloud, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
-    #pc_clusters = pc.cluster_dbscan(pcloud, [0, 1, 2, 3,4,5], eps=0.5, min_samples=5)
-    #pc_clusters = pc.filter_clusters(pc_clusters, 100)
-    #pc_clusters = [pcloud]
-    #print("NUM CLUSTERS: ", len(pc_clusters))
-    #draw_clusters(pc_clusters)
-    #for c in pc_clusters:
-    #draw_sample_data(c, True)
-    # print("Cluster Size: ", len(c))
-
-    # draw_sample_data(pcloud)
-
-    pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
-                                             selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
-    # pc.cluster_cubes(pcloud, [4, 4, 4])
-
-    recreate_folder(dataset_folder)
-    for idx, pcc in enumerate(pc_clusters):
-        pcc = farthest_point_sampling(pcc, opt.npoints)
-        recreate_folder(dataset_folder + str(idx) + '/')
-        pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
-        # draw_sample_data(pcc, False)
-
-    # Load dataset
-    print('load dataset ..')
-    test_transform = GT.Compose([GT.NormalizeScale(), ])
-
-    test_dataset = ShapeNetPartSegDataset(
-        mode='predict',
-        root_dir='data',
-        with_normals=opt.with_normals,
-        npoints=opt.npoints,
-        refresh=True,
-        collate_per_segment=opt.collate_per_segment,
-        has_variations=opt.has_variations,
-        headers=opt.headers
-    )
-    num_classes = test_dataset.num_classes()
-
-    # Load model
-    print('Construct model ..')
-    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    dtype = torch.float
-
-    # net = PointNetPartSegmentNet(num_classes)
-    net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
-    net.load_state_dict(torch.load(opt.model, map_location=device.type))
-    net = net.to(device, dtype)
-    net.eval()
-
-    labeled_dataset = None
-    result_clusters = []
-
-    # Iterate over all the samples and predict
-    for sample in test_dataset:
-
-        # Predict
-        pred_label, gt_label = eval_sample(net, sample)
-        if opt.with_normals:
-            sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy()))
-        else:
-            sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], pred_label.numpy()))
-
-        # draw_sample_data(sample_data, False)
-        #print("Sample Datat: ", sample_data[:5, :])
-        #print('Eval done.')
-        print("PRED LABEL: ", pred_label)
-
-        #sample_data = normalize_pointcloud(sample_data)
-        #sample_data = append_onehotencoded_type(sample_data, 1.0)
-        #sample_data = append_normal_angles(sample_data)
-
-        # print('Clustering ..')
-        # print('Shape: ' + str(sample_data.shape))
-        # clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10], eps=0.1, min_samples=0.0001, metric='euclidean', algo='auto')
-        # print('Clustering done. ' + str(len(clusters)) + " Clusters.")
-        # print(sample_data[:, 6])
-        # draw_sample_data(sample_data, False)
-        # result_clusters.extend(clusters)
-
-        result_clusters.append(sample_data)
-
-        if labeled_dataset is None:
-            labeled_dataset = sample_data
-        else:
-            labeled_dataset = np.vstack((labeled_dataset, sample_data))
-
-    print("prediction done")
-
-    draw_sample_data(labeled_dataset, False)
-    print("point cloud size: ", labeled_dataset.shape)
-
-    print("Min: ", np.min(labeled_dataset[:, :3]))
-    print("Max: ", np.max(labeled_dataset[:, :3]))
-    print("Min: ", np.min(pcloud[:, :3]))
-    print("Max: ", np.max(pcloud[:, :3]))
-
-    # TODO: Take result clusters and cluster them by primitive type.
-
-    # ------------------------------------------------------------------------------------------------------------------
-    # Write clusters to file.
-    # ------------------------------------------------------------------------------------------------------------------
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # # Create dataset
+    # print('Create data set ..')
+    #
+    # dataset_folder = './data/raw/predict/'
+    # pointcloud_file = './pointclouds/m3.xyz'
+    #
+    # # Load and pre-process point cloud
+    # pcloud = pc.read_pointcloud(pointcloud_file)
+    # pcloud = pc.normalize_pointcloud(pcloud, 1)
+    #
+    # #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
+    # #                                         selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.7, min_samples=5)
+    #
+    # pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4])
+    # print("Pre-Processing: Clustering")
+    # pc.draw_clusters(pc_clusters)
+    #
+    # recreate_folder(dataset_folder)
+    # for idx, pcc in enumerate(pc_clusters):
+    #
+    #     pcc = pc.farthest_point_sampling(pcc, opt.npoints)
+    #     recreate_folder(dataset_folder + str(idx) + '/')
+    #     pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
+    #     #draw_sample_data(pcc, True)
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Load point cloud clusters and model.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # # Load dataset
+    # print('load dataset ..')
+    # test_transform = GT.Compose([GT.NormalizeScale(), ])
+    #
+    # test_dataset = ShapeNetPartSegDataset(
+    #     mode='predict',
+    #     root_dir='data',
+    #     with_normals=opt.with_normals,
+    #     npoints=opt.npoints,
+    #     refresh=True,
+    #     collate_per_segment=opt.collate_per_segment,
+    #     has_variations=opt.has_variations,
+    #     headers=opt.headers
+    # )
+    # num_classes = test_dataset.num_classes()
+    #
+    # # Load model
+    # print('Construct model ..')
+    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    # dtype = torch.float
+    #
+    # net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
+    # net.load_state_dict(torch.load(opt.model, map_location=device.type))
+    # net = net.to(device, dtype)
+    # net.eval()
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Predict per cluster.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # labeled_dataset = None
+    # result_clusters = []
+    #
+    # # Iterate over all the samples and predict
+    # for idx, sample in enumerate(test_dataset):
+    #
+    #     predicted_label, _ = eval_sample(net, sample)
+    #     if opt.with_normals:
+    #         sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy()))
+    #     else:
+    #         sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy()))
+    #
+    #     result_clusters.append(sample_data)
+    #
+    #     if labeled_dataset is None:
+    #         labeled_dataset = sample_data
+    #     else:
+    #         labeled_dataset = np.vstack((labeled_dataset, sample_data))
+    #
+    #     print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset)))
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Remove cluster rows if the amount of points for a particular primitive type is below a threshold.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # min_cluster_size = 10
+    # contamination = 0.01
+    #
+    # filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters)
+    # type_filtered_clusters = []
+    # for c in filtered_clusters:
+    #
+    #     prim_types = np.unique(c[:, 6])
+    #     pt_count = {}
+    #     for pt in prim_types:
+    #         pt_count[pt] = len(c[c[:, 6] == pt])
+    #
+    #     max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0]
+    #     min_size = pt_count[max_pt] * contamination
+    #
+    #     valid_types = []
+    #     for pt in prim_types:
+    #         if pt_count[pt] > min_size:
+    #             valid_types.append(pt)
+    #
+    #     filtered_c = c[np.isin(c[:, 6], valid_types)]
+    #     type_filtered_clusters.append(filtered_c)
+    #
+    # result_clusters = type_filtered_clusters
+    #
+    # labeled_dataset = np.vstack(result_clusters)
+    #
+    # np.savetxt('labeled_dataset.txt', labeled_dataset)
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Clustering that results in per-primitive type clusters
+    # ------------------------------------------------------------------------------------------------------------------
+
+    labeled_dataset = np.loadtxt('labeled_dataset.txt')
+    # pc.draw_sample_data(labeled_dataset)
+
+    # TODO: Filter labeled dataset to get rid of edge clusters
+    print("Final clustering..")
+
+    labeled_dataset = pc.append_onehotencoded_type(labeled_dataset)
+    print("Test row: ", labeled_dataset[:1, :])
+
+    total_clusters = []
+
+    clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50)
+    print("Pre-clustering done. Clusters: ", len(clusters))
+    pc.draw_clusters(clusters)
+
+    for cluster in clusters:
+        #cluster = pc.normalize_pointcloud(cluster)
+        print("2nd level clustering ..")
+
+        prim_types_in_cluster = len(np.unique(cluster[:, 6], axis=0))
+        if prim_types_in_cluster == 1:
+            print("No need for 2nd level clustering since there is only a single primitive type in the cluster.")
+            total_clusters.append(cluster)
+        else:
+            sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50)
+            print("Sub clusters: ", len(sub_clusters))
+            total_clusters.extend(sub_clusters)
+
+    result_clusters = list(filter(lambda c: c.shape[0] > 100, total_clusters))
+
+    for cluster in result_clusters:
+        print("Cluster: ", cluster.shape[0])
+        pc.draw_sample_data(cluster, False)
+
+    print("Number of clusters: ", len(result_clusters))
+
+    pc.draw_clusters(result_clusters)

     pc.write_clusters("clusters.txt", result_clusters)
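Since filter_clusters now takes an arbitrary predicate instead of a fixed minimum size, size and purity filters can be expressed the same way. A small self-contained usage sketch (the list comprehension is shorthand for the loop in the module above):

import numpy as np

def filter_clusters(clusters, predicate):
    # Keep only the clusters for which predicate(cluster) is True.
    return [c for c in clusters if predicate(c)]

# Two toy clusters; columns are x, y, z, nx, ny, nz, type.
clusters = [np.zeros((150, 7)), np.ones((40, 7))]

big = filter_clusters(clusters, lambda c: c.shape[0] >= 100)              # size filter
pure = filter_clusters(clusters, lambda c: len(np.unique(c[:, 6])) == 1)  # single primitive type
print(len(big), len(pure))  # 1 2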