From 557d49fefcaf4d4f0745f7c57f355a19b8b7324b Mon Sep 17 00:00:00 2001 From: Markus Friedrich Date: Mon, 12 Aug 2019 19:13:04 +0200 Subject: [PATCH] added structured clustering --- pointcloud.py | 52 +++++- predict/predict.py | 404 +++++++++++++++++---------------------------- 2 files changed, 198 insertions(+), 258 deletions(-) diff --git a/pointcloud.py b/pointcloud.py index beea392..53f7d29 100644 --- a/pointcloud.py +++ b/pointcloud.py @@ -37,7 +37,7 @@ def mini_color_table(index, norm=True): return color -def clusterToColor(cluster, cluster_idx): +def cluster2Color(cluster, cluster_idx): colors = np.zeros(shape=(len(cluster), 3)) point_idx = 0 @@ -48,6 +48,21 @@ def clusterToColor(cluster, cluster_idx): return colors +def label2color(labels): + ''' + labels: np.ndarray with shape (n, ) + colors(return): np.ndarray with shape (n, 3) + ''' + num = labels.shape[0] + colors = np.zeros((num, 3)) + + minl, maxl = np.min(labels), np.max(labels) + for l in range(minl, maxl + 1): + colors[labels == l, :] = mini_color_table(l) + + return colors + + def read_pointcloud(path, delimiter=' ', hasHeader=True): with open(path, 'r') as f: if hasHeader: @@ -174,7 +189,7 @@ def draw_clusters(clusters): cloud = o3d.PointCloud() cloud.points = o3d.Vector3dVector(cluster[:,:3]) - cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx)) + cloud.colors = o3d.Vector3dVector(cluster2Color(cluster, cluster_idx)) clouds.append(cloud) o3d.draw_geometries(clouds) @@ -186,14 +201,26 @@ def write_clusters(path, clusters, type_column=6): file.write(str(len(clusters)) + "\n") for cluster in clusters: - print("Types: ", cluster[:, type_column]) + # print("Types: ", cluster[:, type_column]) - types = np.unique(cluster[:, type_column], axis=0) + types = np.unique(cluster[:, type_column], axis=0).astype(int) - np.savetxt(file, types, header='', comments='') + print("Types: ", types) + + np.savetxt(file, types, header='', comments='', fmt='%i') np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='') +def draw_sample_data(sample_data, colored_normals = False): + + cloud = o3d.PointCloud() + cloud.points = o3d.Vector3dVector(sample_data[:, :3]) + cloud.colors = \ + o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6]) + + o3d.draw_geometries([cloud]) + + def normalize_pointcloud(pc, factor=1.0): max = pc.max(axis=0) @@ -221,12 +248,12 @@ def hierarchical_clustering(data, selected_indices_0, selected_indices_1, eps, m return total_clusters -def filter_clusters(clusters, min_size): +def filter_clusters(clusters, filter): filtered_clusters = [] for c in clusters: - if len(c) >= min_size: + if filter(c): filtered_clusters.append(c) return filtered_clusters @@ -238,4 +265,13 @@ def split_outliers(pc, columns): # LOF, kNN - return pc[clf.labels_ == 0], pc[clf.labels_ == 1] \ No newline at end of file + return pc[clf.labels_ == 0], pc[clf.labels_ == 1] + + +def append_onehotencoded_type(data, factor = 1.0): + + types = data[:, 6].astype(int) + res = np.zeros((len(types), 4)) + res[np.arange(len(types)), types] = factor + + return np.column_stack((data, res)) diff --git a/predict/predict.py b/predict/predict.py index 12849f1..e926f99 100644 --- a/predict/predict.py +++ b/predict/predict.py @@ -2,21 +2,19 @@ import sys import os import shutil import math - -sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory - from dataset.shapenet import ShapeNetPartSegDataset from 
model.pointnet2_part_seg import PointNet2PartSegmentNet import torch_geometric.transforms as GT import torch import argparse from distutils.util import strtobool - import numpy as np -from sklearn.cluster import DBSCAN -from sklearn.preprocessing import StandardScaler -import open3d as o3d import pointcloud as pc +import operator + + +sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory + def eval_sample(net, sample): ''' @@ -42,78 +40,6 @@ def eval_sample(net, sample): return (pred_label, gt_label) -def mini_color_table(index, norm=True): - colors = [ - [0.5000, 0.5400, 0.5300], [0.8900, 0.1500, 0.2100], [0.6400, 0.5800, 0.5000], - [1.0000, 0.3800, 0.0100], [1.0000, 0.6600, 0.1400], [0.4980, 1.0000, 0.0000], - [0.4980, 1.0000, 0.8314], [0.9412, 0.9725, 1.0000], [0.5412, 0.1686, 0.8863], - [0.5765, 0.4392, 0.8588], [0.3600, 0.1400, 0.4300], [0.5600, 0.3700, 0.6000], - ] - - color = colors[index % len(colors)] - - if not norm: - color[0] *= 255 - color[1] *= 255 - color[2] *= 255 - - return color - - -def label2color(labels): - ''' - labels: np.ndarray with shape (n, ) - colors(return): np.ndarray with shape (n, 3) - ''' - num = labels.shape[0] - colors = np.zeros((num, 3)) - - minl, maxl = np.min(labels), np.max(labels) - for l in range(minl, maxl + 1): - colors[labels == l, :] = mini_color_table(l) - - return colors - - -def clusterToColor(cluster, cluster_idx): - - colors = np.zeros(shape=(len(cluster), 3)) - point_idx = 0 - for point in cluster: - colors[point_idx, :] = mini_color_table(cluster_idx) - point_idx += 1 - - return colors - - - -def farthest_point_sampling(pts, K): - - if pts.shape[0] < K: - return pts - - def calc_distances(p0, points): - return ((p0[:3] - points[:, :3]) ** 2).sum(axis=1) - - farthest_pts = np.zeros((K, pts.shape[1])) - farthest_pts[0] = pts[np.random.randint(len(pts))] - distances = calc_distances(farthest_pts[0], pts) - for i in range(1, K): - farthest_pts[i] = pts[np.argmax(distances)] - distances = np.minimum(distances, calc_distances(farthest_pts[i], pts)) - - return farthest_pts - - -def append_onehotencoded_type(data, factor = 1.0): - - types = data[:, 6].astype(int) - res = np.zeros((len(types), 4)) - res[np.arange(len(types)), types] = factor - - return np.column_stack((data, res)) - - def append_normal_angles(data): def func(x): @@ -128,64 +54,6 @@ def append_normal_angles(data): return np.column_stack((data, res)) -def extract_clusters(data, selected_indices, eps, min_samples, metric='euclidean', algo='auto'): - - min_samples = min_samples * len(data) - - print('Clustering. 
Min Samples: ' + str(min_samples) + ' EPS: ' + str(eps)) - - # 0,1,2 : pos - # 3,4,5 : normal - # 6: type index - # 7,8,9,10: type index one hot encoded - # 11,12: normal as angles - - db_res = DBSCAN(eps=eps, metric=metric, n_jobs=-1, algorithm=algo, min_samples=min_samples).fit(data[:, selected_indices]) - - labels = db_res.labels_ - n_clusters = len(set(labels)) - (1 if -1 in labels else 0) - n_noise = list(labels).count(-1) - print("Noise: " + str(n_noise) + " Clusters: " + str(n_clusters)) - - clusters = {} - for idx, l in enumerate(labels): - if l is -1: - continue - clusters.setdefault(str(l), []).append(data[idx, :]) - - npClusters = [] - for cluster in clusters.values(): - npClusters.append(np.array(cluster)) - - return npClusters - - -def draw_clusters(clusters): - - clouds = [] - - cluster_idx = 0 - for cluster in clusters: - - cloud = o3d.PointCloud() - cloud.points = o3d.Vector3dVector(cluster[:,:3]) - cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx)) - clouds.append(cloud) - cluster_idx += 1 - - o3d.draw_geometries(clouds) - - -def draw_sample_data(sample_data, colored_normals = False): - - cloud = o3d.PointCloud() - cloud.points = o3d.Vector3dVector(sample_data[:, :3]) - cloud.colors = \ - o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6]) - - o3d.draw_geometries([cloud]) - - def recreate_folder(folder): if os.path.exists(folder) and os.path.isdir(folder): shutil.rmtree(folder) @@ -194,8 +62,8 @@ def recreate_folder(folder): sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory parser = argparse.ArgumentParser() -parser.add_argument('--npoints', type=int, default=2048, help='resample points number') -parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_3.pth', help='model path') +parser.add_argument('--npoints', type=int, default=4096, help='resample points number') +parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path') parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result') parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers') parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals') @@ -203,139 +71,175 @@ parser.add_argument('--collate_per_segment', type=strtobool, default=True, help= parser.add_argument('--has_variations', type=strtobool, default=False, help='whether a single pointcloud has variations ' 'named int(id)_pc.(xyz|dat) look at pointclouds or sub') - opt = parser.parse_args() print(opt) if __name__ == '__main__': - # Create dataset - print('Create data set ..') + # # ------------------------------------------------------------------------------------------------------------------ + # # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction + # # ------------------------------------------------------------------------------------------------------------------ + # + # # Create dataset + # print('Create data set ..') + # + # dataset_folder = './data/raw/predict/' + # pointcloud_file = './pointclouds/m3.xyz' + # + # # Load and pre-process point cloud + # pcloud = pc.read_pointcloud(pointcloud_file) + # pcloud = pc.normalize_pointcloud(pcloud, 1) + # + # #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5], + # # selected_indices_1=[0, 
1, 2, 3, 4, 5], eps=0.7, min_samples=5) + # + # pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4]) + # print("Pre-Processing: Clustering") + # pc.draw_clusters(pc_clusters) + # + # recreate_folder(dataset_folder) + # for idx, pcc in enumerate(pc_clusters): + # + # pcc = pc.farthest_point_sampling(pcc, opt.npoints) + # recreate_folder(dataset_folder + str(idx) + '/') + # pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc) + # #draw_sample_data(pcc, True) + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Load point cloud clusters and model. + # # ------------------------------------------------------------------------------------------------------------------ + # + # # Load dataset + # print('load dataset ..') + # test_transform = GT.Compose([GT.NormalizeScale(), ]) + # + # test_dataset = ShapeNetPartSegDataset( + # mode='predict', + # root_dir='data', + # with_normals=opt.with_normals, + # npoints=opt.npoints, + # refresh=True, + # collate_per_segment=opt.collate_per_segment, + # has_variations=opt.has_variations, + # headers=opt.headers + # ) + # num_classes = test_dataset.num_classes() + # + # # Load model + # print('Construct model ..') + # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + # dtype = torch.float + # + # net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals) + # net.load_state_dict(torch.load(opt.model, map_location=device.type)) + # net = net.to(device, dtype) + # net.eval() + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Predict per cluster. + # # ------------------------------------------------------------------------------------------------------------------ + # + # labeled_dataset = None + # result_clusters = [] + # + # # Iterate over all the samples and predict + # for idx, sample in enumerate(test_dataset): + # + # predicted_label, _ = eval_sample(net, sample) + # if opt.with_normals: + # sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy())) + # else: + # sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy())) + # + # result_clusters.append(sample_data) + # + # if labeled_dataset is None: + # labeled_dataset = sample_data + # else: + # labeled_dataset = np.vstack((labeled_dataset, sample_data)) + # + # print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset))) + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Remove cluster rows if the amount of points for a particular primitive type is below a threshold. 
+ # # ------------------------------------------------------------------------------------------------------------------ + # + # min_cluster_size = 10 + # contamination = 0.01 + # + # filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters) + # type_filtered_clusters = [] + # for c in filtered_clusters: + # + # prim_types = np.unique(c[:, 6]) + # pt_count = {} + # for pt in prim_types: + # pt_count[pt] = len(c[c[:, 6] == pt]) + # + # max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0] + # min_size = pt_count[max_pt] * contamination + # + # valid_types = [] + # for pt in prim_types: + # if pt_count[pt] > min_size: + # valid_types.append(pt) + # + # filtered_c = c[np.isin(c[:, 6], valid_types)] + # type_filtered_clusters.append(filtered_c) + # result_clusters = type_filtered_clusters + # + # labeled_dataset = np.vstack(result_clusters) + # + # np.savetxt('labeled_dataset.txt', labeled_dataset) - dataset_folder = './data/raw/predict/' - pointcloud_file = './pointclouds/0_0.xyz' + # ------------------------------------------------------------------------------------------------------------------ + # Clustering that results in per-primitive type clusters + # ------------------------------------------------------------------------------------------------------------------ - # Load and pre-process point cloud - pcloud = pc.read_pointcloud(pointcloud_file) - pcloud = pc.normalize_pointcloud(pcloud, 1) - - #a, b = pc.split_outliers(pcloud, [3, 4, 5]) - #draw_sample_data(a, True) - #draw_sample_data(b, True) - #pcloud = a + labeled_dataset = np.loadtxt('labeled_dataset.txt') + # pc.draw_sample_data(labeled_dataset) - # for 0_0.xyz: pc.hierarchical_clustering(pcloud, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5) + # TODO: Filter labeled dataset to get rid of edge clusters + print("Final clustering..") - #pc_clusters = pc.cluster_dbscan(pcloud, [0, 1, 2, 3,4,5], eps=0.5, min_samples=5) - #pc_clusters = pc.filter_clusters(pc_clusters, 100) + labeled_dataset = pc.append_onehotencoded_type(labeled_dataset) - #pc_clusters = [pcloud] + print("Test row: ", labeled_dataset[:1, :]) - #print("NUM CLUSTERS: ", len(pc_clusters)) + total_clusters = [] - #draw_clusters(pc_clusters) - #for c in pc_clusters: - #draw_sample_data(c, True) - # print("Cluster Size: ", len(c)) + clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50) + print("Pre-clustering done. 
Clusters: ", len(clusters)) + pc.draw_clusters(clusters) + for cluster in clusters: + #cluster = pc.normalize_pointcloud(cluster) + print("2nd level clustering ..") - # draw_sample_data(pcloud) - - pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5], - selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5) - # pc.cluster_cubes(pcloud, [4, 4, 4]) - - recreate_folder(dataset_folder) - for idx, pcc in enumerate(pc_clusters): - - pcc = farthest_point_sampling(pcc, opt.npoints) - recreate_folder(dataset_folder + str(idx) + '/') - pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc) - # draw_sample_data(pcc, False) - - # Load dataset - print('load dataset ..') - test_transform = GT.Compose([GT.NormalizeScale(), ]) - - test_dataset = ShapeNetPartSegDataset( - mode='predict', - root_dir='data', - with_normals=opt.with_normals, - npoints=opt.npoints, - refresh=True, - collate_per_segment=opt.collate_per_segment, - has_variations=opt.has_variations, - headers=opt.headers - ) - - num_classes = test_dataset.num_classes() - - # Load model - print('Construct model ..') - device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') - dtype = torch.float - - # net = PointNetPartSegmentNet(num_classes) - net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals) - - net.load_state_dict(torch.load(opt.model, map_location=device.type)) - net = net.to(device, dtype) - net.eval() - - labeled_dataset = None - result_clusters = [] - # Iterate over all the samples and predict - for sample in test_dataset: - - # Predict - - pred_label, gt_label = eval_sample(net, sample) - if opt.with_normals: - sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy())) + prim_types_in_cluster = len(np.unique(cluster[:, 6], axis=0)) + if prim_types_in_cluster == 1: + print("No need for 2nd level clustering since there is only a single primitive type in the cluster.") + total_clusters.append(cluster) else: - sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], pred_label.numpy())) + sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50) + print("Sub clusters: ", len(sub_clusters)) + total_clusters.extend(sub_clusters) - # draw_sample_data(sample_data, False) + result_clusters = list(filter(lambda c: c.shape[0] > 100, total_clusters)) - #print("Sample Datat: ", sample_data[:5, :]) - #print('Eval done.') - print("PRED LABEL: ", pred_label) + for cluster in result_clusters: + print("Cluster: ", cluster.shape[0]) - #sample_data = normalize_pointcloud(sample_data) - #sample_data = append_onehotencoded_type(sample_data, 1.0) - #sample_data = append_normal_angles(sample_data) + pc.draw_sample_data(cluster, False) - # print('Clustering ..') - # print('Shape: ' + str(sample_data.shape)) + print("Number of clusters: ", len(result_clusters)) - # clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10], eps=0.1, min_samples=0.0001, metric='euclidean', algo='auto') - # print('Clustering done. 
' + str(len(clusters)) + " Clusters.") - # print(sample_data[:, 6]) - - # draw_sample_data(sample_data, False) - - # result_clusters.extend(clusters) - result_clusters.append(sample_data) - - if labeled_dataset is None: - labeled_dataset = sample_data - else: - labeled_dataset = np.vstack((labeled_dataset, sample_data)) - - print("prediction done") - - draw_sample_data(labeled_dataset, False) - print("point cloud size: ", labeled_dataset.shape) - - print("Min: ", np.min(labeled_dataset[:, :3])) - print("Max: ", np.max(labeled_dataset[:, :3])) - print("Min: ", np.min(pcloud[:, :3])) - print("Max: ", np.max(pcloud[:, :3])) - - - # TODO: Take result clusters and cluster them by primitive type. + pc.draw_clusters(result_clusters) + # ------------------------------------------------------------------------------------------------------------------ + # Write clusters to file. + # ------------------------------------------------------------------------------------------------------------------ pc.write_clusters("clusters.txt", result_clusters) \ No newline at end of file
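Note (not part of the patch): the core of this commit is the two-stage "structured" clustering at the end of predict.py: the per-point predictions are first enriched with a one-hot primitive-type encoding (append_onehotencoded_type, columns 7-10), then DBSCAN is run on position + normal (columns 0-5), and any cluster that still mixes primitive types is re-clustered on position + one-hot type (columns 0, 1, 2, 7-10). The sketch below reproduces that flow with plain scikit-learn under the assumption that pc.cluster_dbscan wraps sklearn's DBSCAN the same way the removed extract_clusters did; dbscan_split and structured_clustering are illustrative names, not functions from this repository.

# Sketch, assuming four primitive types and the column layout documented in the
# patch: 0-2 position, 3-5 normal, 6 predicted type, 7-10 one-hot type.
import numpy as np
from sklearn.cluster import DBSCAN


def append_onehotencoded_type(data, factor=1.0):
    # Same helper the patch adds to pointcloud.py: one column per primitive type.
    types = data[:, 6].astype(int)
    res = np.zeros((len(types), 4))
    res[np.arange(len(types)), types] = factor
    return np.column_stack((data, res))


def dbscan_split(data, columns, eps, min_samples):
    # Stand-in for pc.cluster_dbscan: run DBSCAN on the selected columns and
    # return one ndarray per cluster, discarding noise points (label -1).
    labels = DBSCAN(eps=eps, min_samples=min_samples, n_jobs=-1).fit(data[:, columns]).labels_
    return [data[labels == l] for l in np.unique(labels) if l != -1]


def structured_clustering(labeled_points, eps=0.1, min_samples=50):
    labeled_points = append_onehotencoded_type(labeled_points)
    result = []
    # Stage 1: spatial pre-clustering on position + normal.
    for cluster in dbscan_split(labeled_points, [0, 1, 2, 3, 4, 5], eps, min_samples):
        if len(np.unique(cluster[:, 6])) == 1:
            result.append(cluster)  # already a single primitive type
            continue
        # Stage 2: re-cluster on position + one-hot type so every sub-cluster
        # ends up containing exactly one predicted primitive type.
        result.extend(dbscan_split(cluster, [0, 1, 2, 7, 8, 9, 10], eps, min_samples))
    # Drop tiny clusters, mirroring the `c.shape[0] > 100` filter in the patch.
    return [c for c in result if c.shape[0] > 100]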
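Note (not part of the patch): pointcloud.py's filter_clusters changes from a minimum-size parameter to an arbitrary predicate, so existing callers that passed an integer threshold now need a callable. The calls below are hypothetical, shown only to illustrate the new signature; the threshold 100 and the per-type check mirror filters used elsewhere in predict.py.

import numpy as np
import pointcloud as pc

# `clusters` is assumed to be a list of (n, 7) ndarrays (x y z nx ny nz type).
large_only = pc.filter_clusters(clusters, lambda c: len(c) >= 100)              # old: filter_clusters(clusters, 100)
single_type = pc.filter_clusters(clusters, lambda c: len(np.unique(c[:, 6])) == 1)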
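Note (not part of the patch): label2color and draw_sample_data now live in pointcloud.py, so a labelled result such as the labeled_dataset.txt written by the (currently commented-out) prediction stage can be inspected without predict.py. A minimal usage sketch, relying on the same legacy Open3D calls the module already uses (o3d.PointCloud, o3d.Vector3dVector, o3d.draw_geometries):

import numpy as np
import pointcloud as pc

labeled = np.loadtxt('labeled_dataset.txt')       # columns: x y z nx ny nz type
pc.draw_sample_data(labeled, colored_normals=False)  # colour points by predicted primitive type
pc.draw_sample_data(labeled, colored_normals=True)   # colour points by their normal vector instead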