added structured clustering

2019-08-13 18:46:40 +02:00
parent 557d49fefc
commit 07043bd39b
5 changed files with 74718 additions and 127 deletions
@@ -130,5 +130,4 @@ dmypy.json
 /vis/data/
 /vis/checkpoint
 /predict/data/
-/predict/checkpoint/

@@ -207,7 +207,7 @@ def write_clusters(path, clusters, type_column=6):

        print("Types: ", types)

-        np.savetxt(file, types, header='', comments='', fmt='%i')
+        np.savetxt(file, types.reshape(1, types.shape[0]),delimiter=';', header='', comments='', fmt='%i')
        np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='')


@@ -260,7 +260,7 @@ def filter_clusters(clusters, filter):


 def split_outliers(pc, columns):
-    clf = KNN()#FeatureBagging() # detector_list=[LOF(), KNN()]
+    clf = LOF()# FeatureBagging() # detector_list=[LOF(), KNN()]
    clf.fit(pc[:, columns])

    # LOF, kNN
@@ -62,8 +62,8 @@ def recreate_folder(folder):
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../')  # add project root directory

 parser = argparse.ArgumentParser()
-parser.add_argument('--npoints', type=int, default=4096, help='resample points number')
-parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path')
+parser.add_argument('--npoints', type=int, default=2048, help='resample points number')
+parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_131.pth', help='model path')
 parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result')
 parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers')
 parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals')
@@ -76,131 +76,132 @@ print(opt)

 if __name__ == '__main__':

-    # # ------------------------------------------------------------------------------------------------------------------
-    # # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction
-    # # ------------------------------------------------------------------------------------------------------------------
-    #
-    # # Create dataset
-    # print('Create data set ..')
-    #
-    # dataset_folder = './data/raw/predict/'
-    # pointcloud_file = './pointclouds/m3.xyz'
-    #
-    # # Load and pre-process point cloud
-    # pcloud = pc.read_pointcloud(pointcloud_file)
-    # pcloud = pc.normalize_pointcloud(pcloud, 1)
-    #
-    # #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
-    # #                                         selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.7, min_samples=5)
-    #
-    # pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4])
-    # print("Pre-Processing: Clustering")
-    # pc.draw_clusters(pc_clusters)
-    #
-    # recreate_folder(dataset_folder)
-    # for idx, pcc in enumerate(pc_clusters):
-    #
-    #     pcc = pc.farthest_point_sampling(pcc, opt.npoints)
-    #     recreate_folder(dataset_folder + str(idx) + '/')
-    #     pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
-    #     #draw_sample_data(pcc, True)
-    #
-    # # ------------------------------------------------------------------------------------------------------------------
-    # # Load point cloud clusters and model.
-    # # ------------------------------------------------------------------------------------------------------------------
-    #
-    # # Load dataset
-    # print('load dataset ..')
-    # test_transform = GT.Compose([GT.NormalizeScale(), ])
-    #
-    # test_dataset = ShapeNetPartSegDataset(
-    #     mode='predict',
-    #     root_dir='data',
-    #     with_normals=opt.with_normals,
-    #     npoints=opt.npoints,
-    #     refresh=True,
-    #     collate_per_segment=opt.collate_per_segment,
-    #     has_variations=opt.has_variations,
-    #     headers=opt.headers
-    # )
-    # num_classes = test_dataset.num_classes()
-    #
-    # # Load model
-    # print('Construct model ..')
-    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    # dtype = torch.float
-    #
-    # net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
-    # net.load_state_dict(torch.load(opt.model, map_location=device.type))
-    # net = net.to(device, dtype)
-    # net.eval()
-    #
-    # # ------------------------------------------------------------------------------------------------------------------
-    # # Predict per cluster.
-    # # ------------------------------------------------------------------------------------------------------------------
-    #
-    # labeled_dataset = None
-    # result_clusters = []
-    #
-    # # Iterate over all the samples and predict
-    # for idx, sample in enumerate(test_dataset):
-    #
-    #     predicted_label, _ = eval_sample(net, sample)
-    #     if opt.with_normals:
-    #         sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy()))
-    #     else:
-    #         sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy()))
-    #
-    #     result_clusters.append(sample_data)
-    #
-    #     if labeled_dataset is None:
-    #         labeled_dataset = sample_data
-    #     else:
-    #         labeled_dataset = np.vstack((labeled_dataset, sample_data))
-    #
-    #     print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset)))
-    #
-    # # ------------------------------------------------------------------------------------------------------------------
-    # # Remove cluster rows if the amount of points for a particular primitive type is below a threshold.
-    # # ------------------------------------------------------------------------------------------------------------------
-    #
-    # min_cluster_size = 10
-    # contamination = 0.01
-    #
-    # filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters)
-    # type_filtered_clusters = []
-    # for c in filtered_clusters:
-    #
-    #     prim_types = np.unique(c[:, 6])
-    #     pt_count = {}
-    #     for pt in prim_types:
-    #         pt_count[pt] = len(c[c[:, 6] == pt])
-    #
-    #     max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0]
-    #     min_size = pt_count[max_pt] * contamination
-    #
-    #     valid_types = []
-    #     for pt in prim_types:
-    #         if pt_count[pt] > min_size:
-    #             valid_types.append(pt)
-    #
-    #     filtered_c = c[np.isin(c[:, 6], valid_types)]
-    #     type_filtered_clusters.append(filtered_c)
-    # result_clusters = type_filtered_clusters
-    #
-    # labeled_dataset = np.vstack(result_clusters)
-    #
-    # np.savetxt('labeled_dataset.txt', labeled_dataset)
+    # ------------------------------------------------------------------------------------------------------------------
+    # Load the point cloud, cluster it, and write each cluster to its own point cloud file for later prediction
+    # ------------------------------------------------------------------------------------------------------------------
+
+    # Create dataset
+    print('Create data set ..')
+
+    dataset_folder = './data/raw/predict/'
+    pointcloud_file = './pointclouds/m3.xyz'
+
+    # Load and pre-process point cloud
+    pcloud = pc.read_pointcloud(pointcloud_file)
+    pcloud = pc.normalize_pointcloud(pcloud, 1)
+
+    #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
+    #                                         selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.7, min_samples=5)
+
+    pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4])
+    print("Pre-Processing: Clustering")
+    pc.draw_clusters(pc_clusters)
+
+    recreate_folder(dataset_folder)
+    for idx, pcc in enumerate(pc_clusters):
+         pcc = pc.farthest_point_sampling(pcc, opt.npoints)
+         recreate_folder(dataset_folder + str(idx) + '/')
+         pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
+         #draw_sample_data(pcc, True)
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Load point cloud clusters and model.
+    # ------------------------------------------------------------------------------------------------------------------
+
+    # Load dataset
+    print('load dataset ..')
+    test_transform = GT.Compose([GT.NormalizeScale(), ])
+
+    test_dataset = ShapeNetPartSegDataset(
+         mode='predict',
+         root_dir='data',
+         with_normals=opt.with_normals,
+         npoints=opt.npoints,
+         refresh=True,
+         collate_per_segment=opt.collate_per_segment,
+         has_variations=opt.has_variations,
+         headers=opt.headers
+    )
+    num_classes = test_dataset.num_classes()
+
+    # Load model
+    print('Construct model ..')
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    dtype = torch.float
+
+    net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
+    net.load_state_dict(torch.load(opt.model, map_location=device.type))
+    net = net.to(device, dtype)
+    net.eval()
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Predict per cluster.
+    # ------------------------------------------------------------------------------------------------------------------
+
+    labeled_dataset = None
+    result_clusters = []
+
+    # Iterate over all the samples and predict
+    for idx, sample in enumerate(test_dataset):
+
+         predicted_label, _ = eval_sample(net, sample)
+         if opt.with_normals:
+             sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy()))
+         else:
+             sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy()))
+
+         result_clusters.append(sample_data)
+
+         if labeled_dataset is None:
+             labeled_dataset = sample_data
+         else:
+             labeled_dataset = np.vstack((labeled_dataset, sample_data))
+
+         print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset)))
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Remove rows from a cluster if the number of points of a particular primitive type is below a threshold.
+    # ------------------------------------------------------------------------------------------------------------------
+
+    min_cluster_size = 10
+    contamination = 0.01
+
+    filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters)
+    type_filtered_clusters = []
+    for c in filtered_clusters:
+
+        prim_types = np.unique(c[:, 6])
+        pt_count = {}
+        for pt in prim_types:
+            pt_count[pt] = len(c[c[:, 6] == pt])
+
+        max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0]
+        min_size = pt_count[max_pt] * contamination
+
+        valid_types = []
+        for pt in prim_types:
+            if pt_count[pt] > min_size:
+                valid_types.append(pt)
+
+        filtered_c = c[np.isin(c[:, 6], valid_types)]
+        type_filtered_clusters.append(filtered_c)
+
+    result_clusters = type_filtered_clusters
+
+    labeled_dataset = np.vstack(result_clusters)
+
+    np.savetxt('labeled_dataset.txt', labeled_dataset)

    # ------------------------------------------------------------------------------------------------------------------
    # Clustering that results in per-primitive type clusters
    # ------------------------------------------------------------------------------------------------------------------

-    labeled_dataset = np.loadtxt('labeled_dataset.txt')
-    # pc.draw_sample_data(labeled_dataset)
+    # labeled_dataset = np.loadtxt('labeled_dataset.txt')
+    pc.draw_sample_data(labeled_dataset)

-
-    # TODO: Filter labeled dataset to get rid of edge clusters
+    # Try to get rid of outliers.
+    labeled_dataset,outliers = pc.split_outliers(labeled_dataset, columns=[0,1,2,3,4,5])
+    pc.draw_sample_data(outliers, False)

    print("Final clustering..")

@@ -210,7 +211,7 @@ if __name__ == '__main__':

    total_clusters = []

-    clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50)
+    clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=100)
    print("Pre-clustering done. Clusters: ", len(clusters))
    pc.draw_clusters(clusters)

@@ -224,7 +225,7 @@ if __name__ == '__main__':
            print("No need for 2nd level clustering since there is only a single primitive type in the cluster.")
            total_clusters.append(cluster)
        else:
-            sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50)
+            sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=100)
            print("Sub clusters: ", len(sub_clusters))
            total_clusters.extend(sub_clusters)

@@ -233,7 +234,7 @@ if __name__ == '__main__':
    for cluster in result_clusters:
        print("Cluster: ", cluster.shape[0])

-        pc.draw_sample_data(cluster, False)
+        # pc.draw_sample_data(cluster, False)

    print("Number of clusters: ", len(result_clusters))