From 557d49fefcaf4d4f0745f7c57f355a19b8b7324b Mon Sep 17 00:00:00 2001 From: Markus Friedrich Date: Mon, 12 Aug 2019 19:13:04 +0200 Subject: [PATCH] added structured clustering --- pointcloud.py | 52 +++++- predict/predict.py | 404 +++++++++++++++++---------------------------- 2 files changed, 198 insertions(+), 258 deletions(-) diff --git a/pointcloud.py b/pointcloud.py index beea392..53f7d29 100644 --- a/pointcloud.py +++ b/pointcloud.py @@ -37,7 +37,7 @@ def mini_color_table(index, norm=True): return color -def clusterToColor(cluster, cluster_idx): +def cluster2Color(cluster, cluster_idx): colors = np.zeros(shape=(len(cluster), 3)) point_idx = 0 @@ -48,6 +48,21 @@ def clusterToColor(cluster, cluster_idx): return colors +def label2color(labels): + ''' + labels: np.ndarray with shape (n, ) + colors(return): np.ndarray with shape (n, 3) + ''' + num = labels.shape[0] + colors = np.zeros((num, 3)) + + minl, maxl = np.min(labels), np.max(labels) + for l in range(minl, maxl + 1): + colors[labels == l, :] = mini_color_table(l) + + return colors + + def read_pointcloud(path, delimiter=' ', hasHeader=True): with open(path, 'r') as f: if hasHeader: @@ -174,7 +189,7 @@ def draw_clusters(clusters): cloud = o3d.PointCloud() cloud.points = o3d.Vector3dVector(cluster[:,:3]) - cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx)) + cloud.colors = o3d.Vector3dVector(cluster2Color(cluster, cluster_idx)) clouds.append(cloud) o3d.draw_geometries(clouds) @@ -186,14 +201,26 @@ def write_clusters(path, clusters, type_column=6): file.write(str(len(clusters)) + "\n") for cluster in clusters: - print("Types: ", cluster[:, type_column]) + # print("Types: ", cluster[:, type_column]) - types = np.unique(cluster[:, type_column], axis=0) + types = np.unique(cluster[:, type_column], axis=0).astype(int) - np.savetxt(file, types, header='', comments='') + print("Types: ", types) + + np.savetxt(file, types, header='', comments='', fmt='%i') np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='') +def draw_sample_data(sample_data, colored_normals = False): + + cloud = o3d.PointCloud() + cloud.points = o3d.Vector3dVector(sample_data[:, :3]) + cloud.colors = \ + o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6]) + + o3d.draw_geometries([cloud]) + + def normalize_pointcloud(pc, factor=1.0): max = pc.max(axis=0) @@ -221,12 +248,12 @@ def hierarchical_clustering(data, selected_indices_0, selected_indices_1, eps, m return total_clusters -def filter_clusters(clusters, min_size): +def filter_clusters(clusters, filter): filtered_clusters = [] for c in clusters: - if len(c) >= min_size: + if filter(c): filtered_clusters.append(c) return filtered_clusters @@ -238,4 +265,13 @@ def split_outliers(pc, columns): # LOF, kNN - return pc[clf.labels_ == 0], pc[clf.labels_ == 1] \ No newline at end of file + return pc[clf.labels_ == 0], pc[clf.labels_ == 1] + + +def append_onehotencoded_type(data, factor = 1.0): + + types = data[:, 6].astype(int) + res = np.zeros((len(types), 4)) + res[np.arange(len(types)), types] = factor + + return np.column_stack((data, res)) diff --git a/predict/predict.py b/predict/predict.py index 12849f1..e926f99 100644 --- a/predict/predict.py +++ b/predict/predict.py @@ -2,21 +2,19 @@ import sys import os import shutil import math - -sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory - from dataset.shapenet import ShapeNetPartSegDataset from 
model.pointnet2_part_seg import PointNet2PartSegmentNet import torch_geometric.transforms as GT import torch import argparse from distutils.util import strtobool - import numpy as np -from sklearn.cluster import DBSCAN -from sklearn.preprocessing import StandardScaler -import open3d as o3d import pointcloud as pc +import operator + + +sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory + def eval_sample(net, sample): ''' @@ -42,78 +40,6 @@ def eval_sample(net, sample): return (pred_label, gt_label) -def mini_color_table(index, norm=True): - colors = [ - [0.5000, 0.5400, 0.5300], [0.8900, 0.1500, 0.2100], [0.6400, 0.5800, 0.5000], - [1.0000, 0.3800, 0.0100], [1.0000, 0.6600, 0.1400], [0.4980, 1.0000, 0.0000], - [0.4980, 1.0000, 0.8314], [0.9412, 0.9725, 1.0000], [0.5412, 0.1686, 0.8863], - [0.5765, 0.4392, 0.8588], [0.3600, 0.1400, 0.4300], [0.5600, 0.3700, 0.6000], - ] - - color = colors[index % len(colors)] - - if not norm: - color[0] *= 255 - color[1] *= 255 - color[2] *= 255 - - return color - - -def label2color(labels): - ''' - labels: np.ndarray with shape (n, ) - colors(return): np.ndarray with shape (n, 3) - ''' - num = labels.shape[0] - colors = np.zeros((num, 3)) - - minl, maxl = np.min(labels), np.max(labels) - for l in range(minl, maxl + 1): - colors[labels == l, :] = mini_color_table(l) - - return colors - - -def clusterToColor(cluster, cluster_idx): - - colors = np.zeros(shape=(len(cluster), 3)) - point_idx = 0 - for point in cluster: - colors[point_idx, :] = mini_color_table(cluster_idx) - point_idx += 1 - - return colors - - - -def farthest_point_sampling(pts, K): - - if pts.shape[0] < K: - return pts - - def calc_distances(p0, points): - return ((p0[:3] - points[:, :3]) ** 2).sum(axis=1) - - farthest_pts = np.zeros((K, pts.shape[1])) - farthest_pts[0] = pts[np.random.randint(len(pts))] - distances = calc_distances(farthest_pts[0], pts) - for i in range(1, K): - farthest_pts[i] = pts[np.argmax(distances)] - distances = np.minimum(distances, calc_distances(farthest_pts[i], pts)) - - return farthest_pts - - -def append_onehotencoded_type(data, factor = 1.0): - - types = data[:, 6].astype(int) - res = np.zeros((len(types), 4)) - res[np.arange(len(types)), types] = factor - - return np.column_stack((data, res)) - - def append_normal_angles(data): def func(x): @@ -128,64 +54,6 @@ def append_normal_angles(data): return np.column_stack((data, res)) -def extract_clusters(data, selected_indices, eps, min_samples, metric='euclidean', algo='auto'): - - min_samples = min_samples * len(data) - - print('Clustering. 
Min Samples: ' + str(min_samples) + ' EPS: ' + str(eps)) - - # 0,1,2 : pos - # 3,4,5 : normal - # 6: type index - # 7,8,9,10: type index one hot encoded - # 11,12: normal as angles - - db_res = DBSCAN(eps=eps, metric=metric, n_jobs=-1, algorithm=algo, min_samples=min_samples).fit(data[:, selected_indices]) - - labels = db_res.labels_ - n_clusters = len(set(labels)) - (1 if -1 in labels else 0) - n_noise = list(labels).count(-1) - print("Noise: " + str(n_noise) + " Clusters: " + str(n_clusters)) - - clusters = {} - for idx, l in enumerate(labels): - if l is -1: - continue - clusters.setdefault(str(l), []).append(data[idx, :]) - - npClusters = [] - for cluster in clusters.values(): - npClusters.append(np.array(cluster)) - - return npClusters - - -def draw_clusters(clusters): - - clouds = [] - - cluster_idx = 0 - for cluster in clusters: - - cloud = o3d.PointCloud() - cloud.points = o3d.Vector3dVector(cluster[:,:3]) - cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx)) - clouds.append(cloud) - cluster_idx += 1 - - o3d.draw_geometries(clouds) - - -def draw_sample_data(sample_data, colored_normals = False): - - cloud = o3d.PointCloud() - cloud.points = o3d.Vector3dVector(sample_data[:, :3]) - cloud.colors = \ - o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6]) - - o3d.draw_geometries([cloud]) - - def recreate_folder(folder): if os.path.exists(folder) and os.path.isdir(folder): shutil.rmtree(folder) @@ -194,8 +62,8 @@ def recreate_folder(folder): sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory parser = argparse.ArgumentParser() -parser.add_argument('--npoints', type=int, default=2048, help='resample points number') -parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_3.pth', help='model path') +parser.add_argument('--npoints', type=int, default=4096, help='resample points number') +parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path') parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result') parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers') parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals') @@ -203,139 +71,175 @@ parser.add_argument('--collate_per_segment', type=strtobool, default=True, help= parser.add_argument('--has_variations', type=strtobool, default=False, help='whether a single pointcloud has variations ' 'named int(id)_pc.(xyz|dat) look at pointclouds or sub') - opt = parser.parse_args() print(opt) if __name__ == '__main__': - # Create dataset - print('Create data set ..') + # # ------------------------------------------------------------------------------------------------------------------ + # # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction + # # ------------------------------------------------------------------------------------------------------------------ + # + # # Create dataset + # print('Create data set ..') + # + # dataset_folder = './data/raw/predict/' + # pointcloud_file = './pointclouds/m3.xyz' + # + # # Load and pre-process point cloud + # pcloud = pc.read_pointcloud(pointcloud_file) + # pcloud = pc.normalize_pointcloud(pcloud, 1) + # + # #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5], + # # selected_indices_1=[0, 
1, 2, 3, 4, 5], eps=0.7, min_samples=5) + # + # pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4]) + # print("Pre-Processing: Clustering") + # pc.draw_clusters(pc_clusters) + # + # recreate_folder(dataset_folder) + # for idx, pcc in enumerate(pc_clusters): + # + # pcc = pc.farthest_point_sampling(pcc, opt.npoints) + # recreate_folder(dataset_folder + str(idx) + '/') + # pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc) + # #draw_sample_data(pcc, True) + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Load point cloud clusters and model. + # # ------------------------------------------------------------------------------------------------------------------ + # + # # Load dataset + # print('load dataset ..') + # test_transform = GT.Compose([GT.NormalizeScale(), ]) + # + # test_dataset = ShapeNetPartSegDataset( + # mode='predict', + # root_dir='data', + # with_normals=opt.with_normals, + # npoints=opt.npoints, + # refresh=True, + # collate_per_segment=opt.collate_per_segment, + # has_variations=opt.has_variations, + # headers=opt.headers + # ) + # num_classes = test_dataset.num_classes() + # + # # Load model + # print('Construct model ..') + # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') + # dtype = torch.float + # + # net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals) + # net.load_state_dict(torch.load(opt.model, map_location=device.type)) + # net = net.to(device, dtype) + # net.eval() + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Predict per cluster. + # # ------------------------------------------------------------------------------------------------------------------ + # + # labeled_dataset = None + # result_clusters = [] + # + # # Iterate over all the samples and predict + # for idx, sample in enumerate(test_dataset): + # + # predicted_label, _ = eval_sample(net, sample) + # if opt.with_normals: + # sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy())) + # else: + # sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy())) + # + # result_clusters.append(sample_data) + # + # if labeled_dataset is None: + # labeled_dataset = sample_data + # else: + # labeled_dataset = np.vstack((labeled_dataset, sample_data)) + # + # print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset))) + # + # # ------------------------------------------------------------------------------------------------------------------ + # # Remove cluster rows if the amount of points for a particular primitive type is below a threshold. 
+ # # ------------------------------------------------------------------------------------------------------------------ + # + # min_cluster_size = 10 + # contamination = 0.01 + # + # filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters) + # type_filtered_clusters = [] + # for c in filtered_clusters: + # + # prim_types = np.unique(c[:, 6]) + # pt_count = {} + # for pt in prim_types: + # pt_count[pt] = len(c[c[:, 6] == pt]) + # + # max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0] + # min_size = pt_count[max_pt] * contamination + # + # valid_types = [] + # for pt in prim_types: + # if pt_count[pt] > min_size: + # valid_types.append(pt) + # + # filtered_c = c[np.isin(c[:, 6], valid_types)] + # type_filtered_clusters.append(filtered_c) + # result_clusters = type_filtered_clusters + # + # labeled_dataset = np.vstack(result_clusters) + # + # np.savetxt('labeled_dataset.txt', labeled_dataset) - dataset_folder = './data/raw/predict/' - pointcloud_file = './pointclouds/0_0.xyz' + # ------------------------------------------------------------------------------------------------------------------ + # Clustering that results in per-primitive type clusters + # ------------------------------------------------------------------------------------------------------------------ - # Load and pre-process point cloud - pcloud = pc.read_pointcloud(pointcloud_file) - pcloud = pc.normalize_pointcloud(pcloud, 1) - - #a, b = pc.split_outliers(pcloud, [3, 4, 5]) - #draw_sample_data(a, True) - #draw_sample_data(b, True) - #pcloud = a + labeled_dataset = np.loadtxt('labeled_dataset.txt') + # pc.draw_sample_data(labeled_dataset) - # for 0_0.xyz: pc.hierarchical_clustering(pcloud, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5) + # TODO: Filter labeled dataset to get rid of edge clusters + print("Final clustering..") - #pc_clusters = pc.cluster_dbscan(pcloud, [0, 1, 2, 3,4,5], eps=0.5, min_samples=5) - #pc_clusters = pc.filter_clusters(pc_clusters, 100) + labeled_dataset = pc.append_onehotencoded_type(labeled_dataset) - #pc_clusters = [pcloud] + print("Test row: ", labeled_dataset[:1, :]) - #print("NUM CLUSTERS: ", len(pc_clusters)) + total_clusters = [] - #draw_clusters(pc_clusters) - #for c in pc_clusters: - #draw_sample_data(c, True) - # print("Cluster Size: ", len(c)) + clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50) + print("Pre-clustering done. 
Clusters: ", len(clusters)) + pc.draw_clusters(clusters) + for cluster in clusters: + #cluster = pc.normalize_pointcloud(cluster) + print("2nd level clustering ..") - # draw_sample_data(pcloud) - - pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5], - selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5) - # pc.cluster_cubes(pcloud, [4, 4, 4]) - - recreate_folder(dataset_folder) - for idx, pcc in enumerate(pc_clusters): - - pcc = farthest_point_sampling(pcc, opt.npoints) - recreate_folder(dataset_folder + str(idx) + '/') - pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc) - # draw_sample_data(pcc, False) - - # Load dataset - print('load dataset ..') - test_transform = GT.Compose([GT.NormalizeScale(), ]) - - test_dataset = ShapeNetPartSegDataset( - mode='predict', - root_dir='data', - with_normals=opt.with_normals, - npoints=opt.npoints, - refresh=True, - collate_per_segment=opt.collate_per_segment, - has_variations=opt.has_variations, - headers=opt.headers - ) - - num_classes = test_dataset.num_classes() - - # Load model - print('Construct model ..') - device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') - dtype = torch.float - - # net = PointNetPartSegmentNet(num_classes) - net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals) - - net.load_state_dict(torch.load(opt.model, map_location=device.type)) - net = net.to(device, dtype) - net.eval() - - labeled_dataset = None - result_clusters = [] - # Iterate over all the samples and predict - for sample in test_dataset: - - # Predict - - pred_label, gt_label = eval_sample(net, sample) - if opt.with_normals: - sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy())) + prim_types_in_cluster = len(np.unique(cluster[:, 6], axis=0)) + if prim_types_in_cluster == 1: + print("No need for 2nd level clustering since there is only a single primitive type in the cluster.") + total_clusters.append(cluster) else: - sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], pred_label.numpy())) + sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50) + print("Sub clusters: ", len(sub_clusters)) + total_clusters.extend(sub_clusters) - # draw_sample_data(sample_data, False) + result_clusters = list(filter(lambda c: c.shape[0] > 100, total_clusters)) - #print("Sample Datat: ", sample_data[:5, :]) - #print('Eval done.') - print("PRED LABEL: ", pred_label) + for cluster in result_clusters: + print("Cluster: ", cluster.shape[0]) - #sample_data = normalize_pointcloud(sample_data) - #sample_data = append_onehotencoded_type(sample_data, 1.0) - #sample_data = append_normal_angles(sample_data) + pc.draw_sample_data(cluster, False) - # print('Clustering ..') - # print('Shape: ' + str(sample_data.shape)) + print("Number of clusters: ", len(result_clusters)) - # clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10], eps=0.1, min_samples=0.0001, metric='euclidean', algo='auto') - # print('Clustering done. 
' + str(len(clusters)) + " Clusters.") - # print(sample_data[:, 6]) - - # draw_sample_data(sample_data, False) - - # result_clusters.extend(clusters) - result_clusters.append(sample_data) - - if labeled_dataset is None: - labeled_dataset = sample_data - else: - labeled_dataset = np.vstack((labeled_dataset, sample_data)) - - print("prediction done") - - draw_sample_data(labeled_dataset, False) - print("point cloud size: ", labeled_dataset.shape) - - print("Min: ", np.min(labeled_dataset[:, :3])) - print("Max: ", np.max(labeled_dataset[:, :3])) - print("Min: ", np.min(pcloud[:, :3])) - print("Max: ", np.max(pcloud[:, :3])) - - - # TODO: Take result clusters and cluster them by primitive type. + pc.draw_clusters(result_clusters) + # ------------------------------------------------------------------------------------------------------------------ + # Write clusters to file. + # ------------------------------------------------------------------------------------------------------------------ pc.write_clusters("clusters.txt", result_clusters) \ No newline at end of file
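Note (not part of the patch): the core of this commit is the two-stage "structured" clustering at the end of predict.py: the per-point predictions are first enriched with a one-hot primitive-type encoding (append_onehotencoded_type, columns 7-10), then DBSCAN is run on position + normal (columns 0-5), and any cluster that still mixes primitive types is re-clustered on position + one-hot type (columns 0, 1, 2, 7-10). The sketch below reproduces that flow with plain scikit-learn under the assumption that pc.cluster_dbscan wraps sklearn's DBSCAN the same way the removed extract_clusters did; dbscan_split and structured_clustering are illustrative names, not functions from this repository.

# Sketch, assuming four primitive types and the column layout documented in the
# patch: 0-2 position, 3-5 normal, 6 predicted type, 7-10 one-hot type.
import numpy as np
from sklearn.cluster import DBSCAN


def append_onehotencoded_type(data, factor=1.0):
    # Same helper the patch adds to pointcloud.py: one column per primitive type.
    types = data[:, 6].astype(int)
    res = np.zeros((len(types), 4))
    res[np.arange(len(types)), types] = factor
    return np.column_stack((data, res))


def dbscan_split(data, columns, eps, min_samples):
    # Stand-in for pc.cluster_dbscan: run DBSCAN on the selected columns and
    # return one ndarray per cluster, discarding noise points (label -1).
    labels = DBSCAN(eps=eps, min_samples=min_samples, n_jobs=-1).fit(data[:, columns]).labels_
    return [data[labels == l] for l in np.unique(labels) if l != -1]


def structured_clustering(labeled_points, eps=0.1, min_samples=50):
    labeled_points = append_onehotencoded_type(labeled_points)
    result = []
    # Stage 1: spatial pre-clustering on position + normal.
    for cluster in dbscan_split(labeled_points, [0, 1, 2, 3, 4, 5], eps, min_samples):
        if len(np.unique(cluster[:, 6])) == 1:
            result.append(cluster)  # already a single primitive type
            continue
        # Stage 2: re-cluster on position + one-hot type so every sub-cluster
        # ends up containing exactly one predicted primitive type.
        result.extend(dbscan_split(cluster, [0, 1, 2, 7, 8, 9, 10], eps, min_samples))
    # Drop tiny clusters, mirroring the `c.shape[0] > 100` filter in the patch.
    return [c for c in result if c.shape[0] > 100]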
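Note (not part of the patch): pointcloud.py's filter_clusters changes from a minimum-size parameter to an arbitrary predicate, so existing callers that passed an integer threshold now need a callable. The calls below are hypothetical, shown only to illustrate the new signature; the threshold 100 and the per-type check mirror filters used elsewhere in predict.py.

import numpy as np
import pointcloud as pc

# `clusters` is assumed to be a list of (n, 7) ndarrays (x y z nx ny nz type).
large_only = pc.filter_clusters(clusters, lambda c: len(c) >= 100)              # old: filter_clusters(clusters, 100)
single_type = pc.filter_clusters(clusters, lambda c: len(np.unique(c[:, 6])) == 1)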
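Note (not part of the patch): label2color and draw_sample_data now live in pointcloud.py, so a labelled result such as the labeled_dataset.txt written by the (currently commented-out) prediction stage can be inspected without predict.py. A minimal usage sketch, relying on the same legacy Open3D calls the module already uses (o3d.PointCloud, o3d.Vector3dVector, o3d.draw_geometries):

import numpy as np
import pointcloud as pc

labeled = np.loadtxt('labeled_dataset.txt')       # columns: x y z nx ny nz type
pc.draw_sample_data(labeled, colored_normals=False)  # colour points by predicted primitive type
pc.draw_sample_data(labeled, colored_normals=True)   # colour points by their normal vector instead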