added structured clustering

Markus Friedrich 2019-08-12 19:13:04 +02:00
parent e4cc447f68
commit 557d49fefc
2 changed files with 198 additions and 258 deletions
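The structured clustering added in the prediction script below is a two-stage scheme: the labeled point cloud is first pre-clustered with DBSCAN on position and normals, and every pre-cluster that still mixes primitive types is re-clustered on position plus the one-hot encoded type columns. A minimal sketch of the idea, assuming the x, y, z, nx, ny, nz, type(, one-hot type) column layout used in this repo and calling sklearn's DBSCAN directly instead of the pc.cluster_dbscan wrapper:

import numpy as np
from sklearn.cluster import DBSCAN

def two_level_clustering(points, eps=0.1, min_samples=50):
    # Level 1: spatial pre-clustering on position + normals (columns 0..5).
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(points[:, :6]).labels_
    pre_clusters = [points[labels == l] for l in set(labels) if l != -1]

    total = []
    for c in pre_clusters:
        if len(np.unique(c[:, 6])) == 1:
            total.append(c)  # a single primitive type: keep the cluster as is
        else:
            # Level 2: re-cluster on position + one-hot type columns (7..10) so that
            # points belonging to different primitive types separate.
            feats = np.column_stack((c[:, :3], c[:, 7:11]))
            sub = DBSCAN(eps=eps, min_samples=min_samples).fit(feats).labels_
            total.extend(c[sub == l] for l in set(sub) if l != -1)
    return [c for c in total if c.shape[0] > 100]  # drop tiny clusters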

View File

@@ -37,7 +37,7 @@ def mini_color_table(index, norm=True):
     return color

-def clusterToColor(cluster, cluster_idx):
+def cluster2Color(cluster, cluster_idx):
     colors = np.zeros(shape=(len(cluster), 3))
     point_idx = 0
@@ -48,6 +48,21 @@ def clusterToColor(cluster, cluster_idx):
     return colors

+def label2color(labels):
+    '''
+    labels: np.ndarray with shape (n, )
+    colors(return): np.ndarray with shape (n, 3)
+    '''
+    num = labels.shape[0]
+    colors = np.zeros((num, 3))
+    minl, maxl = np.min(labels), np.max(labels)
+    for l in range(minl, maxl + 1):
+        colors[labels == l, :] = mini_color_table(l)
+    return colors
+
 def read_pointcloud(path, delimiter=' ', hasHeader=True):
     with open(path, 'r') as f:
         if hasHeader:
@@ -174,7 +189,7 @@ def draw_clusters(clusters):
         cloud = o3d.PointCloud()
         cloud.points = o3d.Vector3dVector(cluster[:,:3])
-        cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
+        cloud.colors = o3d.Vector3dVector(cluster2Color(cluster, cluster_idx))
         clouds.append(cloud)

     o3d.draw_geometries(clouds)
@@ -186,14 +201,26 @@ def write_clusters(path, clusters, type_column=6):
         file.write(str(len(clusters)) + "\n")

         for cluster in clusters:
-            print("Types: ", cluster[:, type_column])
-            types = np.unique(cluster[:, type_column], axis=0)
-            np.savetxt(file, types, header='', comments='')
+            # print("Types: ", cluster[:, type_column])
+            types = np.unique(cluster[:, type_column], axis=0).astype(int)
+            print("Types: ", types)
+            np.savetxt(file, types, header='', comments='', fmt='%i')
             np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='')

+def draw_sample_data(sample_data, colored_normals = False):
+    cloud = o3d.PointCloud()
+    cloud.points = o3d.Vector3dVector(sample_data[:, :3])
+    cloud.colors = \
+        o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
+
+    o3d.draw_geometries([cloud])
+
 def normalize_pointcloud(pc, factor=1.0):
     max = pc.max(axis=0)
@@ -221,12 +248,12 @@ def hierarchical_clustering(data, selected_indices_0, selected_indices_1, eps, m
     return total_clusters

-def filter_clusters(clusters, min_size):
+def filter_clusters(clusters, filter):
     filtered_clusters = []
     for c in clusters:
-        if len(c) >= min_size:
+        if filter(c):
            filtered_clusters.append(c)
    return filtered_clusters
@@ -238,4 +265,13 @@ def split_outliers(pc, columns):
     # LOF, kNN
     return pc[clf.labels_ == 0], pc[clf.labels_ == 1]

+def append_onehotencoded_type(data, factor = 1.0):
+    types = data[:, 6].astype(int)
+    res = np.zeros((len(types), 4))
+    res[np.arange(len(types)), types] = factor
+    return np.column_stack((data, res))
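For reference, the helpers added above compose roughly as follows; a self-contained toy sketch (the three-entry palette only stands in for the module's twelve-color mini_color_table):

import numpy as np

_PALETTE = np.array([[0.89, 0.15, 0.21], [0.50, 1.00, 0.00], [0.36, 0.14, 0.43]])

def label2color(labels):
    # Map integer labels of shape (n,) to RGB colors of shape (n, 3).
    colors = np.zeros((labels.shape[0], 3))
    for l in range(labels.min(), labels.max() + 1):
        colors[labels == l, :] = _PALETTE[l % len(_PALETTE)]
    return colors

def append_onehotencoded_type(data, factor=1.0):
    # Append a 4-column one-hot encoding of the type column (index 6).
    types = data[:, 6].astype(int)
    onehot = np.zeros((len(types), 4))
    onehot[np.arange(len(types)), types] = factor
    return np.column_stack((data, onehot))

# Three toy points laid out as x, y, z, nx, ny, nz, type.
data = np.array([[0., 0., 0., 0., 0., 1., 0.],
                 [1., 0., 0., 0., 0., 1., 2.],
                 [0., 1., 0., 0., 0., 1., 1.]])
print(label2color(data[:, 6].astype(int)))    # (3, 3) RGB array, one row per point
print(append_onehotencoded_type(data).shape)  # (3, 11): 7 input columns + 4 one-hot columns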

View File

@@ -2,21 +2,19 @@ import sys
 import os
 import shutil
 import math
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
+
 from dataset.shapenet import ShapeNetPartSegDataset
 from model.pointnet2_part_seg import PointNet2PartSegmentNet
 import torch_geometric.transforms as GT
 import torch
 import argparse
 from distutils.util import strtobool
 import numpy as np
-from sklearn.cluster import DBSCAN
-from sklearn.preprocessing import StandardScaler
-import open3d as o3d
 import pointcloud as pc
-import operator
-
-sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory

 def eval_sample(net, sample):
     '''
@@ -42,78 +40,6 @@ def eval_sample(net, sample):
     return (pred_label, gt_label)

-def mini_color_table(index, norm=True):
-    colors = [
-        [0.5000, 0.5400, 0.5300], [0.8900, 0.1500, 0.2100], [0.6400, 0.5800, 0.5000],
-        [1.0000, 0.3800, 0.0100], [1.0000, 0.6600, 0.1400], [0.4980, 1.0000, 0.0000],
-        [0.4980, 1.0000, 0.8314], [0.9412, 0.9725, 1.0000], [0.5412, 0.1686, 0.8863],
-        [0.5765, 0.4392, 0.8588], [0.3600, 0.1400, 0.4300], [0.5600, 0.3700, 0.6000],
-    ]
-    color = colors[index % len(colors)]
-    if not norm:
-        color[0] *= 255
-        color[1] *= 255
-        color[2] *= 255
-    return color
-
-def label2color(labels):
-    '''
-    labels: np.ndarray with shape (n, )
-    colors(return): np.ndarray with shape (n, 3)
-    '''
-    num = labels.shape[0]
-    colors = np.zeros((num, 3))
-    minl, maxl = np.min(labels), np.max(labels)
-    for l in range(minl, maxl + 1):
-        colors[labels == l, :] = mini_color_table(l)
-    return colors
-
-def clusterToColor(cluster, cluster_idx):
-    colors = np.zeros(shape=(len(cluster), 3))
-    point_idx = 0
-    for point in cluster:
-        colors[point_idx, :] = mini_color_table(cluster_idx)
-        point_idx += 1
-    return colors
-
-def farthest_point_sampling(pts, K):
-    if pts.shape[0] < K:
-        return pts
-
-    def calc_distances(p0, points):
-        return ((p0[:3] - points[:, :3]) ** 2).sum(axis=1)
-
-    farthest_pts = np.zeros((K, pts.shape[1]))
-    farthest_pts[0] = pts[np.random.randint(len(pts))]
-    distances = calc_distances(farthest_pts[0], pts)
-    for i in range(1, K):
-        farthest_pts[i] = pts[np.argmax(distances)]
-        distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))
-    return farthest_pts
-
-def append_onehotencoded_type(data, factor = 1.0):
-    types = data[:, 6].astype(int)
-    res = np.zeros((len(types), 4))
-    res[np.arange(len(types)), types] = factor
-    return np.column_stack((data, res))

 def append_normal_angles(data):
     def func(x):
@@ -128,64 +54,6 @@ def append_normal_angles(data):
     return np.column_stack((data, res))

-def extract_clusters(data, selected_indices, eps, min_samples, metric='euclidean', algo='auto'):
-    min_samples = min_samples * len(data)
-    print('Clustering. Min Samples: ' + str(min_samples) + ' EPS: ' + str(eps))
-
-    # 0,1,2 : pos
-    # 3,4,5 : normal
-    # 6: type index
-    # 7,8,9,10: type index one hot encoded
-    # 11,12: normal as angles
-    db_res = DBSCAN(eps=eps, metric=metric, n_jobs=-1, algorithm=algo, min_samples=min_samples).fit(data[:, selected_indices])
-
-    labels = db_res.labels_
-    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
-    n_noise = list(labels).count(-1)
-    print("Noise: " + str(n_noise) + " Clusters: " + str(n_clusters))
-
-    clusters = {}
-    for idx, l in enumerate(labels):
-        if l is -1:
-            continue
-        clusters.setdefault(str(l), []).append(data[idx, :])
-
-    npClusters = []
-    for cluster in clusters.values():
-        npClusters.append(np.array(cluster))
-
-    return npClusters
-
-def draw_clusters(clusters):
-    clouds = []
-
-    cluster_idx = 0
-    for cluster in clusters:
-        cloud = o3d.PointCloud()
-        cloud.points = o3d.Vector3dVector(cluster[:,:3])
-        cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
-        clouds.append(cloud)
-        cluster_idx += 1
-
-    o3d.draw_geometries(clouds)
-
-def draw_sample_data(sample_data, colored_normals = False):
-    cloud = o3d.PointCloud()
-    cloud.points = o3d.Vector3dVector(sample_data[:, :3])
-    cloud.colors = \
-        o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
-
-    o3d.draw_geometries([cloud])

 def recreate_folder(folder):
     if os.path.exists(folder) and os.path.isdir(folder):
         shutil.rmtree(folder)
@@ -194,8 +62,8 @@ def recreate_folder(folder):
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory

 parser = argparse.ArgumentParser()
-parser.add_argument('--npoints', type=int, default=2048, help='resample points number')
-parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_3.pth', help='model path')
+parser.add_argument('--npoints', type=int, default=4096, help='resample points number')
+parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path')
 parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result')
 parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers')
 parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals')
@@ -203,139 +71,175 @@ parser.add_argument('--collate_per_segment', type=strtobool, default=True, help=
 parser.add_argument('--has_variations', type=strtobool, default=False,
                     help='whether a single pointcloud has variations '
                          'named int(id)_pc.(xyz|dat) look at pointclouds or sub')
 opt = parser.parse_args()
 print(opt)

 if __name__ == '__main__':

-    # Create dataset
-    print('Create data set ..')
-
-    dataset_folder = './data/raw/predict/'
-    pointcloud_file = './pointclouds/0_0.xyz'
-
-    # Load and pre-process point cloud
-    pcloud = pc.read_pointcloud(pointcloud_file)
-    pcloud = pc.normalize_pointcloud(pcloud, 1)
-
-    #a, b = pc.split_outliers(pcloud, [3, 4, 5])
-    #draw_sample_data(a, True)
-    #draw_sample_data(b, True)
-    #pcloud = a
-
-    # for 0_0.xyz: pc.hierarchical_clustering(pcloud, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
-    #pc_clusters = pc.cluster_dbscan(pcloud, [0, 1, 2, 3,4,5], eps=0.5, min_samples=5)
-    #pc_clusters = pc.filter_clusters(pc_clusters, 100)
-    #pc_clusters = [pcloud]
-    #print("NUM CLUSTERS: ", len(pc_clusters))
-    #draw_clusters(pc_clusters)
-    #for c in pc_clusters:
-    #draw_sample_data(c, True)
-    # print("Cluster Size: ", len(c))
-
-    # draw_sample_data(pcloud)
-
-    pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
-                                             selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
-    # pc.cluster_cubes(pcloud, [4, 4, 4])
-
-    recreate_folder(dataset_folder)
-    for idx, pcc in enumerate(pc_clusters):
-        pcc = farthest_point_sampling(pcc, opt.npoints)
-        recreate_folder(dataset_folder + str(idx) + '/')
-        pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
-        # draw_sample_data(pcc, False)
-
-    # Load dataset
-    print('load dataset ..')
-    test_transform = GT.Compose([GT.NormalizeScale(), ])
-
-    test_dataset = ShapeNetPartSegDataset(
-        mode='predict',
-        root_dir='data',
-        with_normals=opt.with_normals,
-        npoints=opt.npoints,
-        refresh=True,
-        collate_per_segment=opt.collate_per_segment,
-        has_variations=opt.has_variations,
-        headers=opt.headers
-    )
-    num_classes = test_dataset.num_classes()
-
-    # Load model
-    print('Construct model ..')
-    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
-    dtype = torch.float
-
-    # net = PointNetPartSegmentNet(num_classes)
-    net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
-    net.load_state_dict(torch.load(opt.model, map_location=device.type))
-    net = net.to(device, dtype)
-    net.eval()
-
-    labeled_dataset = None
-    result_clusters = []
-
-    # Iterate over all the samples and predict
-    for sample in test_dataset:
-
-        # Predict
-        pred_label, gt_label = eval_sample(net, sample)
-        if opt.with_normals:
-            sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy()))
-        else:
-            sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], pred_label.numpy()))
-
-        # draw_sample_data(sample_data, False)
-        #print("Sample Datat: ", sample_data[:5, :])
-        #print('Eval done.')
-        print("PRED LABEL: ", pred_label)
-
-        #sample_data = normalize_pointcloud(sample_data)
-        #sample_data = append_onehotencoded_type(sample_data, 1.0)
-        #sample_data = append_normal_angles(sample_data)
-
-        # print('Clustering ..')
-        # print('Shape: ' + str(sample_data.shape))
-        # clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10], eps=0.1, min_samples=0.0001, metric='euclidean', algo='auto')
-        # print('Clustering done. ' + str(len(clusters)) + " Clusters.")
-        # print(sample_data[:, 6])
-        # draw_sample_data(sample_data, False)
-        # result_clusters.extend(clusters)
-
-        result_clusters.append(sample_data)
-
-        if labeled_dataset is None:
-            labeled_dataset = sample_data
-        else:
-            labeled_dataset = np.vstack((labeled_dataset, sample_data))
-
-    print("prediction done")
-
-    draw_sample_data(labeled_dataset, False)
-    print("point cloud size: ", labeled_dataset.shape)
-
-    print("Min: ", np.min(labeled_dataset[:, :3]))
-    print("Max: ", np.max(labeled_dataset[:, :3]))
-    print("Min: ", np.min(pcloud[:, :3]))
-    print("Max: ", np.max(pcloud[:, :3]))
-
-    # TODO: Take result clusters and cluster them by primitive type.
-
-    # ------------------------------------------------------------------------------------------------------------------
-    # Write clusters to file.
-    # ------------------------------------------------------------------------------------------------------------------
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # # Create dataset
+    # print('Create data set ..')
+    #
+    # dataset_folder = './data/raw/predict/'
+    # pointcloud_file = './pointclouds/m3.xyz'
+    #
+    # # Load and pre-process point cloud
+    # pcloud = pc.read_pointcloud(pointcloud_file)
+    # pcloud = pc.normalize_pointcloud(pcloud, 1)
+    #
+    # #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
+    # #                                         selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.7, min_samples=5)
+    #
+    # pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4])
+    # print("Pre-Processing: Clustering")
+    # pc.draw_clusters(pc_clusters)
+    #
+    # recreate_folder(dataset_folder)
+    # for idx, pcc in enumerate(pc_clusters):
+    #
+    #     pcc = pc.farthest_point_sampling(pcc, opt.npoints)
+    #     recreate_folder(dataset_folder + str(idx) + '/')
+    #     pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
+    #     #draw_sample_data(pcc, True)
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Load point cloud clusters and model.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # # Load dataset
+    # print('load dataset ..')
+    # test_transform = GT.Compose([GT.NormalizeScale(), ])
+    #
+    # test_dataset = ShapeNetPartSegDataset(
+    #     mode='predict',
+    #     root_dir='data',
+    #     with_normals=opt.with_normals,
+    #     npoints=opt.npoints,
+    #     refresh=True,
+    #     collate_per_segment=opt.collate_per_segment,
+    #     has_variations=opt.has_variations,
+    #     headers=opt.headers
+    # )
+    # num_classes = test_dataset.num_classes()
+    #
+    # # Load model
+    # print('Construct model ..')
+    # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    # dtype = torch.float
+    #
+    # net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
+    # net.load_state_dict(torch.load(opt.model, map_location=device.type))
+    # net = net.to(device, dtype)
+    # net.eval()
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Predict per cluster.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # labeled_dataset = None
+    # result_clusters = []
+    #
+    # # Iterate over all the samples and predict
+    # for idx, sample in enumerate(test_dataset):
+    #
+    #     predicted_label, _ = eval_sample(net, sample)
+    #     if opt.with_normals:
+    #         sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy()))
+    #     else:
+    #         sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy()))
+    #
+    #     result_clusters.append(sample_data)
+    #
+    #     if labeled_dataset is None:
+    #         labeled_dataset = sample_data
+    #     else:
+    #         labeled_dataset = np.vstack((labeled_dataset, sample_data))
+    #
+    #     print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset)))
+    #
+    # # ------------------------------------------------------------------------------------------------------------------
+    # # Remove cluster rows if the amount of points for a particular primitive type is below a threshold.
+    # # ------------------------------------------------------------------------------------------------------------------
+    #
+    # min_cluster_size = 10
+    # contamination = 0.01
+    #
+    # filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters)
+    # type_filtered_clusters = []
+    # for c in filtered_clusters:
+    #
+    #     prim_types = np.unique(c[:, 6])
+    #     pt_count = {}
+    #     for pt in prim_types:
+    #         pt_count[pt] = len(c[c[:, 6] == pt])
+    #
+    #     max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0]
+    #     min_size = pt_count[max_pt] * contamination
+    #
+    #     valid_types = []
+    #     for pt in prim_types:
+    #         if pt_count[pt] > min_size:
+    #             valid_types.append(pt)
+    #
+    #     filtered_c = c[np.isin(c[:, 6], valid_types)]
+    #     type_filtered_clusters.append(filtered_c)
+    #
+    # result_clusters = type_filtered_clusters
+    #
+    # labeled_dataset = np.vstack(result_clusters)
+    #
+    # np.savetxt('labeled_dataset.txt', labeled_dataset)
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Clustering that results in per-primitive type clusters
+    # ------------------------------------------------------------------------------------------------------------------
+
+    labeled_dataset = np.loadtxt('labeled_dataset.txt')
+    # pc.draw_sample_data(labeled_dataset)
+
+    # TODO: Filter labeled dataset to get rid of edge clusters
+    print("Final clustering..")
+
+    labeled_dataset = pc.append_onehotencoded_type(labeled_dataset)
+    print("Test row: ", labeled_dataset[:1, :])
+
+    total_clusters = []
+
+    clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50)
+    print("Pre-clustering done. Clusters: ", len(clusters))
+    pc.draw_clusters(clusters)
+
+    for cluster in clusters:
+        #cluster = pc.normalize_pointcloud(cluster)
+        print("2nd level clustering ..")
+
+        prim_types_in_cluster = len(np.unique(cluster[:, 6], axis=0))
+        if prim_types_in_cluster == 1:
+            print("No need for 2nd level clustering since there is only a single primitive type in the cluster.")
+            total_clusters.append(cluster)
+        else:
+            sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50)
+            print("Sub clusters: ", len(sub_clusters))
+            total_clusters.extend(sub_clusters)
+
+    result_clusters = list(filter(lambda c: c.shape[0] > 100, total_clusters))
+
+    for cluster in result_clusters:
+        print("Cluster: ", cluster.shape[0])
+        pc.draw_sample_data(cluster, False)
+
+    print("Number of clusters: ", len(result_clusters))
+
+    pc.draw_clusters(result_clusters)

     pc.write_clusters("clusters.txt", result_clusters)
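Since filter_clusters now takes an arbitrary predicate instead of a fixed minimum size, size and purity filters can be expressed the same way. A small self-contained usage sketch (the list comprehension is shorthand for the loop in the module above):

import numpy as np

def filter_clusters(clusters, predicate):
    # Keep only the clusters for which predicate(cluster) is True.
    return [c for c in clusters if predicate(c)]

# Two toy clusters; columns are x, y, z, nx, ny, nz, type.
clusters = [np.zeros((150, 7)), np.ones((40, 7))]

big = filter_clusters(clusters, lambda c: c.shape[0] >= 100)              # size filter
pure = filter_clusters(clusters, lambda c: len(np.unique(c[:, 6])) == 1)  # single primitive type
print(len(big), len(pure))  # 1 2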