added structured clustering

Markus Friedrich 2019-08-12 19:13:04 +02:00
parent e4cc447f68
commit 557d49fefc
2 changed files with 198 additions and 258 deletions

Changed file 1 of 2: pointcloud utility module

@ -37,7 +37,7 @@ def mini_color_table(index, norm=True):
return color
def clusterToColor(cluster, cluster_idx):
def cluster2Color(cluster, cluster_idx):
colors = np.zeros(shape=(len(cluster), 3))
point_idx = 0
@ -48,6 +48,21 @@ def clusterToColor(cluster, cluster_idx):
return colors
def label2color(labels):
'''
labels: np.ndarray with shape (n, )
colors(return): np.ndarray with shape (n, 3)
'''
num = labels.shape[0]
colors = np.zeros((num, 3))
minl, maxl = np.min(labels), np.max(labels)
for l in range(minl, maxl + 1):
colors[labels == l, :] = mini_color_table(l)
return colors
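label2color pairs naturally with the legacy open3d API already used in this module. A minimal, illustrative sketch of colorizing a labeled cloud with it (the file name matches the dataset the prediction script writes out; any (n, 7) array with the label in column 6 works):

labeled = np.loadtxt('labeled_dataset.txt')     # columns 0-2: xyz, 3-5: normals, 6: predicted type
cloud = o3d.PointCloud()
cloud.points = o3d.Vector3dVector(labeled[:, :3])
cloud.colors = o3d.Vector3dVector(label2color(labeled[:, 6].astype(int)))
o3d.draw_geometries([cloud])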
def read_pointcloud(path, delimiter=' ', hasHeader=True):
with open(path, 'r') as f:
if hasHeader:
@ -174,7 +189,7 @@ def draw_clusters(clusters):
cloud = o3d.PointCloud()
cloud.points = o3d.Vector3dVector(cluster[:,:3])
cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
cloud.colors = o3d.Vector3dVector(cluster2Color(cluster, cluster_idx))
clouds.append(cloud)
o3d.draw_geometries(clouds)
@ -186,14 +201,26 @@ def write_clusters(path, clusters, type_column=6):
file.write(str(len(clusters)) + "\n")
for cluster in clusters:
print("Types: ", cluster[:, type_column])
# print("Types: ", cluster[:, type_column])
types = np.unique(cluster[:, type_column], axis=0)
types = np.unique(cluster[:, type_column], axis=0).astype(int)
np.savetxt(file, types, header='', comments='')
print("Types: ", types)
np.savetxt(file, types, header='', comments='', fmt='%i')
np.savetxt(file, cluster[:, :6], header=str(len(cluster)) + ' ' + str(6), comments='')
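With the integer formatting for the type rows, the file written by write_clusters is laid out roughly as follows (values made up for illustration): the cluster count on the first line, then per cluster its unique type ids, a '<point count> 6' header and the 6-column point rows.

clusters.txt (illustrative):
2                 <- number of clusters
0                 <- unique primitive type ids of cluster 1
3
1500 6            <- point count and column count of cluster 1
0.12 0.34 0.56 0.00 0.00 1.00
...
1                 <- unique primitive type id of cluster 2
820 6
...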
def draw_sample_data(sample_data, colored_normals = False):
cloud = o3d.PointCloud()
cloud.points = o3d.Vector3dVector(sample_data[:, :3])
cloud.colors = \
o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
o3d.draw_geometries([cloud])
def normalize_pointcloud(pc, factor=1.0):
max = pc.max(axis=0)
@ -221,12 +248,12 @@ def hierarchical_clustering(data, selected_indices_0, selected_indices_1, eps, m
return total_clusters
def filter_clusters(clusters, min_size):
def filter_clusters(clusters, filter):
filtered_clusters = []
for c in clusters:
if len(c) >= min_size:
if filter(c):
filtered_clusters.append(c)
return filtered_clusters
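The signature change turns the fixed size threshold into an arbitrary per-cluster predicate; the old behaviour is recovered with a lambda, and any other test works the same way (sketch):

large_clusters = filter_clusters(clusters, lambda c: len(c) >= 100)                # old min_size behaviour
single_type = filter_clusters(clusters, lambda c: len(np.unique(c[:, 6])) == 1)    # keep single-primitive clusters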
@ -238,4 +265,13 @@ def split_outliers(pc, columns):
# LOF, kNN
return pc[clf.labels_ == 0], pc[clf.labels_ == 1]
return pc[clf.labels_ == 0], pc[clf.labels_ == 1]
def append_onehotencoded_type(data, factor = 1.0):
types = data[:, 6].astype(int)
res = np.zeros((len(types), 4))
res[np.arange(len(types)), types] = factor
return np.column_stack((data, res))
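To make the encoding concrete, a small illustrative example: for a point of primitive type 2, the four appended columns 7-10 hold the one-hot vector scaled by factor.

data = np.array([[0.1, 0.2, 0.3, 0.0, 0.0, 1.0, 2]])      # x y z nx ny nz type
encoded = append_onehotencoded_type(data, factor=1.0)
# encoded[0] -> [0.1, 0.2, 0.3, 0.0, 0.0, 1.0, 2.0,  0.0, 0.0, 1.0, 0.0]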

Changed file 2 of 2: prediction and clustering script

@ -2,21 +2,19 @@ import sys
import os
import shutil
import math
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
from dataset.shapenet import ShapeNetPartSegDataset
from model.pointnet2_part_seg import PointNet2PartSegmentNet
import torch_geometric.transforms as GT
import torch
import argparse
from distutils.util import strtobool
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import open3d as o3d
import pointcloud as pc
import operator
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
def eval_sample(net, sample):
'''
@ -42,78 +40,6 @@ def eval_sample(net, sample):
return (pred_label, gt_label)
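eval_sample itself is unchanged: per dataset sample it returns the predicted and ground-truth per-point labels. A sketch of the call pattern used in the prediction loop further down (with normals enabled, the points tensor already carries them):

pred_label, gt_label = eval_sample(net, sample)
sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy()))   # points + predicted type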
def mini_color_table(index, norm=True):
colors = [
[0.5000, 0.5400, 0.5300], [0.8900, 0.1500, 0.2100], [0.6400, 0.5800, 0.5000],
[1.0000, 0.3800, 0.0100], [1.0000, 0.6600, 0.1400], [0.4980, 1.0000, 0.0000],
[0.4980, 1.0000, 0.8314], [0.9412, 0.9725, 1.0000], [0.5412, 0.1686, 0.8863],
[0.5765, 0.4392, 0.8588], [0.3600, 0.1400, 0.4300], [0.5600, 0.3700, 0.6000],
]
color = colors[index % len(colors)]
if not norm:
color[0] *= 255
color[1] *= 255
color[2] *= 255
return color
def label2color(labels):
'''
labels: np.ndarray with shape (n, )
colors(return): np.ndarray with shape (n, 3)
'''
num = labels.shape[0]
colors = np.zeros((num, 3))
minl, maxl = np.min(labels), np.max(labels)
for l in range(minl, maxl + 1):
colors[labels == l, :] = mini_color_table(l)
return colors
def clusterToColor(cluster, cluster_idx):
colors = np.zeros(shape=(len(cluster), 3))
point_idx = 0
for point in cluster:
colors[point_idx, :] = mini_color_table(cluster_idx)
point_idx += 1
return colors
def farthest_point_sampling(pts, K):
if pts.shape[0] < K:
return pts
def calc_distances(p0, points):
return ((p0[:3] - points[:, :3]) ** 2).sum(axis=1)
farthest_pts = np.zeros((K, pts.shape[1]))
farthest_pts[0] = pts[np.random.randint(len(pts))]
distances = calc_distances(farthest_pts[0], pts)
for i in range(1, K):
farthest_pts[i] = pts[np.argmax(distances)]
distances = np.minimum(distances, calc_distances(farthest_pts[i], pts))
return farthest_pts
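farthest_point_sampling greedily picks points that are maximally far from the ones already chosen, which gives an even spatial coverage of the cluster. A usage sketch (cluster_points is an illustrative (n, 7) array; 4096 matches the new --npoints default below):

sampled = farthest_point_sampling(cluster_points, 4096)   # returned unchanged if the cluster has fewer than 4096 points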
def append_onehotencoded_type(data, factor = 1.0):
types = data[:, 6].astype(int)
res = np.zeros((len(types), 4))
res[np.arange(len(types)), types] = factor
return np.column_stack((data, res))
def append_normal_angles(data):
def func(x):
@ -128,64 +54,6 @@ def append_normal_angles(data):
return np.column_stack((data, res))
def extract_clusters(data, selected_indices, eps, min_samples, metric='euclidean', algo='auto'):
min_samples = min_samples * len(data)
print('Clustering. Min Samples: ' + str(min_samples) + ' EPS: ' + str(eps))
# 0,1,2 : pos
# 3,4,5 : normal
# 6: type index
# 7,8,9,10: type index one hot encoded
# 11,12: normal as angles
db_res = DBSCAN(eps=eps, metric=metric, n_jobs=-1, algorithm=algo, min_samples=min_samples).fit(data[:, selected_indices])
labels = db_res.labels_
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = list(labels).count(-1)
print("Noise: " + str(n_noise) + " Clusters: " + str(n_clusters))
clusters = {}
for idx, l in enumerate(labels):
if l == -1:  # -1 marks DBSCAN noise points
continue
clusters.setdefault(str(l), []).append(data[idx, :])
npClusters = []
for cluster in clusters.values():
npClusters.append(np.array(cluster))
return npClusters
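Note that min_samples is interpreted as a fraction of the input size and converted to an absolute count at the top of the function. The commented-out call further down passes it accordingly; a sketch:

clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10],
                            eps=0.1, min_samples=0.0001)   # 0.0001 * len(sample_data) points per neighbourhood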
def draw_clusters(clusters):
clouds = []
cluster_idx = 0
for cluster in clusters:
cloud = o3d.PointCloud()
cloud.points = o3d.Vector3dVector(cluster[:,:3])
cloud.colors = o3d.Vector3dVector(clusterToColor(cluster, cluster_idx))
clouds.append(cloud)
cluster_idx += 1
o3d.draw_geometries(clouds)
def draw_sample_data(sample_data, colored_normals = False):
cloud = o3d.PointCloud()
cloud.points = o3d.Vector3dVector(sample_data[:, :3])
cloud.colors = \
o3d.Vector3dVector(label2color(sample_data[:, 6].astype(int)) if not colored_normals else sample_data[:, 3:6])
o3d.draw_geometries([cloud])
def recreate_folder(folder):
if os.path.exists(folder) and os.path.isdir(folder):
shutil.rmtree(folder)
@ -194,8 +62,8 @@ def recreate_folder(folder):
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
parser = argparse.ArgumentParser()
parser.add_argument('--npoints', type=int, default=2048, help='resample points number')
parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_3.pth', help='model path')
parser.add_argument('--npoints', type=int, default=4096, help='resample points number')
parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_28.pth', help='model path')
parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result')
parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers')
parser.add_argument('--with_normals', type=strtobool, default=True, help='if training will include normals')
@ -203,139 +71,175 @@ parser.add_argument('--collate_per_segment', type=strtobool, default=True, help=
parser.add_argument('--has_variations', type=strtobool, default=False,
help='whether a single pointcloud has variations '
'named int(id)_pc.(xyz|dat) look at pointclouds or sub')
opt = parser.parse_args()
print(opt)
if __name__ == '__main__':
# Create dataset
print('Create data set ..')
# # ------------------------------------------------------------------------------------------------------------------
# # Load point cloud, cluster it and store clusters as point cloud clusters again for later prediction
# # ------------------------------------------------------------------------------------------------------------------
#
# # Create dataset
# print('Create data set ..')
#
# dataset_folder = './data/raw/predict/'
# pointcloud_file = './pointclouds/m3.xyz'
#
# # Load and pre-process point cloud
# pcloud = pc.read_pointcloud(pointcloud_file)
# pcloud = pc.normalize_pointcloud(pcloud, 1)
#
# #pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
# # selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.7, min_samples=5)
#
# pc_clusters = pc.cluster_cubes(pcloud, [4, 4, 4])
# print("Pre-Processing: Clustering")
# pc.draw_clusters(pc_clusters)
#
# recreate_folder(dataset_folder)
# for idx, pcc in enumerate(pc_clusters):
#
# pcc = pc.farthest_point_sampling(pcc, opt.npoints)
# recreate_folder(dataset_folder + str(idx) + '/')
# pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
# #draw_sample_data(pcc, True)
#
# # ------------------------------------------------------------------------------------------------------------------
# # Load point cloud clusters and model.
# # ------------------------------------------------------------------------------------------------------------------
#
# # Load dataset
# print('load dataset ..')
# test_transform = GT.Compose([GT.NormalizeScale(), ])
#
# test_dataset = ShapeNetPartSegDataset(
# mode='predict',
# root_dir='data',
# with_normals=opt.with_normals,
# npoints=opt.npoints,
# refresh=True,
# collate_per_segment=opt.collate_per_segment,
# has_variations=opt.has_variations,
# headers=opt.headers
# )
# num_classes = test_dataset.num_classes()
#
# # Load model
# print('Construct model ..')
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# dtype = torch.float
#
# net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
# net.load_state_dict(torch.load(opt.model, map_location=device.type))
# net = net.to(device, dtype)
# net.eval()
#
# # ------------------------------------------------------------------------------------------------------------------
# # Predict per cluster.
# # ------------------------------------------------------------------------------------------------------------------
#
# labeled_dataset = None
# result_clusters = []
#
# # Iterate over all the samples and predict
# for idx, sample in enumerate(test_dataset):
#
# predicted_label, _ = eval_sample(net, sample)
# if opt.with_normals:
# sample_data = np.column_stack((sample["points"].numpy(), predicted_label.numpy()))
# else:
# sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], predicted_label.numpy()))
#
# result_clusters.append(sample_data)
#
# if labeled_dataset is None:
# labeled_dataset = sample_data
# else:
# labeled_dataset = np.vstack((labeled_dataset, sample_data))
#
# print("Prediction done for cluster " + str(idx+1) + "/" + str(len(test_dataset)))
#
# # ------------------------------------------------------------------------------------------------------------------
# # Remove cluster rows if the amount of points for a particular primitive type is below a threshold.
# # ------------------------------------------------------------------------------------------------------------------
#
# min_cluster_size = 10
# contamination = 0.01
#
# filtered_clusters = filter(lambda c : c.shape[0] > min_cluster_size, result_clusters)
# type_filtered_clusters = []
# for c in filtered_clusters:
#
# prim_types = np.unique(c[:, 6])
# pt_count = {}
# for pt in prim_types:
# pt_count[pt] = len(c[c[:, 6] == pt])
#
# max_pt = max(pt_count.items(), key=operator.itemgetter(1))[0]
# min_size = pt_count[max_pt] * contamination
#
# valid_types = []
# for pt in prim_types:
# if pt_count[pt] > min_size:
# valid_types.append(pt)
#
# filtered_c = c[np.isin(c[:, 6], valid_types)]
# type_filtered_clusters.append(filtered_c)
# result_clusters = type_filtered_clusters
#
# labeled_dataset = np.vstack(result_clusters)
#
# np.savetxt('labeled_dataset.txt', labeled_dataset)
dataset_folder = './data/raw/predict/'
pointcloud_file = './pointclouds/0_0.xyz'
# ------------------------------------------------------------------------------------------------------------------
# Clustering that results in per-primitive type clusters
# ------------------------------------------------------------------------------------------------------------------
# Load and pre-process point cloud
pcloud = pc.read_pointcloud(pointcloud_file)
pcloud = pc.normalize_pointcloud(pcloud, 1)
#a, b = pc.split_outliers(pcloud, [3, 4, 5])
#draw_sample_data(a, True)
#draw_sample_data(b, True)
#pcloud = a
labeled_dataset = np.loadtxt('labeled_dataset.txt')
# pc.draw_sample_data(labeled_dataset)
# for 0_0.xyz: pc.hierarchical_clustering(pcloud, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
# TODO: Filter labeled dataset to get rid of edge clusters
print("Final clustering..")
#pc_clusters = pc.cluster_dbscan(pcloud, [0, 1, 2, 3,4,5], eps=0.5, min_samples=5)
#pc_clusters = pc.filter_clusters(pc_clusters, 100)
labeled_dataset = pc.append_onehotencoded_type(labeled_dataset)
#pc_clusters = [pcloud]
print("Test row: ", labeled_dataset[:1, :])
#print("NUM CLUSTERS: ", len(pc_clusters))
total_clusters = []
#draw_clusters(pc_clusters)
#for c in pc_clusters:
#draw_sample_data(c, True)
# print("Cluster Size: ", len(c))
clusters = pc.cluster_dbscan(labeled_dataset, [0,1,2,3,4,5], eps=0.1, min_samples=50)
print("Pre-clustering done. Clusters: ", len(clusters))
pc.draw_clusters(clusters)
for cluster in clusters:
#cluster = pc.normalize_pointcloud(cluster)
print("2nd level clustering ..")
# draw_sample_data(pcloud)
pc_clusters = pc.hierarchical_clustering(pcloud, selected_indices_0=[0, 1, 2, 3, 4, 5],
selected_indices_1=[0, 1, 2, 3, 4, 5], eps=0.1, min_samples=5)
# pc.cluster_cubes(pcloud, [4, 4, 4])
recreate_folder(dataset_folder)
for idx, pcc in enumerate(pc_clusters):
pcc = farthest_point_sampling(pcc, opt.npoints)
recreate_folder(dataset_folder + str(idx) + '/')
pc.write_pointcloud(dataset_folder + str(idx) + '/pc.xyz', pcc)
# draw_sample_data(pcc, False)
# Load dataset
print('load dataset ..')
test_transform = GT.Compose([GT.NormalizeScale(), ])
test_dataset = ShapeNetPartSegDataset(
mode='predict',
root_dir='data',
with_normals=opt.with_normals,
npoints=opt.npoints,
refresh=True,
collate_per_segment=opt.collate_per_segment,
has_variations=opt.has_variations,
headers=opt.headers
)
num_classes = test_dataset.num_classes()
# Load model
print('Construct model ..')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
dtype = torch.float
# net = PointNetPartSegmentNet(num_classes)
net = PointNet2PartSegmentNet(num_classes, with_normals=opt.with_normals)
net.load_state_dict(torch.load(opt.model, map_location=device.type))
net = net.to(device, dtype)
net.eval()
labeled_dataset = None
result_clusters = []
# Iterate over all the samples and predict
for sample in test_dataset:
# Predict
pred_label, gt_label = eval_sample(net, sample)
if opt.with_normals:
sample_data = np.column_stack((sample["points"].numpy(), pred_label.numpy()))
prim_types_in_cluster = len(np.unique(cluster[:, 6], axis=0))
if prim_types_in_cluster == 1:
print("No need for 2nd level clustering since there is only a single primitive type in the cluster.")
total_clusters.append(cluster)
else:
sample_data = np.column_stack((sample["points"].numpy(), sample["normals"], pred_label.numpy()))
sub_clusters = pc.cluster_dbscan(cluster, [0,1,2,7,8,9,10], eps=0.1, min_samples=50)
print("Sub clusters: ", len(sub_clusters))
total_clusters.extend(sub_clusters)
# draw_sample_data(sample_data, False)
result_clusters = list(filter(lambda c: c.shape[0] > 100, total_clusters))
#print("Sample Datat: ", sample_data[:5, :])
#print('Eval done.')
print("PRED LABEL: ", pred_label)
for cluster in result_clusters:
print("Cluster: ", cluster.shape[0])
#sample_data = normalize_pointcloud(sample_data)
#sample_data = append_onehotencoded_type(sample_data, 1.0)
#sample_data = append_normal_angles(sample_data)
pc.draw_sample_data(cluster, False)
# print('Clustering ..')
# print('Shape: ' + str(sample_data.shape))
print("Number of clusters: ", len(result_clusters))
# clusters = extract_clusters(sample_data, [0, 1, 2, 3, 4, 5, 7, 8, 9, 10], eps=0.1, min_samples=0.0001, metric='euclidean', algo='auto')
# print('Clustering done. ' + str(len(clusters)) + " Clusters.")
# print(sample_data[:, 6])
# draw_sample_data(sample_data, False)
# result_clusters.extend(clusters)
result_clusters.append(sample_data)
if labeled_dataset is None:
labeled_dataset = sample_data
else:
labeled_dataset = np.vstack((labeled_dataset, sample_data))
print("prediction done")
draw_sample_data(labeled_dataset, False)
print("point cloud size: ", labeled_dataset.shape)
print("Min: ", np.min(labeled_dataset[:, :3]))
print("Max: ", np.max(labeled_dataset[:, :3]))
print("Min: ", np.min(pcloud[:, :3]))
print("Max: ", np.max(pcloud[:, :3]))
# TODO: Take result clusters and cluster them by primitive type.
pc.draw_clusters(result_clusters)
# ------------------------------------------------------------------------------------------------------------------
# Write clusters to file.
# ------------------------------------------------------------------------------------------------------------------
pc.write_clusters("clusters.txt", result_clusters)
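Condensed, and with the commented-out exploration code stripped, the structured clustering path this commit adds boils down to the following sketch (same helpers and parameters as above; the pointcloud module is imported as pc):

import numpy as np
import pointcloud as pc

labeled = np.loadtxt('labeled_dataset.txt')                # xyz, normals, predicted primitive type
labeled = pc.append_onehotencoded_type(labeled)            # columns 7-10: one-hot type

# Stage 1: coarse DBSCAN on position + normals
coarse = pc.cluster_dbscan(labeled, [0, 1, 2, 3, 4, 5], eps=0.1, min_samples=50)

# Stage 2: refine mixed clusters on position + one-hot type
refined = []
for cluster in coarse:
    if len(np.unique(cluster[:, 6], axis=0)) == 1:
        refined.append(cluster)                            # single primitive type, keep as-is
    else:
        refined.extend(pc.cluster_dbscan(cluster, [0, 1, 2, 7, 8, 9, 10], eps=0.1, min_samples=50))

# Drop small clusters and write the result
result = [c for c in refined if c.shape[0] > 100]
pc.write_clusters('clusters.txt', result)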