dataset modification

This commit is contained in:
Si11ium 2020-06-19 19:00:07 +02:00
parent b3c67bab40
commit a19bd9cafd
4 changed files with 36 additions and 20 deletions

View File

@@ -8,7 +8,6 @@ from collections import defaultdict
 import os
 from torch.utils.data import Dataset
 from tqdm import tqdm
-import glob
 import torch
 from torch_geometric.data import InMemoryDataset
@@ -45,7 +44,9 @@ class CustomShapeNet(InMemoryDataset):
         assert mode in self.modes.keys(), f'"mode" must be one of {self.modes.keys()}'
         # Set the Dataset Parameters
-        self.collate_per_segment, self.mode, self.refresh = collate_per_segment, mode, refresh
+        self.collate_per_segment = collate_per_segment
+        self.mode = mode
+        self.refresh = refresh
         self.with_normals = with_normals
         root_dir = Path(root_dir)
         super(CustomShapeNet, self).__init__(root_dir, transform, pre_transform, pre_filter)
@@ -57,15 +58,15 @@ class CustomShapeNet(InMemoryDataset):
         return [f'{self.mode}.pt']

     def check_and_resolve_cloud_count(self):
-        if self.raw_dir.exists():
-            dir_count = len([name for name in os.listdir(self.raw_dir) if os.path.isdir(os.path.join(self.raw_dir, name))])
-            if dir_count:
-                print(f'{dir_count} folders have been found....')
-                return dir_count
+        if (self.raw_dir / self.mode).exists():
+            file_count = len([cloud for cloud in (self.raw_dir / self.mode).iterdir() if cloud.is_file()])
+            if file_count:
+                print(f'{file_count} files have been found....')
+                return file_count
             else:
                 warn(ResourceWarning("No raw pointclouds have been found. Was this intentional?"))
-                return dir_count
+                return file_count
         warn(ResourceWarning("The raw data folder does not exist. Was this intentional?"))
         return -1
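
Note: the count now targets the files inside the per-split subfolder rather than the folders directly under raw/. A minimal, self-contained sketch of the same logic (the data/raw/train layout is illustrative, not taken from the repo):

    from pathlib import Path

    def count_raw_clouds(raw_dir: Path, mode: str) -> int:
        # Count point-cloud files in raw/<mode>/ instead of directories in raw/.
        split_dir = raw_dir / mode
        if not split_dir.exists():
            return -1
        return sum(1 for entry in split_dir.iterdir() if entry.is_file())

    print(count_raw_clouds(Path('data') / 'raw', 'train'))  # -1 when the folder is absent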
@@ -99,7 +100,7 @@ class CustomShapeNet(InMemoryDataset):
                 continue
         return data, slices

-    def _transform_and_filter(self, data):
+    def _pre_transform_and_filter(self, data):
         # ToDo: ANy filter to apply? Then do it here.
         if self.pre_filter is not None and not self.pre_filter(data):
             data = self.pre_filter(data)
@@ -133,7 +134,9 @@ class CustomShapeNet(InMemoryDataset):
                 src[key] = torch.tensor(values, dtype=torch.double).squeeze()

             if not self.collate_per_segment:
-                src = dict(all=torch.stack([x for x in src.values()]))
+                src = dict(
+                    all=torch.cat(tuple(src.values()))
+                )

             for key, values in src.items():
                 try:
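
The torch.stack -> torch.cat swap is the substantive fix in this hunk: stack adds a new leading dimension and requires every segment tensor to have the same shape, while cat joins variable-length segments along the existing point axis. A quick illustration with made-up shapes:

    import torch

    a = torch.zeros(100, 3)  # segment with 100 points
    b = torch.zeros(250, 3)  # segment with 250 points

    print(torch.cat((a, b)).shape)  # torch.Size([350, 3]): one merged cloud
    # torch.stack((a, b))           # RuntimeError: stack expects each tensor to be equal size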
@@ -157,17 +160,18 @@ class CustomShapeNet(InMemoryDataset):
                     if self.collate_per_segment:
                         data = Data(**attr_dict)
                     else:
-                        if not data:
+                        if data is None:
                             data = defaultdict(list)
                         # points=points, norm=points[:, 3:]
                         for key, val in attr_dict.items():
                             data[key].append(val)

-                    data = self._transform_and_filter(data)
+                    # data = Data(**data)
+                    # data = self._pre_transform_and_filter(data)
                     if self.collate_per_segment:
                         datasets[self.mode].append(data)

             if not self.collate_per_segment:
-                # Todo: What is this?
+                # This is just to be sure, but should not be needed, since src[all] == all there is in this cloud
                 datasets[self.mode].append(Data(**{key: torch.cat(data[key]) for key in data.keys()}))

             if datasets[self.mode]:
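
For the collate_per_segment=False path, segments are first collected in a defaultdict(list) and only merged into a single Data object per cloud at the end, which is what the appended torch.cat comprehension does. A condensed, runnable sketch of that flow (keys and shapes are illustrative):

    from collections import defaultdict

    import torch
    from torch_geometric.data import Data

    data = defaultdict(list)
    for segment in ({'pos': torch.zeros(10, 3)}, {'pos': torch.zeros(20, 3)}):  # fake segments
        for key, val in segment.items():
            data[key].append(val)

    cloud = Data(**{key: torch.cat(data[key]) for key in data.keys()})
    print(cloud.pos.shape)  # torch.Size([30, 3]): one Data object for the whole cloud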
@@ -198,8 +202,10 @@ class ShapeNetPartSegDataset(Dataset):
         # Resample to fixed number of points
         try:
-            npoints = self.npoints if self.mode != 'predict' else data.pos.shape[0]
-            choice = np.random.choice(data.pos.shape[0], npoints, replace=False if self.mode == 'predict' else True)
+            npoints = self.npoints if self.mode != DataSplit.predict else data.pos.shape[0]
+            choice = np.random.choice(data.pos.shape[0], npoints,
+                                      replace=False if self.mode == DataSplit.predict else True
+                                      )
         except ValueError:
             choice = []
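
The replace flag carries the semantics of this hunk: training mode samples with replacement, so clouds smaller than npoints are silently up-sampled, while predict mode sets npoints to the cloud size and keeps every point exactly once. The asymmetry in a nutshell (numbers are made up):

    import numpy as np

    cloud_size = 500
    train_choice = np.random.choice(cloud_size, 1024, replace=True)  # fine: indices may repeat
    # np.random.choice(cloud_size, 1024, replace=False)  # ValueError: cannot take a larger
    #                                                    # sample than population without replacement
    predict_choice = np.random.choice(cloud_size, cloud_size, replace=False)  # permutation of all points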

View File

@@ -57,7 +57,7 @@ def predict_prim_type(input_pc, model):
 if __name__ == '__main__':
-    input_pc_path = Path('data') / 'pc' / 'pc.txt'
+    input_pc_path = Path('data') / 'pc' / 'test.xyz'
     model_path = Path('output') / 'PN2' / 'PN_26512907a2de0664bfad2349a6bffee3' / 'version_0'
     # config_filename = 'config.ini'
@@ -66,15 +66,19 @@ if __name__ == '__main__':
     loaded_model = restore_logger_and_model(model_path)
     loaded_model.eval()

-    input_pc = read_pointcloud(input_pc_path, ' ', False)
-    input_pc = normalize_pointcloud(input_pc)
-    grid_clusters = cluster_cubes(input_pc, [1,1,1], 1024)
+    #input_pc = read_pointcloud(input_pc_path, ' ', False)
+    # input_pc = normalize_pointcloud(input_pc)
+
+    # TEST DATASET
+    test_dataset = ShapeNetPartSegDataset('data', mode=GlobalVar.data_split.predict, collate_per_segment=False,
+                                          npoints=1024, refresh=True)
+    grid_clusters = cluster_cubes(test_dataset[0], [3, 3, 3], max_points_per_cluster=1024)

     ps.init()

-    for i,grid_cluster_pc in enumerate(grid_clusters):
+    for i, grid_cluster_pc in enumerate(grid_clusters):
         print("Cluster pointcloud size: {}".format(grid_cluster_pc.shape[0]))

View File

@@ -17,6 +17,7 @@ from pyod.models.loci import LOCI
 from pyod.models.hbos import HBOS
 from pyod.models.lscp import LSCP
 from pyod.models.feature_bagging import FeatureBagging
+from torch_geometric.data import Data

 from utils.project_settings import Classes
@@ -116,6 +117,10 @@ def cluster_cubes(data, cluster_dims, max_points_per_cluster=-1, min_points_per_
         print("no need to cluster.")
         return [farthest_point_sampling(data, max_points_per_cluster)]

+    if isinstance(data, Data):
+        import torch
+        data = torch.cat((data.pos, data.norm, data.y.double().unsqueeze(-1)), dim=-1).numpy()
+
     max = data[:, :3].max(axis=0)
     max += max * 0.01
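
The new isinstance branch lets cluster_cubes accept a torch_geometric Data object directly by flattening it back into the (N, 7) numpy layout, [x, y, z, nx, ny, nz, label], that the rest of the function indexes into. A self-contained illustration of that conversion (double precision matches the dtype the dataset code above uses):

    import torch
    from torch_geometric.data import Data

    cloud = Data(pos=torch.rand(100, 3, dtype=torch.double),   # xyz coordinates
                 norm=torch.rand(100, 3, dtype=torch.double),  # point normals
                 y=torch.randint(0, 4, (100,)))                # per-point labels
    array = torch.cat((cloud.pos, cloud.norm, cloud.y.double().unsqueeze(-1)), dim=-1).numpy()
    print(array.shape)  # (100, 7)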

View File

@@ -43,6 +43,7 @@ class DataSplit(DataClass):
     train = 'train'
     devel = 'devel'
     test = 'test'
+    predict = 'predict'


 class GlobalVar(DataClass):
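
Since the DataSplit members are plain string constants, the new entry lets call sites write DataSplit.predict (or GlobalVar.data_split.predict, as the predict script does) while staying compatible with code that still compares against the bare string. A minimal stand-in to show the idea (the class body mirrors the diff; DataClass itself is not reproduced here):

    class DataSplit:  # stand-in for utils.project_settings.DataSplit
        train = 'train'
        devel = 'devel'
        test = 'test'
        predict = 'predict'

    mode = DataSplit.predict
    assert mode == 'predict'  # string comparison still holds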