This commit is contained in:
Markus Friedrich 2019-08-06 11:19:22 +02:00
commit 97a9db9a32
7 changed files with 93 additions and 2244 deletions

View File

@ -17,24 +17,28 @@ def save_names(name_list, path):
with open(path, 'wb') as f: with open(path, 'wb') as f:
f.writelines(name_list) f.writelines(name_list)
class CustomShapeNet(InMemoryDataset): class CustomShapeNet(InMemoryDataset):
categories = {key: val for val, key in enumerate(['Box', 'Cone', 'Cylinder', 'Sphere'])} categories = {key: val for val, key in enumerate(['Box', 'Cone', 'Cylinder', 'Sphere'])}
modes = {key: val for val, key in enumerate(['train', 'test', 'predict'])}
def __init__(self, root, collate_per_segment=True, train=True, transform=None, pre_filter=None, pre_transform=None, def __init__(self, root_dir, collate_per_segment=True, mode='train', transform=None, pre_filter=None,
headers=True, **kwargs): pre_transform=None, headers=True, has_variations=False, refresh=False, labels_within=False):
self.has_headers = headers assert mode in self.modes.keys(), f'"mode" must be one of {self.modes.keys()}'
self.collate_per_element = collate_per_segment assert not (collate_per_segment and has_variations), 'Either use each element or pointclouds - with variations'
self.train = train
super(CustomShapeNet, self).__init__(root, transform, pre_transform, pre_filter) #Set the Dataset Parameters
path = self.processed_paths[0] if train else self.processed_paths[-1] self.has_headers, self.has_variations, self.labels_within = headers, has_variations, labels_within
self.data, self.slices = torch.load(path) self.collate_per_element, self.mode, self.refresh = collate_per_segment, mode, refresh
super(CustomShapeNet, self).__init__(root_dir, transform, pre_transform, pre_filter)
self.data, self.slices = self._load_dataset()
print("Initialized") print("Initialized")
@property @property
def raw_file_names(self): def raw_file_names(self):
# Maybe add more data like validation sets # Maybe add more data like validation sets
return ['train', 'test'] return list(self.modes.keys())
@property @property
def processed_file_names(self): def processed_file_names(self):
@ -53,9 +57,18 @@ class CustomShapeNet(InMemoryDataset):
def _load_dataset(self): def _load_dataset(self):
data, slices = None, None data, slices = None, None
filepath = self.processed_paths[self.modes[self.mode]]
if self.refresh:
try:
os.remove(filepath)
print('Processed Location "Refreshed" (We deleted the Files)')
except FileNotFoundError:
print('You meant to refresh the allready processed dataset, but there were none...')
print('continue processing')
pass
while True: while True:
try: try:
filepath = os.path.join(self.root, self.processed_dir, f'{"train" if self.train else "test"}.pt')
data, slices = torch.load(filepath) data, slices = torch.load(filepath)
print('Dataset Loaded') print('Dataset Loaded')
break break
@ -77,7 +90,7 @@ class CustomShapeNet(InMemoryDataset):
def process(self, delimiter=' '): def process(self, delimiter=' '):
datasets = defaultdict(list) datasets = defaultdict(list)
idx, data_folder = (0, self.raw_file_names[0]) if self.train else (1, self.raw_file_names[1]) idx, data_folder = self.modes[self.mode], self.raw_file_names[self.modes[self.mode]]
path_to_clouds = os.path.join(self.raw_dir, data_folder) path_to_clouds = os.path.join(self.raw_dir, data_folder)
if '.headers' in os.listdir(path_to_clouds): if '.headers' in os.listdir(path_to_clouds):
@ -88,6 +101,8 @@ class CustomShapeNet(InMemoryDataset):
pass pass
for pointcloud in tqdm(os.scandir(path_to_clouds)): for pointcloud in tqdm(os.scandir(path_to_clouds)):
if self.has_variations:
cloud_variations = defaultdict(list)
if not os.path.isdir(pointcloud): if not os.path.isdir(pointcloud):
continue continue
data, paths = None, list() data, paths = None, list()
@ -95,16 +110,11 @@ class CustomShapeNet(InMemoryDataset):
paths.extend(glob.glob(os.path.join(pointcloud.path, f'*.{ext}'))) paths.extend(glob.glob(os.path.join(pointcloud.path, f'*.{ext}')))
for element in paths: for element in paths:
if all([x not in os.path.split(element)[-1] for x in ['pc.dat', 'pc.xyz']]): # This was built to filter all variations that are greater than 25
# Assign training data to the data container pattern = re.compile('^((6[0-1]|[1-5][0-9])_\w+?\d+?|pc|\d+?_pc)\.(xyz|dat)$')
# Following the original logic; if pattern.match(os.path.split(element)[-1]):
# y should be the label; continue
# pos should be the six dimensional vector describing: !its pos not points!! else:
# x,y,z,x_rot,y_rot,z_rot
# Get the y - Label
y_raw = next(i for i, v in enumerate(self.categories.keys()) if v.lower() in element.lower())
# y_raw = os.path.splitext(element)[0].split('_')[-2]
with open(element,'r') as f: with open(element,'r') as f:
if self.has_headers: if self.has_headers:
headers = f.__next__() headers = f.__next__()
@ -118,20 +128,35 @@ class CustomShapeNet(InMemoryDataset):
points = torch.tensor(src, dtype=None).squeeze() points = torch.tensor(src, dtype=None).squeeze()
if not len(points.shape) > 1: if not len(points.shape) > 1:
continue continue
y_all = [y_raw] * points.shape[0] # Place Fake Labels to hold the given structure
if self.labels_within:
y_all = points[:, -1]
points = points[:, :-1]
else:
# Get the y - Label
y_raw = next(i for i, v in enumerate(self.categories.keys()) if v.lower() in element.lower())
y_all = ([y_raw] if self.mode != 'predict' else [-1]) * points.shape[0]
y = torch.as_tensor(y_all, dtype=torch.int) y = torch.as_tensor(y_all, dtype=torch.int)
if self.collate_per_element: if self.collate_per_element:
data = Data(y=y, pos=points[:, :3], points=points, norm=points[:, 3:]) data = Data(y=y, pos=points[:, :3]) # , points=points, norm=points[:, 3:])
else: else:
if not data: if not data:
data = defaultdict(list) data = defaultdict(list)
for key, val in dict(y=y, pos=points[:, :3], points=points, norm=points[:, 3:]).items(): # points=points, norm=points[:, 3:]
for key, val in dict(y=y, pos=points[:, :3]).items():
data[key].append(val) data[key].append(val)
data = self._transform_and_filter(data) data = self._transform_and_filter(data)
if self.collate_per_element: if self.collate_per_element:
datasets[data_folder].append(data) datasets[data_folder].append(data)
if self.has_variations:
cloud_variations[int(os.path.split(element)[-1].split('_')[0])].append(data)
if not self.collate_per_element: if not self.collate_per_element:
if self.has_variations:
for variation in cloud_variations.keys():
datasets[data_folder].append(Data(**{key: torch.cat(data[key]) for key in data.keys()}))
else:
datasets[data_folder].append(Data(**{key: torch.cat(data[key]) for key in data.keys()})) datasets[data_folder].append(Data(**{key: torch.cat(data[key]) for key in data.keys()}))
if datasets[data_folder]: if datasets[data_folder]:
@ -147,15 +172,15 @@ class ShapeNetPartSegDataset(Dataset):
Resample raw point cloud to fixed number of points. Resample raw point cloud to fixed number of points.
Map raw label from range [1, N] to [0, N-1]. Map raw label from range [1, N] to [0, N-1].
""" """
def __init__(self, root_dir, collate_per_segment=True, train=True, transform=None, npoints=1024, headers=True): def __init__(self, root_dir, npoints=1024, **kwargs):
super(ShapeNetPartSegDataset, self).__init__() super(ShapeNetPartSegDataset, self).__init__()
kwargs.update(dict(root_dir=root_dir))
self.npoints = npoints self.npoints = npoints
self.dataset = CustomShapeNet(root=root_dir, collate_per_segment=collate_per_segment, self.dataset = CustomShapeNet(**kwargs)
train=train, transform=transform, headers=headers)
def __getitem__(self, index): def __getitem__(self, index):
data = self.dataset[index] data = self.dataset[index]
points, labels, _, norm = data.pos, data.y, data.points, data.norm points, labels = data.pos, data.y # , data.points, data.norm
# Resample to fixed number of points # Resample to fixed number of points
try: try:
@ -163,14 +188,13 @@ class ShapeNetPartSegDataset(Dataset):
except ValueError: except ValueError:
choice = [] choice = []
points, labels, norm = points[choice, :], labels[choice], norm[choice] points, labels = points[choice, :], labels[choice]
labels -= 1 if self.num_classes() in labels else 0 # Map label from [1, C] to [0, C-1] labels -= 1 if self.num_classes() in labels else 0 # Map label from [1, C] to [0, C-1]
sample = { sample = {
'points': points, # torch.Tensor (n, 3) 'points': points, # torch.Tensor (n, 3)
'labels': labels, # torch.Tensor (n,) 'labels': labels # torch.Tensor (n,)
'normals': norm # torch.Tensor (n,)
} }
return sample return sample
@ -180,144 +204,3 @@ class ShapeNetPartSegDataset(Dataset):
def num_classes(self): def num_classes(self):
return self.dataset.num_classes return self.dataset.num_classes
class PredictionShapeNet(InMemoryDataset):
    """In-memory dataset of unlabeled point clouds read from ``<raw_dir>/predict``.

    Each sub-folder of the ``predict`` directory is scanned for cloud files named
    ``pc.xyz`` / ``pc.dat`` (optionally prefixed with ``<digits>_``). Every usable
    file becomes one ``Data`` object whose labels are all ``-1`` (placeholders).
    The collated result is cached as ``predict.pt`` in the processed directory.

    ``raw_dir``, ``processed_dir``, ``processed_paths``, ``collate``, ``pre_filter``
    and ``pre_transform`` are expected to come from the base class (not visible here).
    """

    # Same label set as CustomShapeNet.categories; ``num_classes`` reads it and
    # would otherwise fail with an AttributeError on this class.
    categories = {key: val for val, key in enumerate(['Box', 'Cone', 'Cylinder', 'Sphere'])}

    # Spellings of NaN in the raw files that are replaced by 0 while parsing.
    _NAN_TOKENS = ('-nan(ind)', 'nan(ind)')

    # Compiled once; raw string so that ``\d`` and ``\.`` are regex escapes, not
    # (invalid) string escapes. ``match`` anchors at the start only, as before.
    _CLOUD_FILE_PATTERN = re.compile(r'(^\d+?_|^)pc\.(xyz|dat)')

    def __init__(self, root, transform=None, pre_filter=None, pre_transform=None, headers=True, refresh=False):
        """Store the parsing options, initialise the base dataset and load the data.

        :param root: dataset root directory, forwarded to the base class.
        :param transform: forwarded to the base class.
        :param pre_filter: forwarded to the base class; see ``process`` for its limits.
        :param pre_transform: forwarded to the base class; see ``process`` for its limits.
        :param headers: whether each raw file starts with a header line.
        :param refresh: delete the cached ``predict.pt`` and rebuild it.
        """
        # Set before the base initialiser runs - presumably it may call
        # download()/process(), which read these flags (TODO confirm against base class).
        self.has_headers = headers
        self.refresh = refresh
        super(PredictionShapeNet, self).__init__(root, transform, pre_transform, pre_filter)
        self.data, self.slices = self._load_dataset()
        print("Initialized")

    @property
    def raw_file_names(self):
        """Names of the raw sub-directories handled by this dataset."""
        # Maybe add more data like validation sets
        return ['predict']

    @property
    def processed_file_names(self):
        """Cache file names, one ``<name>.pt`` per raw directory name."""
        return [f'{x}.pt' for x in self.raw_file_names]

    def download(self):
        """Count the folders below ``raw_dir``; nothing is actually downloaded.

        :return: number of folders found (always > 0).
        :raises IOError: if ``raw_dir`` contains no folder at all.
        """
        dir_count = sum(1 for name in os.listdir(self.raw_dir)
                        if os.path.isdir(os.path.join(self.raw_dir, name)))
        print(f'{dir_count} folders have been found....')
        if dir_count:
            return dir_count
        raise IOError("No raw pointclouds have been found.")

    @property
    def num_classes(self):
        """Number of entries in ``categories``."""
        return len(self.categories)

    def _load_dataset(self):
        """Return ``(data, slices)`` from the cache file, building the cache if needed.

        With ``refresh`` set, an existing cache file is deleted first. The raw data
        is processed at most once; if that does not produce the cache file, the
        ``FileNotFoundError`` from loading propagates instead of retrying forever.
        """
        filepath = os.path.join(self.processed_dir, self.processed_file_names[0])
        if self.refresh:
            try:
                os.remove(filepath)
                print('Processed Location "Refreshed" (We deleted the Files)')
            except FileNotFoundError:
                print('You meant to refresh the allready processed dataset, but there were none...')
                print('continue processing')

        if not os.path.exists(filepath):
            self.process()
        data, slices = torch.load(filepath)
        print('Dataset Loaded')
        return data, slices

    def _read_points(self, path, delimiter):
        """Parse one cloud file into a 2-D tensor with one row per line.

        :return: the tensor, or ``None`` when the file is empty, its header
                 announces 0 points, or fewer than two usable rows remain.
        """
        with open(path, 'r') as f:
            if self.has_headers:
                header = next(f, None)
                # No header line at all, or a header whose first field is 0:
                # there are no useable nodes in this file.
                if header is None or not int(header.rstrip().split(delimiter)[0]):
                    return None
            # Lines keep their newline when read from a file, so blank lines have
            # to be detected after stripping - otherwise float('') would raise.
            rows = [[0 if token in self._NAN_TOKENS else float(token)
                     for token in line.rstrip().split(delimiter)]
                    for line in f if line.strip()]
        points = torch.tensor(rows).squeeze()
        # squeeze() collapses a single-row (or empty) cloud to fewer than 2 dims.
        if not len(points.shape) > 1:
            return None
        return points

    def process(self, delimiter=' '):
        """Read all prediction clouds and write the collated cache file.

        A marker file named ``.headers`` / ``no.headers`` inside the ``predict``
        directory overrides the ``headers`` constructor argument.

        :param delimiter: field separator used inside the raw files.
        :raises NotImplementedError: if ``pre_filter`` rejects a cloud, or if a
                ``pre_transform`` was supplied - neither is supported yet.
        :raises IOError: if no usable cloud was found.
        """
        split_name = self.raw_file_names[0]
        path_to_clouds = os.path.join(self.raw_dir, split_name)

        marker_files = os.listdir(path_to_clouds)
        if '.headers' in marker_files:
            self.has_headers = True
        elif 'no.headers' in marker_files:
            self.has_headers = False

        clouds = []
        for pointcloud in tqdm(os.scandir(path_to_clouds)):
            if not os.path.isdir(pointcloud):
                continue
            for file in os.scandir(pointcloud.path):
                if not self._CLOUD_FILE_PATTERN.match(file.name):
                    continue
                points = self._read_points(file.path, delimiter)
                if points is None:
                    continue

                # Place fake labels (-1) so the samples keep the usual structure.
                y = torch.as_tensor([-1] * points.shape[0], dtype=torch.int)
                data = Data(y=y, pos=points[:, :3], points=points, norm=points[:, 3:])

                # ToDo: Any filter to apply? Then do it here.
                if self.pre_filter is not None and not self.pre_filter(data):
                    raise NotImplementedError
                # ToDo: Any transformation to apply? Then do it here.
                if self.pre_transform is not None:
                    raise NotImplementedError
                clouds.append(data)

        if not clouds:
            # Fail with a clear message instead of handing an empty list to collate().
            raise IOError(f'No usable pointclouds have been found in "{path_to_clouds}".')
        os.makedirs(self.processed_dir, exist_ok=True)
        torch.save(self.collate(clouds), self.processed_paths[0])

    def __repr__(self):
        return f'{self.__class__.__name__}({len(self)})'
class PredictNetPartSegDataset(Dataset):
    """
    Serve the clouds of a PredictionShapeNet as plain dictionaries.
    Samples are returned as stored; `npoints` is only recorded on the instance.
    """
    def __init__(self, root_dir, num_classes, transform=None, npoints=2048, headers=True, refresh=False):
        super().__init__()
        self._num_classes = num_classes
        self.npoints = npoints
        self.dataset = PredictionShapeNet(root=root_dir, transform=transform, headers=headers, refresh=refresh)

    def __getitem__(self, index):
        cloud = self.dataset[index]
        # `cloud.points` is still read (and discarded) so that a sample lacking it
        # fails in the same place as before.
        coords, targets, _, normals = cloud.pos, cloud.y, cloud.points, cloud.norm
        return dict(
            points=coords,    # torch.Tensor (n, 3)
            labels=targets,   # torch.Tensor (n,)
            normals=normals,  # torch.Tensor (n,)
        )

    def __len__(self):
        # One sample per entry of the wrapped dataset.
        return len(self.dataset)

    def num_classes(self):
        # The class count is supplied by the caller at construction time.
        return self._num_classes

27
main.py
View File

@ -33,11 +33,15 @@ parser.add_argument('--npoints', type=int, default=1024, help='resample points n
parser.add_argument('--model', type=str, default='', help='model path') parser.add_argument('--model', type=str, default='', help='model path')
parser.add_argument('--nepoch', type=int, default=250, help='number of epochs to train for') parser.add_argument('--nepoch', type=int, default=250, help='number of epochs to train for')
parser.add_argument('--outf', type=str, default='checkpoint', help='output folder') parser.add_argument('--outf', type=str, default='checkpoint', help='output folder')
parser.add_argument('--labels_within', type=strtobool, default=False, help='defines the label location')
parser.add_argument('--batch_size', type=int, default=8, help='input batch size') parser.add_argument('--batch_size', type=int, default=8, help='input batch size')
parser.add_argument('--test_per_batches', type=int, default=1000, help='run a test batch per training batches number') parser.add_argument('--test_per_batches', type=int, default=1000, help='run a test batch per training batches number')
parser.add_argument('--num_workers', type=int, default=4, help='number of data loading workers') parser.add_argument('--num_workers', type=int, default=4, help='number of data loading workers')
parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers') parser.add_argument('--headers', type=strtobool, default=True, help='if raw files come with headers')
parser.add_argument('--collate_per_segment', type=strtobool, default=True, help='whether to look at pointclouds or sub') parser.add_argument('--collate_per_segment', type=strtobool, default=True, help='whether to look at pointclouds or sub')
parser.add_argument('--has_variations', type=strtobool, default=False,
help='whether a single pointcloud has variations '
'named int(id)_pc.(xyz|dat) look at pointclouds or sub')
opt = parser.parse_args() opt = parser.parse_args()
@ -53,10 +57,9 @@ torch.manual_seed(opt.manual_seed)
torch.cuda.manual_seed(opt.manual_seed) torch.cuda.manual_seed(opt.manual_seed)
if __name__ == '__main__': if __name__ == '__main__':
# Dataset and transform # Dataset and transform
print('Construct dataset ..') print('Construct dataset ..')
if True:
rot_max_angle = 15 rot_max_angle = 15
trans_max_distance = 0.01 trans_max_distance = 0.01
@ -64,17 +67,24 @@ if __name__ == '__main__':
GT.RandomRotate(rot_max_angle, 1), GT.RandomRotate(rot_max_angle, 1),
GT.RandomRotate(rot_max_angle, 2)] GT.RandomRotate(rot_max_angle, 2)]
) )
TransTransform = GT.RandomTranslate(trans_max_distance)
TransTransform = GT.RandomTranslate(trans_max_distance)
train_transform = GT.Compose([GT.NormalizeScale(), RotTransform, TransTransform]) train_transform = GT.Compose([GT.NormalizeScale(), RotTransform, TransTransform])
test_transform = GT.Compose([GT.NormalizeScale(), ]) test_transform = GT.Compose([GT.NormalizeScale(), ])
dataset = ShapeNetPartSegDataset(root_dir=opt.dataset, collate_per_segment=opt.collate_per_segment, params = dict(root_dir=opt.dataset,
train=True, transform=train_transform, npoints=opt.npoints, headers=opt.headers) collate_per_segment=opt.collate_per_segment,
transform=train_transform,
npoints=opt.npoints,
labels_within=opt.labels_within,
has_variations=opt.has_variations,
headers=opt.headers
)
dataset = ShapeNetPartSegDataset(mode='train', **params)
dataLoader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) dataLoader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
test_dataset = ShapeNetPartSegDataset(root_dir=opt.dataset, collate_per_segment=opt.collate_per_segment, test_dataset = ShapeNetPartSegDataset(mode='test', **params)
train=False, transform=test_transform, npoints=opt.npoints, headers=opt.headers)
test_dataLoader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) test_dataLoader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
num_classes = dataset.num_classes() num_classes = dataset.num_classes()
@ -118,7 +128,8 @@ if __name__ == '__main__':
print('Epoch {}, total epoches {}'.format(epoch+1, opt.nepoch)) print('Epoch {}, total epoches {}'.format(epoch+1, opt.nepoch))
net.train() net.train()
# ToDo: We need different dataloader here to train the network in multiple iterations, maybe move the loop down
# for dataloader in ...
for batch_idx, sample in enumerate(dataLoader): for batch_idx, sample in enumerate(dataLoader):
# points: (batch_size, n, 3) # points: (batch_size, n, 3)
# labels: (batch_size, n) # labels: (batch_size, n)

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,10 @@ import sys
import os import os
import shutil import shutil
import math import math
from dataset.shapenet import PredictNetPartSegDataset
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add project root directory
from dataset.shapenet import ShapeNetPartSegDataset
from model.pointnet2_part_seg import PointNet2PartSegmentNet from model.pointnet2_part_seg import PointNet2PartSegmentNet
import torch_geometric.transforms as GT import torch_geometric.transforms as GT
import torch import torch
@ -237,7 +240,7 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../') # add proj
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='data', help='dataset path') parser.add_argument('--dataset', type=str, default='data', help='dataset path')
parser.add_argument('--npoints', type=int, default=2048, help='resample points number') parser.add_argument('--npoints', type=int, default=2048, help='resample points number')
parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_241.pth', help='model path') parser.add_argument('--model', type=str, default='./checkpoint/seg_model_custom_246.pth', help='model path')
parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result') parser.add_argument('--sample_idx', type=int, default=0, help='select a sample to segment and view result')
opt = parser.parse_args() opt = parser.parse_args()
print(opt) print(opt)
@ -279,12 +282,12 @@ if __name__ == '__main__':
print('load dataset ..') print('load dataset ..')
test_transform = GT.Compose([GT.NormalizeScale(), ]) test_transform = GT.Compose([GT.NormalizeScale(), ])
test_dataset = PredictNetPartSegDataset( test_dataset = ShapeNetPartSegDataset(
mode='predict',
root_dir=opt.dataset, root_dir=opt.dataset,
num_classes=4,
transform=None, transform=None,
npoints=opt.npoints, npoints=opt.npoints,
refresh=True refresh=False
) )
num_classes = test_dataset.num_classes() num_classes = test_dataset.num_classes()

View File

@ -28,10 +28,10 @@ if __name__ == '__main__':
print('Construct dataset ..') print('Construct dataset ..')
test_transform = GT.Compose([GT.NormalizeScale(),]) test_transform = GT.Compose([GT.NormalizeScale(),])
test_dataset = PredictNetPartSegDataset( test_dataset = ShapeNetPartSegDataset(
root_dir=opt.dataset, root_dir=opt.dataset,
collate_per_segment=False, collate_per_segment=False,
train=False, mode='predict',
transform=test_transform, transform=test_transform,
npoints=opt.npoints npoints=opt.npoints
) )