point_to_primitive/datasets/_point_dataset.py
2020-05-26 21:44:57 +02:00

66 lines
2.2 KiB
Python

import pickle
from collections import defaultdict
from abc import ABC
from pathlib import Path
from torch.utils.data import Dataset
from ml_lib.point_toolset.sampling import FarthestpointSampling
import numpy as np
class _Point_Dataset(ABC, Dataset):
    """Base class for point-cloud datasets backed by raw text files.

    Raw files are looked up in ``<root>/raw`` (every entry whose name contains
    ``self.setting``). On first access each raw file is parsed column-wise and
    cached as a pickle under ``<root>/<setting>``; see :meth:`_read_or_load`.

    Subclasses must provide ``setting``, ``__len__`` and ``__getitem__``.
    """

    @property
    def sample_shape(self):
        """Return the ``shape`` of the first element of the first item."""
        # FixMe: This does not work when more than x/y tuples are returned
        return self[0][0].shape

    @property
    def setting(self) -> str:
        """Name of the dataset variant; used as glob fragment and cache folder.

        Raises:
            NotImplementedError: always, until overridden by a subclass.
        """
        raise NotImplementedError

    # Column names assigned, in order, to the values of each raw-file row.
    headers = ['x', 'y', 'z', 'xn', 'yn', 'zn', 'label', 'cl_idx']

    def __init__(self, root=Path('data'), sampling_k=2048, transforms=None,
                 load_preprocessed=True, *args, **kwargs):
        """Set up paths, the sampler and the list of raw files.

        Args:
            root: Dataset root directory; must contain a ``raw`` sub-folder.
            sampling_k: Passed as ``K`` to ``FarthestpointSampling``.
            transforms: Optional callable stored on ``self.transforms``; any
                falsy value is replaced by the identity function.
            load_preprocessed: When false, an existing cache file is deleted
                and rebuilt each time :meth:`_read_or_load` is called.
            *args: Accepted for subclass compatibility; ignored here.
            **kwargs: Accepted for subclass compatibility; ignored here.
        """
        super().__init__()
        self.load_preprocessed = load_preprocessed
        self.transforms = transforms if transforms else lambda x: x
        self.sampling_k = sampling_k
        self.sampling = FarthestpointSampling(K=self.sampling_k)
        self.root = Path(root)
        self.raw = self.root / 'raw'
        self.processed_ext = '.pik'
        self.raw_ext = '.xyz'
        self.processed = self.root / self.setting
        self.processed.mkdir(parents=True, exist_ok=True)
        # Directory listings come back in OS-dependent order; sorting keeps
        # the index -> file mapping stable across runs and machines.
        self._files = sorted(self.raw.glob(f'*{self.setting}*'))

    def _read_or_load(self, item):
        """Ensure the cache file for raw file ``item`` exists; return its path.

        The raw file is read as whitespace-separated float columns. Values of
        each row are matched against ``self.headers`` (surplus values are
        dropped by ``zip``), collected per column, converted to numpy arrays
        and pickled as a ``defaultdict(list)``.

        Args:
            item: Index into ``self._files``.

        Returns:
            pathlib.Path of the pickled point cloud inside ``self.processed``.

        Raises:
            IndexError: if ``item`` is out of range of ``self._files``.
            ValueError: if a non-blank row contains a non-numeric token.
        """
        raw_file_path = self._files[item]
        processed_name = raw_file_path.name.replace(self.raw_ext, self.processed_ext)
        processed_file_path = self.processed / processed_name

        if not self.load_preprocessed:
            # Force a rebuild by dropping any stale cache file.
            processed_file_path.unlink(missing_ok=True)

        if not processed_file_path.exists():
            pointcloud = defaultdict(list)
            with raw_file_path.open('r') as raw_file:
                for row in raw_file:
                    # split() without a separator tolerates tabs and repeated
                    # spaces; blank (e.g. trailing) lines yield no tokens and
                    # are skipped instead of failing on float('').
                    tokens = row.split()
                    if not tokens:
                        continue
                    values = [float(token) for token in tokens]
                    for header, value in zip(self.headers, values):
                        pointcloud[header].append(value)
            # Only existing keys are reassigned, so iterating the dict while
            # replacing its values is safe.
            for key in pointcloud:
                pointcloud[key] = np.asarray(pointcloud[key])
            with processed_file_path.open('wb') as processed_file:
                pickle.dump(pointcloud, processed_file)
        return processed_file_path

    def __len__(self):
        """Return the number of samples; must be implemented by subclasses."""
        raise NotImplementedError

    def __getitem__(self, item):
        """Return the sample at ``item``; must be implemented by subclasses."""
        raise NotImplementedError