diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0bbbcd9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,203 @@
+# My Local Settings
+
+/data/
+
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+# Created by https://www.gitignore.io/api/python
+# Edit at https://www.gitignore.io/?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# End of https://www.gitignore.io/api/python
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/data/dataset.py b/dataset.py
similarity index 87%
rename from data/dataset.py
rename to dataset.py
index b731e66..96004c5 100644
--- a/data/dataset.py
+++ b/dataset.py
@@ -47,7 +47,8 @@ class AbstractDataset(ConcatDataset, ABC):
     # maps = ['hotel', 'tum','gallery', 'queens', 'oet']
     @property
     def maps(self):
-        return ['hotel', 'tum','gallery', 'queens', 'oet']
+        return ['test', 'test2']
+        # return ['hotel', 'tum','gallery', 'queens', 'oet']
 
     @property
     @abstractmethod
@@ -66,9 +67,10 @@ class AbstractDataset(ConcatDataset, ABC):
     def processed_paths(self):
         return [os.path.join(self.path, 'processed', x) for x in self.processed_filenames]
 
-    def __init__(self, path, refresh=False, **kwargs):
+    def __init__(self, path, refresh=False, transforms=None, **kwargs):
         self.path = path
         self.refresh = refresh
+        self.transforms = transforms or None
         super(AbstractDataset, self).__init__(datasets=self._load_datasets())
 
     @abstractmethod
@@ -92,12 +94,13 @@ class AbstractDataset(ConcatDataset, ABC):
         ):
             while True:
                 try:
-                    datasets.append(torch.load(self.processed_paths[map_idx]))
-                    print(f'Dataset "{self.processed_paths[map_idx]}" loaded')
+                    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+                    datasets.append(torch.load(self.processed_paths[map_idx], map_location=device))
                     break
                 except FileNotFoundError:
                     os.makedirs(os.path.join(*os.path.split(self.processed_paths[map_idx])[:-1]), exist_ok=True)
                     processed = self.process(self.raw_paths[map_idx])
+                    tqdm.write(f'Dataset "{self.processed_paths[map_idx]}" processed')
                     torch.save(processed, self.processed_paths[map_idx])
                     continue
         return datasets
@@ -122,6 +125,7 @@ class DataContainer(AbstractDataset):
 
     def process(self, filepath):
         dataDict = defaultdict(list)
+        total_lines = len(open(filepath, 'r').readlines())
         with open(filepath, 'r') as f:
             delimiter = ','
             # Separate the header
@@ -129,14 +133,14 @@ class DataContainer(AbstractDataset):
            headers.remove('inDoor')
            # Iterate over every line and convert it to float / value
            # ToDo: Make this nicer
-           for line in tqdm(f, total=len(self.maps), unit="lines"):
+           for line in tqdm(f, total=total_lines, unit=" lines", mininterval=1, miniters=1000):
                if line == '':
                    continue
                else:
                    for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]):
                        if attr not in ['inDoor']:
                            dataDict[attr].append(ast.literal_eval(x))
-        return Trajectories(self.size, self.step, headers, **dataDict)
+        return Trajectories(self.size, self.step, headers, transforms=self.transforms, **dataDict)
 
 
 class Trajectories(Dataset):
@@ -150,23 +154,28 @@ class Trajectories(Dataset):
     @property
     def features(self):
         return len(self.isovistMeasures)
 
-    def __init__(self, size, step, headers, **kwargs):
+    def __init__(self, size, step, headers, transforms=None, **kwargs):
         super(Trajectories, self).__init__()
         self.size: int = size
         self.step: int = step
         self.headers: list = headers
+        self.transforms: list = transforms or list()
+        self.data = self.__init_data_(**kwargs)
+        pass
+
+    def __init_data_(self, **kwargs):
         dataDict = dict()
         for key, val in kwargs.items():
             if key in self.isovistMeasures:
                 dataDict[key] = torch.tensor(val)
         # Check if all keys are of same length
         assert len(set(x.size()[0] for x in dataDict.values() if torch.is_tensor(x))) <= 1
-        self.data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1)
-        pass
+        data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1)
+        for transformation in self.transforms:
+            # All but x,y
+            data[:, 2:] = transformation(data[:, 2:])
+        return data
 
-    def __iter_tenors__(self):
-        return
 
     def __iter__(self):
         for i in range(len(self)):
@@ -179,7 +188,7 @@ class Trajectories(Dataset):
         :return:
         """
         subList = self.data[item:item + self.size * self.step or None:self.step]
-        xy, tensor = subList[:, 2], subList[:, 2:]
+        xy, tensor = subList[:, :2], subList[:, 2:]
         return (xy, tensor) if coords else tensor
 
     def __len__(self):
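Two details in this dataset diff are worth spelling out: transforms are applied to every column except the leading x,y pair, and the `__getitem__` hunk fixes a real bug (`subList[:, 2]` selected a single column where the first two columns, the coordinates, were intended). A minimal sketch of the strided-window slicing, using a toy tensor rather than repo data:

```python
import torch

# Toy stand-in for Trajectories.data: 30 timesteps, 6 features,
# columns 0-1 = x,y coordinates, columns 2-5 = isovist measures.
data = torch.arange(30 * 6, dtype=torch.float32).reshape(30, 6)

size, step, item = 5, 5, 0  # mirrors dataParams: size=5, step=5
window = data[item:item + size * step or None:step]  # every step-th row
xy, tensor = window[:, :2], window[:, 2:]            # the corrected split

print(window.shape)  # torch.Size([5, 6])
print(xy.shape)      # torch.Size([5, 2]) -- coordinates only
print(tensor.shape)  # torch.Size([5, 4]) -- isovist features only
```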
diff --git a/networks/__init__.py b/networks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/networks/basic_ae.py b/networks/basic_ae.py
index 712201b..7a6ea54 100644
--- a/networks/basic_ae.py
+++ b/networks/basic_ae.py
@@ -1,52 +1,73 @@
-from torch.nn import Sequential, Linear, GRU
-from data.dataset import DataContainer
-
+from torch.nn import Sequential, Linear, GRU, ReLU, Tanh
 from .modules import *
+from torch.nn.functional import mse_loss
+
 
 #######################
 # Basic AE-Implementation
 class BasicAE(Module, ABC):
 
+    @property
+    def name(self):
+        return self.__class__.__name__
+
     def __init__(self, dataParams, **kwargs):
         super(BasicAE, self).__init__()
         self.dataParams = dataParams
         self.latent_dim = kwargs.get('latent_dim', 2)
         self.encoder = self._build_encoder()
-        self.decoder = self._build_decoder()
-
+        self.decoder = self._build_decoder(out_shape=self.dataParams['features'])
 
     def _build_encoder(self):
-        encoder = Sequential()
-        encoder.add_module(f'EncoderLinear_{1}', Linear(6, 10, bias=True))
-        encoder.add_module(f'EncoderLinear_{2}', Linear(10, 10, bias=True))
-        gru = Sequential()
-        gru.add_module('Encoder', TimeDistributed(encoder))
-        gru.add_module('GRU', GRU(10, self.latent_dim))
+        encoder = Sequential(
+            Linear(6, 100, bias=True),
+            ReLU(),
+            Linear(100, 10, bias=True),
+            ReLU()
+        )
+        gru = Sequential(
+            TimeDistributed(encoder),
+            GRU(10, 10, batch_first=True),
+            RNNOutputFilter(only_last=True),
+            Linear(10, self.latent_dim)
+        )
         return gru
 
-    def _build_decoder(self):
-        decoder = Sequential()
-        decoder.add_module(f'DecoderLinear_{1}', Linear(10, 10, bias=True))
-        decoder.add_module(f'DecoderLinear_{2}', Linear(10, self.dataParams['features'], bias=True))
+    def _build_decoder(self, out_shape):
+        decoder = Sequential(
+            Linear(10, 100, bias=True),
+            ReLU(),
+            Linear(100, out_shape, bias=True),
+            Tanh()
+        )
 
-        gru = Sequential()
-        # There needs to be ab propper bat
-        gru.add_module('Repeater', Repeater((1, self.dataParams['size'], -1)))
-        gru.add_module('GRU', GRU(self.latent_dim, 10))
-        gru.add_module('GRU Filter', RNNOutputFilter())
-        gru.add_module('Decoder', TimeDistributed(decoder))
+        gru = Sequential(
+            GRU(self.latent_dim, 10, batch_first=True),
+            RNNOutputFilter(),
+            TimeDistributed(decoder)
+        )
         return gru
 
-    def forward(self, batch):
-        batch_size = batch.shape[0]
-        self.decoder.Repeater.shape = (batch_size, ) + self.decoder.Repeater.shape[-2:]
+    def forward(self, batch: torch.Tensor):
+        # Encoder
         # outputs, hidden (Batch, Timesteps aka. Size, Features / Latent Dim Size)
-        outputs, _ = self.encoder(batch)
-        z = outputs[:, -1]
+        z = self.encoder(batch)
+        # Decoder
+        # First repeat the data according to the batch size
+        z = Repeater((batch.shape[0], self.dataParams['size'], -1))(z)
         x_hat = self.decoder(z)
         return z, x_hat
 
 
+class AELightningOverrides:
+
+    def training_step(self, x, batch_nb):
+        # z, x_hat
+        _, x_hat = self.forward(x)
+        loss = mse_loss(x, x_hat)
+        return {'loss': loss}
+
+
 if __name__ == '__main__':
     raise PermissionError('Get out of here - never run this module')
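For readers unfamiliar with the helper modules, here is a hedged, plain-torch sketch of the shape flow through the rewired autoencoder, assuming `TimeDistributed` applies its wrapped stack per timestep, `RNNOutputFilter` unpacks the GRU's `(outputs, hidden)` tuple, and `Repeater` broadcasts the latent code across the time axis:

```python
import torch
from torch.nn import GRU, Linear

batch = torch.randn(100, 5, 6)  # (batch, timesteps aka. size, features)

# TimeDistributed(encoder): apply the linear stack to every timestep
# (the 100-unit hidden layer and ReLUs are collapsed here for brevity).
per_step = Linear(6, 10)(batch.reshape(-1, 6)).reshape(100, 5, 10)

# GRU(10, 10, batch_first=True) yields (outputs, hidden);
# RNNOutputFilter(only_last=True) keeps outputs[:, -1, :].
outputs, _ = GRU(10, 10, batch_first=True)(per_step)
z = Linear(10, 2)(outputs[:, -1, :])  # (100, latent_dim)

# Repeater((batch_size, size, -1)): feed the latent code to the
# decoder GRU once per timestep.
z_seq = z.unsqueeze(1).repeat(1, 5, 1)  # (100, 5, 2)
decoded, _ = GRU(2, 10, batch_first=True)(z_seq)
x_hat = Linear(10, 6)(decoded.reshape(-1, 10)).reshape(100, 5, 6)
print(x_hat.shape)  # torch.Size([100, 5, 6]) == batch.shape
```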
diff --git a/networks/basic_vae.py b/networks/basic_vae.py
new file mode 100644
index 0000000..032998b
--- /dev/null
+++ b/networks/basic_vae.py
@@ -0,0 +1,81 @@
+from torch.nn import Sequential, Linear, GRU, ReLU
+from .modules import *
+from torch.nn.functional import mse_loss
+
+
+#######################
+# Basic VAE-Implementation
+class BasicVAE(Module, ABC):
+
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def __init__(self, dataParams, **kwargs):
+        super(BasicVAE, self).__init__()
+        self.dataParams = dataParams
+        self.latent_dim = kwargs.get('latent_dim', 2)
+        self.encoder = self._build_encoder()
+        self.decoder = self._build_decoder(out_shape=self.dataParams['features'])
+        self.mu, self.logvar = Linear(10, self.latent_dim), Linear(10, self.latent_dim)
+
+    def _build_encoder(self):
+        linear_stack = Sequential(
+            Linear(6, 100, bias=True),
+            ReLU(),
+            Linear(100, 10, bias=True),
+            ReLU()
+        )
+        encoder = Sequential(
+            TimeDistributed(linear_stack),
+            GRU(10, 10, batch_first=True),
+            RNNOutputFilter(only_last=True),
+        )
+        return encoder
+
+    def reparameterize(self, mu, logvar):
+        # Lambda Layer, add gaussian noise
+        std = torch.exp(0.5 * logvar)
+        eps = torch.randn_like(std)
+        return mu + eps * std
+
+    def _build_decoder(self, out_shape):
+        decoder = Sequential(
+            Linear(10, 100, bias=True),
+            ReLU(),
+            Linear(100, out_shape, bias=True),
+            ReLU()
+        )
+
+        sequential_decoder = Sequential(
+            GRU(self.latent_dim, 10, batch_first=True),
+            RNNOutputFilter(),
+            TimeDistributed(decoder)
+        )
+        return sequential_decoder
+
+    def forward(self, batch):
+        encoding = self.encoder(batch)
+        mu_logvar = self.mu(encoding), self.logvar(encoding)
+        z = self.reparameterize(*mu_logvar)
+        repeat = Repeater((batch.shape[0], self.dataParams['size'], -1))
+        x_hat = self.decoder(repeat(z))
+        return (x_hat, *mu_logvar)
+
+
+class VAELightningOverrides:
+
+    def training_step(self, x, batch_nb):
+        x_hat, mu, logvar = self.forward(x)
+        BCE = mse_loss(x_hat, x, reduction='mean')
+
+        # see Appendix B from VAE paper:
+        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
+        # https://arxiv.org/abs/1312.6114
+        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
+        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
+        return {'loss': BCE + KLD}
+
+
+if __name__ == '__main__':
+    raise PermissionError('Get out of here - never run this module')
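The reparameterization trick and KLD term introduced here follow Kingma & Welling directly and can be sanity-checked in isolation (values below are arbitrary). Note that `forward` returns `(x_hat, mu, logvar)`, which is the order `training_step` must unpack:

```python
import torch

mu = torch.zeros(100, 2)      # (batch, latent_dim)
logvar = torch.zeros(100, 2)  # log(sigma^2) = 0  ->  sigma = 1

# Reparameterization: z = mu + sigma * eps, eps ~ N(0, I), so the
# sampling step stays differentiable w.r.t. mu and logvar.
std = torch.exp(0.5 * logvar)
eps = torch.randn_like(std)
z = mu + eps * std
print(z.shape)  # torch.Size([100, 2])

# KLD between N(mu, sigma^2) and N(0, I), Appendix B of Kingma & Welling:
# -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
print(KLD.item())  # 0.0 for an exactly standard-normal posterior
```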
diff --git a/networks/modules.py b/networks/modules.py
index d65695c..2c25282 100644
--- a/networks/modules.py
+++ b/networks/modules.py
@@ -90,13 +90,15 @@ class Repeater(Module):
 
 
 class RNNOutputFilter(Module):
 
-    def __init__(self, return_output=True):
+    def __init__(self, return_output=True, only_last=False):
         super(RNNOutputFilter, self).__init__()
+        self.only_last = only_last
         self.return_output = return_output
 
     def forward(self, x: tuple):
         outputs, hidden = x
-        return outputs if self.return_output else hidden
+        out = outputs if self.return_output else hidden
+        return out if not self.only_last else out[:, -1, :]
 
 
 if __name__ == '__main__':
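The new `only_last` flag exists because `Sequential` cannot route the `(outputs, hidden)` tuple that PyTorch RNNs return; `RNNOutputFilter` unpacks it, and `only_last=True` additionally keeps just the final timestep, which is what the encoders above feed into their latent projection. A quick usage sketch (shapes are illustrative):

```python
import torch
from torch.nn import GRU, Sequential

from networks.modules import RNNOutputFilter  # the module patched above

x = torch.randn(100, 5, 10)  # (batch, timesteps, features)

# GRU returns (outputs, hidden); RNNOutputFilter unpacks the tuple so
# the whole pipeline can live inside a Sequential.
last_only = Sequential(GRU(10, 10, batch_first=True), RNNOutputFilter(only_last=True))
print(last_only(x).shape)  # torch.Size([100, 10]) -- final timestep only

full_seq = Sequential(GRU(10, 10, batch_first=True), RNNOutputFilter())
print(full_seq(x).shape)   # torch.Size([100, 5, 10]) -- whole sequence
```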
diff --git a/run_basic_ae.py b/run_basic_ae.py
new file mode 100644
index 0000000..5b86625
--- /dev/null
+++ b/run_basic_ae.py
@@ -0,0 +1,41 @@
+from networks.basic_ae import BasicAE, AELightningOverrides
+from networks.modules import LightningModule
+from torch.optim import Adam
+from torch.utils.data import DataLoader
+from pytorch_lightning import data_loader
+from dataset import DataContainer
+
+from torch.nn import BatchNorm1d
+from pytorch_lightning import Trainer
+
+
+class AEModel(AELightningOverrides, LightningModule):
+
+    def __init__(self, dataParams: dict):
+        super(AEModel, self).__init__()
+        self.dataParams = dataParams
+        # noinspection PyUnresolvedReferences
+        self.network = BasicAE(self.dataParams)
+
+    def configure_optimizers(self):
+        return [Adam(self.parameters(), lr=0.02)]
+
+    @data_loader
+    def tng_dataloader(self):
+        return DataLoader(DataContainer('data', **self.dataParams), shuffle=True, batch_size=100)
+
+    def forward(self, x):
+        return self.network.forward(x)
+
+
+if __name__ == '__main__':
+    features = 6
+    ae = AEModel(
+        dataParams=dict(refresh=False, size=5, step=5, features=features, transforms=[BatchNorm1d(features)])
+    )
+
+    trainer = Trainer()
+    trainer.fit(ae)
diff --git a/basic_ae_lightning_torch.py b/run_basic_vae.py
similarity index 65%
rename from basic_ae_lightning_torch.py
rename to run_basic_vae.py
index 32c54fa..b74e430 100644
--- a/basic_ae_lightning_torch.py
+++ b/run_basic_vae.py
@@ -1,29 +1,28 @@
-from networks.basic_ae import BasicAE
+from networks.basic_vae import BasicVAE, VAELightningOverrides
 from networks.modules import LightningModule
 import pytorch_lightning as pl
 from torch.nn.functional import mse_loss
 from torch.optim import Adam
+import torch
+from torch.nn import BatchNorm1d
 from torch.utils.data import DataLoader
-from data.dataset import DataContainer
+from dataset import DataContainer
 
 from pytorch_lightning import Trainer
 
 
-class AEModel(LightningModule):
+class AEModel(VAELightningOverrides, LightningModule):
 
     def __init__(self, dataParams: dict):
         super(AEModel, self).__init__()
         self.dataParams = dataParams
-        self.network = BasicAE(self.dataParams)
+        # noinspection PyUnresolvedReferences
+        self.network = BasicVAE(self.dataParams)
 
     def forward(self, x):
         return self.network.forward(x)
 
-    def training_step(self, x, batch_nb):
-        z, x_hat = self.forward(x)
-        return {'loss': mse_loss(x, x_hat)}
-
     def configure_optimizers(self):
         # ToDo: Where do I get the parameters from?
         return [Adam(self.parameters(), lr=0.02)]
@@ -34,8 +33,9 @@ class AEModel(LightningModule):
 
 
 if __name__ == '__main__':
+    features = 6
     ae = AEModel(
-        dict(refresh=False, size=5, step=5, features=6)
+        dataParams=dict(refresh=False, size=5, step=5, features=features, transforms=[BatchNorm1d(features)])
    )
 
     trainer = Trainer()
diff --git a/viz/__init__.py b/viz/__init__.py
new file mode 100644
index 0000000..e69de29
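Both entry points share the same mixin pattern: `training_step` comes from the respective `*LightningOverrides` mixin, while `LightningModule` supplies the Lightning plumbing. A hedged end-to-end sketch mirroring `run_basic_ae.py` (it assumes raw map files exist under `data/` so `DataContainer` can build its processed datasets):

```python
from torch.nn import BatchNorm1d
from pytorch_lightning import Trainer

from run_basic_ae import AEModel  # class defined in the new script above

features = 6
model = AEModel(
    dataParams=dict(refresh=False, size=5, step=5,
                    features=features, transforms=[BatchNorm1d(features)])
)

# Trainer() with defaults, exactly as in the scripts; swap in the
# AEModel from run_basic_vae.py for the VAE variant.
Trainer().fit(model)
```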