Lightning integration: basic AE, dataloaders and dataset

Si11ium 2019-08-16 14:29:48 +02:00
parent fbe0600e24
commit 265c900f33
10 changed files with 406 additions and 49 deletions

203
.gitignore vendored Normal file

@@ -0,0 +1,203 @@
# My Local Settings
/data/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# End of https://www.gitignore.io/api/python

0
__init__.py Normal file

dataset.py

@@ -47,7 +47,8 @@ class AbstractDataset(ConcatDataset, ABC):
    # maps = ['hotel', 'tum','gallery', 'queens', 'oet']
    @property
    def maps(self):
-        return ['hotel', 'tum','gallery', 'queens', 'oet']
+        return ['test', 'test2']
+        # return ['hotel', 'tum','gallery', 'queens', 'oet']

    @property
    @abstractmethod
@@ -66,9 +67,10 @@ class AbstractDataset(ConcatDataset, ABC):
    def processed_paths(self):
        return [os.path.join(self.path, 'processed', x) for x in self.processed_filenames]

-    def __init__(self, path, refresh=False, **kwargs):
+    def __init__(self, path, refresh=False, transforms=None, **kwargs):
        self.path = path
        self.refresh = refresh
+        self.transforms = transforms or None
        super(AbstractDataset, self).__init__(datasets=self._load_datasets())

    @abstractmethod
@@ -92,12 +94,13 @@ class AbstractDataset(ConcatDataset, ABC):
                           ):
            while True:
                try:
-                    datasets.append(torch.load(self.processed_paths[map_idx]))
-                    print(f'Dataset "{self.processed_paths[map_idx]}" loaded')
+                    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+                    datasets.append(torch.load(self.processed_paths[map_idx], map_location=device))
                    break
                except FileNotFoundError:
                    os.makedirs(os.path.join(*os.path.split(self.processed_paths[map_idx])[:-1]), exist_ok=True)
                    processed = self.process(self.raw_paths[map_idx])
+                    tqdm.write(f'Dataset "{self.processed_paths[map_idx]}" processed')
                    torch.save(processed, self.processed_paths[map_idx])
                    continue
        return datasets
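
The retry loop above implements a simple cache-or-process pattern, and the new map_location argument remaps tensors that were saved on a GPU onto whatever device is currently available. A minimal standalone sketch of the same idea (load_or_process and process_fn are hypothetical names, not part of this repository):

import os
import torch


def load_or_process(processed_path, raw_path, process_fn):
    # Load the cached tensor if it exists; otherwise process the raw
    # file once, save the result, and retry the load.
    while True:
        try:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            # map_location remaps storages saved on another device
            return torch.load(processed_path, map_location=device)
        except FileNotFoundError:
            os.makedirs(os.path.dirname(processed_path), exist_ok=True)
            torch.save(process_fn(raw_path), processed_path)
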
@@ -122,6 +125,7 @@ class DataContainer(AbstractDataset):
    def process(self, filepath):
        dataDict = defaultdict(list)
+        total_lines = len(open(filepath, 'r').readlines())
        with open(filepath, 'r') as f:
            delimiter = ','
            # Separate the header
@@ -129,14 +133,14 @@ class DataContainer(AbstractDataset):
            headers.remove('inDoor')
            # Iterate over every line and convert it to float / value
            # ToDo: Make this nicer
-            for line in tqdm(f, total=len(self.maps), unit="lines"):
+            for line in tqdm(f, total=total_lines, unit=" lines", mininterval=1, miniters=1000):
                if line == '':
                    continue
                else:
                    for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]):
                        if attr not in ['inDoor']:
                            dataDict[attr].append(ast.literal_eval(x))
-        return Trajectories(self.size, self.step, headers, **dataDict)
+        return Trajectories(self.size, self.step, headers, transforms=self.transforms, **dataDict)


class Trajectories(Dataset):
@@ -150,23 +154,28 @@ class Trajectories(Dataset):
    def features(self):
        return len(self.isovistMeasures)

-    def __init__(self, size, step, headers, **kwargs):
+    def __init__(self, size, step, headers, transforms=None, **kwargs):
        super(Trajectories, self).__init__()
        self.size: int = size
        self.step: int = step
        self.headers: list = headers
+        self.transforms: list = transforms or list()
        self.data = self.__init_data_(**kwargs)
        pass

    def __init_data_(self, **kwargs):
        dataDict = dict()
        for key, val in kwargs.items():
            if key in self.isovistMeasures:
                dataDict[key] = torch.tensor(val)
        # Check that all keys are of the same length
        assert len(set(x.size()[0] for x in dataDict.values() if torch.is_tensor(x))) <= 1
-        self.data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1)
-        pass
+        data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1)
+        for transformation in self.transforms:
+            # All but x, y
+            data[:, 2:] = transformation(data[:, 2:])
+        return data

    def __iter_tensors__(self):
        return

    def __iter__(self):
        for i in range(len(self)):
@@ -179,7 +188,7 @@ class Trajectories(Dataset):
        :return:
        """
        subList = self.data[item:item + self.size * self.step or None:self.step]
-        xy, tensor = subList[:, 2], subList[:, 2:]
+        xy, tensor = subList[:, :2], subList[:, 2:]
        return (xy, tensor) if coords else tensor

    def __len__(self):
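
The slicing in __getitem__ is easiest to see with toy values: it takes size rows spaced step apart, then splits off the first two columns (the x, y coordinates) from the remaining measures. A small sketch with made-up numbers:

import torch

size, step, item = 3, 2, 0
data = torch.arange(60.).reshape(10, 6)              # 10 timesteps, 6 features
window = data[item:item + size * step or None:step]  # rows 0, 2, 4
xy, tensor = window[:, :2], window[:, 2:]            # coordinates vs. measures
print(window.shape, xy.shape, tensor.shape)          # (3, 6) (3, 2) (3, 4)
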

0
networks/__init__.py Normal file

networks/basic_ae.py

@@ -1,52 +1,73 @@
-from torch.nn import Sequential, Linear, GRU
-from data.dataset import DataContainer
+from torch.nn import Sequential, Linear, GRU, ReLU, Tanh
from .modules import *
+from torch.nn.functional import mse_loss


#######################
# Basic AE-Implementation
class BasicAE(Module, ABC):

    @property
    def name(self):
        return self.__class__.__name__

    def __init__(self, dataParams, **kwargs):
        super(BasicAE, self).__init__()
        self.dataParams = dataParams
        self.latent_dim = kwargs.get('latent_dim', 2)
        self.encoder = self._build_encoder()
-        self.decoder = self._build_decoder()
+        self.decoder = self._build_decoder(out_shape=self.dataParams['features'])

    def _build_encoder(self):
-        encoder = Sequential()
-        encoder.add_module(f'EncoderLinear_{1}', Linear(6, 10, bias=True))
-        encoder.add_module(f'EncoderLinear_{2}', Linear(10, 10, bias=True))
-        gru = Sequential()
-        gru.add_module('Encoder', TimeDistributed(encoder))
-        gru.add_module('GRU', GRU(10, self.latent_dim))
+        encoder = Sequential(
+            Linear(6, 100, bias=True),
+            ReLU(),
+            Linear(100, 10, bias=True),
+            ReLU()
+        )
+        gru = Sequential(
+            TimeDistributed(encoder),
+            GRU(10, 10, batch_first=True),
+            RNNOutputFilter(only_last=True),
+            Linear(10, self.latent_dim)
+        )
        return gru

-    def _build_decoder(self):
-        decoder = Sequential()
-        decoder.add_module(f'DecoderLinear_{1}', Linear(10, 10, bias=True))
-        decoder.add_module(f'DecoderLinear_{2}', Linear(10, self.dataParams['features'], bias=True))
+    def _build_decoder(self, out_shape):
+        decoder = Sequential(
+            Linear(10, 100, bias=True),
+            ReLU(),
+            Linear(100, out_shape, bias=True),
+            Tanh()
+        )
-        gru = Sequential()
-        # There needs to be a proper batch size
-        gru.add_module('Repeater', Repeater((1, self.dataParams['size'], -1)))
-        gru.add_module('GRU', GRU(self.latent_dim, 10))
-        gru.add_module('GRU Filter', RNNOutputFilter())
-        gru.add_module('Decoder', TimeDistributed(decoder))
+        gru = Sequential(
+            GRU(self.latent_dim, 10, batch_first=True),
+            RNNOutputFilter(),
+            TimeDistributed(decoder)
+        )
        return gru

-    def forward(self, batch):
-        batch_size = batch.shape[0]
-        self.decoder.Repeater.shape = (batch_size, ) + self.decoder.Repeater.shape[-2:]
+    def forward(self, batch: torch.Tensor):
+        # Encoder
+        # outputs, hidden: (Batch, Timesteps aka Size, Features / Latent Dim Size)
-        outputs, _ = self.encoder(batch)
-        z = outputs[:, -1]
+        z = self.encoder(batch)
        # Decoder
+        # First repeat the data according to the batch size
+        z = Repeater((batch.shape[0], self.dataParams['size'], -1))(z)
        x_hat = self.decoder(z)
        return z, x_hat


+class AELightningOverrides:
+
+    def training_step(self, x, batch_nb):
+        # z, x_hat
+        _, x_hat = self.forward(x)
+        loss = mse_loss(x, x_hat)
+        return {'loss': loss}
+

if __name__ == '__main__':
    raise PermissionError('Get out of here - never run this module')
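
TimeDistributed, Repeater and RNNOutputFilter are helpers from this repository's modules.py, so here is a plain-PyTorch trace of the tensor shapes the rewritten autoencoder implies, assuming the defaults used below (size=5, features=6, latent_dim=2; batch size 4 is arbitrary). Note that nn.Linear already broadcasts over the time dimension, which is presumably what TimeDistributed achieves by reshaping:

import torch
from torch import nn

batch, size, features, latent_dim = 4, 5, 6, 2
x = torch.randn(batch, size, features)

# Encoder: per-timestep MLP, GRU, last timestep only, projection to latent space
mlp = nn.Sequential(nn.Linear(features, 100), nn.ReLU(), nn.Linear(100, 10), nn.ReLU())
out, _ = nn.GRU(10, 10, batch_first=True)(mlp(x))   # (4, 5, 10)
z = nn.Linear(10, latent_dim)(out[:, -1, :])        # (4, 2), cf. RNNOutputFilter(only_last=True)

# Decoder: repeat z along the time axis (the Repeater), GRU, per-timestep MLP back
z_seq = z.unsqueeze(1).repeat(1, size, 1)                         # (4, 5, 2)
dec, _ = nn.GRU(latent_dim, 10, batch_first=True)(z_seq)          # (4, 5, 10)
x_hat = nn.Sequential(nn.Linear(10, 100), nn.ReLU(),
                      nn.Linear(100, features), nn.Tanh())(dec)   # (4, 5, 6)
print(z.shape, x_hat.shape)
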

81
networks/basic_vae.py Normal file

@@ -0,0 +1,81 @@
from torch.nn import Sequential, Linear, GRU, ReLU
from .modules import *
from torch.nn.functional import mse_loss


#######################
# Basic VAE-Implementation
class BasicVAE(Module, ABC):

    @property
    def name(self):
        return self.__class__.__name__

    def __init__(self, dataParams, **kwargs):
        super(BasicVAE, self).__init__()
        self.dataParams = dataParams
        self.latent_dim = kwargs.get('latent_dim', 2)
        self.encoder = self._build_encoder()
        self.decoder = self._build_decoder(out_shape=self.dataParams['features'])
        self.mu, self.logvar = Linear(10, self.latent_dim), Linear(10, self.latent_dim)

    def _build_encoder(self):
        linear_stack = Sequential(
            Linear(6, 100, bias=True),
            ReLU(),
            Linear(100, 10, bias=True),
            ReLU()
        )
        encoder = Sequential(
            TimeDistributed(linear_stack),
            GRU(10, 10, batch_first=True),
            RNNOutputFilter(only_last=True),
        )
        return encoder

    def reparameterize(self, mu, logvar):
        # Lambda layer: add gaussian noise
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def _build_decoder(self, out_shape):
        decoder = Sequential(
            Linear(10, 100, bias=True),
            ReLU(),
            Linear(100, out_shape, bias=True),
            ReLU()
        )
        sequential_decoder = Sequential(
            GRU(self.latent_dim, 10, batch_first=True),
            RNNOutputFilter(),
            TimeDistributed(decoder)
        )
        return sequential_decoder

    def forward(self, batch):
        encoding = self.encoder(batch)
        mu_logvar = self.mu(encoding), self.logvar(encoding)
        z = self.reparameterize(*mu_logvar)
        repeat = Repeater((batch.shape[0], self.dataParams['size'], -1))
        x_hat = self.decoder(repeat(z))
        return (x_hat, *mu_logvar)


class VAELightningOverrides:

    def training_step(self, x, batch_nb):
        # forward returns (x_hat, mu, logvar)
        x_hat, mu, logvar = self.forward(x)
        BCE = mse_loss(x_hat, x, reduction='mean')
        # see Appendix B from VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return {'loss': BCE + KLD}


if __name__ == '__main__':
    raise PermissionError('Get out of here - never run this module')
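
The reparameterization trick and the KL term can be sanity-checked in isolation: with mu = 0 and logvar = 0 the latent distribution is already the standard normal, so the KLD must come out as exactly zero. A minimal sketch:

import torch

mu = torch.zeros(4, 2)      # (batch, latent_dim)
logvar = torch.zeros(4, 2)  # log(sigma^2) = 0, i.e. sigma = 1

# z = mu + sigma * eps keeps the sample differentiable w.r.t. mu and logvar
std = torch.exp(0.5 * logvar)
z = mu + torch.randn_like(std) * std

# KL divergence between N(mu, sigma^2) and the standard normal N(0, 1)
KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
print(z.shape, KLD.item())  # torch.Size([4, 2]) 0.0
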

networks/modules.py

@@ -90,13 +90,15 @@ class Repeater(Module):
class RNNOutputFilter(Module):

-    def __init__(self, return_output=True):
+    def __init__(self, return_output=True, only_last=False):
        super(RNNOutputFilter, self).__init__()
+        self.only_last = only_last
        self.return_output = return_output

    def forward(self, x: tuple):
        outputs, hidden = x
-        return outputs if self.return_output else hidden
+        out = outputs if self.return_output else hidden
+        return out if not self.only_last else out[:, -1, :]


if __name__ == '__main__':
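
For context: nn.GRU returns an (output, h_n) tuple, which nn.Sequential would otherwise feed verbatim into the next layer. The filter unwraps that tuple, and the new only_last flag additionally keeps just the final timestep. A self-contained usage sketch of the class as changed above:

import torch
from torch import nn


class RNNOutputFilter(nn.Module):

    def __init__(self, return_output=True, only_last=False):
        super().__init__()
        self.return_output = return_output
        self.only_last = only_last

    def forward(self, x: tuple):
        outputs, hidden = x
        out = outputs if self.return_output else hidden
        return out if not self.only_last else out[:, -1, :]


net = nn.Sequential(
    nn.GRU(6, 10, batch_first=True),
    RNNOutputFilter(only_last=True),    # (batch, seq, 10) -> (batch, 10)
)
print(net(torch.randn(4, 5, 6)).shape)  # torch.Size([4, 10])
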

41
run_basic_ae.py Normal file

@@ -0,0 +1,41 @@
from networks.basic_ae import BasicAE, AELightningOverrides
from networks.modules import LightningModule
from torch.optim import Adam
from torch.utils.data import DataLoader
from pytorch_lightning import data_loader
from dataset import DataContainer
from torch.nn import BatchNorm1d
from pytorch_lightning import Trainer


class AEModel(AELightningOverrides, LightningModule):

    def __init__(self, dataParams: dict):
        super(AEModel, self).__init__()
        self.dataParams = dataParams
        # noinspection PyUnresolvedReferences
        self.network = BasicAE(self.dataParams)

    def configure_optimizers(self):
        return [Adam(self.parameters(), lr=0.02)]

    @data_loader
    def tng_dataloader(self):
        return DataLoader(DataContainer('data', **self.dataParams), shuffle=True, batch_size=100)

    def forward(self, x):
        return self.network.forward(x)


if __name__ == '__main__':
    features = 6
    ae = AEModel(
        dataParams=dict(refresh=False, size=5, step=5, features=features, transforms=[BatchNorm1d(features)])
    )
    trainer = Trainer()
    trainer.fit(ae)
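
Roughly, Trainer.fit wires these hooks together: it builds the optimizer from configure_optimizers, pulls batches from tng_dataloader (the @data_loader decorator in this pytorch-lightning version makes the method behave like a cached property), and calls training_step once per batch. A simplified sketch of that control flow, not pytorch-lightning's actual implementation (naive_fit is a hypothetical name):

def naive_fit(model, epochs=1):
    optimizer = model.configure_optimizers()[0]
    for _ in range(epochs):
        for batch_nb, batch in enumerate(model.tng_dataloader):
            loss = model.training_step(batch, batch_nb)['loss']
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
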


@@ -1,29 +1,28 @@
-from networks.basic_ae import BasicAE
+from networks.basic_vae import BasicVAE, VAELightningOverrides
from networks.modules import LightningModule
import pytorch_lightning as pl
-from torch.nn.functional import mse_loss
from torch.optim import Adam
import torch
+from torch.nn import BatchNorm1d
from torch.utils.data import DataLoader
-from data.dataset import DataContainer
+from dataset import DataContainer
from pytorch_lightning import Trainer


-class AEModel(LightningModule):
+class AEModel(VAELightningOverrides, LightningModule):

    def __init__(self, dataParams: dict):
        super(AEModel, self).__init__()
        self.dataParams = dataParams
-        self.network = BasicAE(self.dataParams)
+        # noinspection PyUnresolvedReferences
+        self.network = BasicVAE(self.dataParams)

    def forward(self, x):
        return self.network.forward(x)

-    def training_step(self, x, batch_nb):
-        z, x_hat = self.forward(x)
-        return {'loss': mse_loss(x, x_hat)}

    def configure_optimizers(self):
        # ToDo: Where do I get the parameters from?
        return [Adam(self.parameters(), lr=0.02)]
@@ -34,8 +33,9 @@ class AEModel(LightningModule):

if __name__ == '__main__':
+    features = 6
    ae = AEModel(
-        dict(refresh=False, size=5, step=5, features=6)
+        dataParams=dict(refresh=False, size=5, step=5, features=features, transforms=[BatchNorm1d(features)])
    )
    trainer = Trainer()

0
viz/__init__.py Normal file