Lightning integration basic ae, dataloaders and dataset

This commit is contained in:
Si11ium 2019-08-16 14:29:48 +02:00
parent fbe0600e24
commit 265c900f33
10 changed files with 406 additions and 49 deletions

203
.gitignore vendored Normal file
View File

@ -0,0 +1,203 @@
# My Local Settings
/data/
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
# Created by https://www.gitignore.io/api/python
# Edit at https://www.gitignore.io/?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# End of https://www.gitignore.io/api/python

0
__init__.py Normal file
View File

View File

@ -47,7 +47,8 @@ class AbstractDataset(ConcatDataset, ABC):
# maps = ['hotel', 'tum','gallery', 'queens', 'oet'] # maps = ['hotel', 'tum','gallery', 'queens', 'oet']
@property @property
def maps(self): def maps(self):
return ['hotel', 'tum','gallery', 'queens', 'oet'] return ['test', 'test2']
# return ['hotel', 'tum','gallery', 'queens', 'oet']
@property @property
@abstractmethod @abstractmethod
@ -66,9 +67,10 @@ class AbstractDataset(ConcatDataset, ABC):
def processed_paths(self): def processed_paths(self):
return [os.path.join(self.path, 'processed', x) for x in self.processed_filenames] return [os.path.join(self.path, 'processed', x) for x in self.processed_filenames]
def __init__(self, path, refresh=False, **kwargs): def __init__(self, path, refresh=False, transforms=None, **kwargs):
self.path = path self.path = path
self.refresh = refresh self.refresh = refresh
self.transforms = transforms or None
super(AbstractDataset, self).__init__(datasets=self._load_datasets()) super(AbstractDataset, self).__init__(datasets=self._load_datasets())
@abstractmethod @abstractmethod
@ -92,12 +94,13 @@ class AbstractDataset(ConcatDataset, ABC):
): ):
while True: while True:
try: try:
datasets.append(torch.load(self.processed_paths[map_idx])) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Dataset "{self.processed_paths[map_idx]}" loaded') datasets.append(torch.load(self.processed_paths[map_idx], map_location=device))
break break
except FileNotFoundError: except FileNotFoundError:
os.makedirs(os.path.join(*os.path.split(self.processed_paths[map_idx])[:-1]), exist_ok=True) os.makedirs(os.path.join(*os.path.split(self.processed_paths[map_idx])[:-1]), exist_ok=True)
processed = self.process(self.raw_paths[map_idx]) processed = self.process(self.raw_paths[map_idx])
tqdm.write(f'Dataset "{self.processed_paths[map_idx]}" processed')
torch.save(processed, self.processed_paths[map_idx]) torch.save(processed, self.processed_paths[map_idx])
continue continue
return datasets return datasets
@ -122,6 +125,7 @@ class DataContainer(AbstractDataset):
def process(self, filepath): def process(self, filepath):
dataDict = defaultdict(list) dataDict = defaultdict(list)
total_lines = len(open(filepath,'r').readlines())
with open(filepath, 'r') as f: with open(filepath, 'r') as f:
delimiter = ',' delimiter = ','
# Separate the header # Separate the header
@ -129,14 +133,14 @@ class DataContainer(AbstractDataset):
headers.remove('inDoor') headers.remove('inDoor')
# Iterate over every line and convert it to float / value # Iterate over every line and convert it to float / value
# ToDo: Make this nicer # ToDo: Make this nicer
for line in tqdm(f, total=len(self.maps), unit="lines"): for line in tqdm(f, total=total_lines, unit=" lines", mininterval=1, miniters=1000):
if line == '': if line == '':
continue continue
else: else:
for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]): for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]):
if attr not in ['inDoor']: if attr not in ['inDoor']:
dataDict[attr].append(ast.literal_eval(x)) dataDict[attr].append(ast.literal_eval(x))
return Trajectories(self.size, self.step, headers, **dataDict) return Trajectories(self.size, self.step, headers, transforms=self.transforms, **dataDict)
class Trajectories(Dataset): class Trajectories(Dataset):
@ -150,23 +154,28 @@ class Trajectories(Dataset):
def features(self): def features(self):
return len(self.isovistMeasures) return len(self.isovistMeasures)
def __init__(self, size, step, headers, **kwargs): def __init__(self, size, step, headers, transforms=None, **kwargs):
super(Trajectories, self).__init__() super(Trajectories, self).__init__()
self.size: int = size self.size: int = size
self.step: int = step self.step: int = step
self.headers: list = headers self.headers: list = headers
self.transforms: list = transforms or list()
self.data = self.__init_data_(**kwargs)
pass
def __init_data_(self, **kwargs):
dataDict = dict() dataDict = dict()
for key, val in kwargs.items(): for key, val in kwargs.items():
if key in self.isovistMeasures: if key in self.isovistMeasures:
dataDict[key] = torch.tensor(val) dataDict[key] = torch.tensor(val)
# Check if all keys are of same length # Check if all keys are of same length
assert len(set(x.size()[0] for x in dataDict.values() if torch.is_tensor(x))) <= 1 assert len(set(x.size()[0] for x in dataDict.values() if torch.is_tensor(x))) <= 1
self.data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1) data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1)
pass for transformation in self.transforms:
# All but x,y
data[:, 2:] = transformation(data[:, 2:])
return data
def __iter_tenors__(self):
return
def __iter__(self): def __iter__(self):
for i in range(len(self)): for i in range(len(self)):
@ -179,7 +188,7 @@ class Trajectories(Dataset):
:return: :return:
""" """
subList = self.data[item:item + self.size * self.step or None:self.step] subList = self.data[item:item + self.size * self.step or None:self.step]
xy, tensor = subList[:, 2], subList[:, 2:] xy, tensor = subList[:, :2], subList[:, 2:]
return (xy, tensor) if coords else tensor return (xy, tensor) if coords else tensor
def __len__(self): def __len__(self):

0
networks/__init__.py Normal file
View File

View File

@ -1,52 +1,73 @@
from torch.nn import Sequential, Linear, GRU from torch.nn import Sequential, Linear, GRU, ReLU, Tanh
from data.dataset import DataContainer
from .modules import * from .modules import *
from torch.nn.functional import mse_loss
####################### #######################
# Basic AE-Implementation # Basic AE-Implementation
class BasicAE(Module, ABC): class BasicAE(Module, ABC):
@property
def name(self):
return self.__class__.__name__
def __init__(self, dataParams, **kwargs): def __init__(self, dataParams, **kwargs):
super(BasicAE, self).__init__() super(BasicAE, self).__init__()
self.dataParams = dataParams self.dataParams = dataParams
self.latent_dim = kwargs.get('latent_dim', 2) self.latent_dim = kwargs.get('latent_dim', 2)
self.encoder = self._build_encoder() self.encoder = self._build_encoder()
self.decoder = self._build_decoder() self.decoder = self._build_decoder(out_shape=self.dataParams['features'])
def _build_encoder(self): def _build_encoder(self):
encoder = Sequential() encoder = Sequential(
encoder.add_module(f'EncoderLinear_{1}', Linear(6, 10, bias=True)) Linear(6, 100, bias=True),
encoder.add_module(f'EncoderLinear_{2}', Linear(10, 10, bias=True)) ReLU(),
gru = Sequential() Linear(100, 10, bias=True),
gru.add_module('Encoder', TimeDistributed(encoder)) ReLU()
gru.add_module('GRU', GRU(10, self.latent_dim)) )
gru = Sequential(
TimeDistributed(encoder),
GRU(10, 10, batch_first=True),
RNNOutputFilter(only_last=True),
Linear(10, self.latent_dim)
)
return gru return gru
def _build_decoder(self): def _build_decoder(self, out_shape):
decoder = Sequential() decoder = Sequential(
decoder.add_module(f'DecoderLinear_{1}', Linear(10, 10, bias=True)) Linear(10, 100, bias=True),
decoder.add_module(f'DecoderLinear_{2}', Linear(10, self.dataParams['features'], bias=True)) ReLU(),
Linear(100, out_shape, bias=True),
Tanh()
)
gru = Sequential() gru = Sequential(
# There needs to be ab propper bat GRU(self.latent_dim, 10,batch_first=True),
gru.add_module('Repeater', Repeater((1, self.dataParams['size'], -1))) RNNOutputFilter(),
gru.add_module('GRU', GRU(self.latent_dim, 10)) TimeDistributed(decoder)
gru.add_module('GRU Filter', RNNOutputFilter()) )
gru.add_module('Decoder', TimeDistributed(decoder))
return gru return gru
def forward(self, batch): def forward(self, batch: torch.Tensor):
batch_size = batch.shape[0] # Encoder
self.decoder.Repeater.shape = (batch_size, ) + self.decoder.Repeater.shape[-2:]
# outputs, hidden (Batch, Timesteps aka. Size, Features / Latent Dim Size) # outputs, hidden (Batch, Timesteps aka. Size, Features / Latent Dim Size)
outputs, _ = self.encoder(batch) z = self.encoder(batch)
z = outputs[:, -1] # Decoder
# First repeat the data accordingly to the batch size
z = Repeater((batch.shape[0], self.dataParams['size'], -1))(z)
x_hat = self.decoder(z) x_hat = self.decoder(z)
return z, x_hat return z, x_hat
class AELightningOverrides:
def training_step(self, x, batch_nb):
# z, x_hat
_, x_hat = self.forward(x)
loss = mse_loss(x, x_hat)
return {'loss': loss}
if __name__ == '__main__': if __name__ == '__main__':
raise PermissionError('Get out of here - never run this module') raise PermissionError('Get out of here - never run this module')

81
networks/basic_vae.py Normal file
View File

@ -0,0 +1,81 @@
from torch.nn import Sequential, Linear, GRU, ReLU
from .modules import *
from torch.nn.functional import mse_loss
#######################
# Basic VAE-Implementation
class BasicVAE(Module, ABC):
    """GRU-based variational autoencoder for fixed-length feature sequences.

    The encoder condenses a sequence into a single 10-dimensional vector, from
    which two linear heads predict the mean and the log-variance of the latent
    distribution. A latent sample is repeated along the time axis and decoded
    back into a sequence.

    ``dataParams`` must provide:
        * ``'features'`` - number of features per time step (used as encoder
          input width and decoder output width),
        * ``'size'`` - number of time steps the decoder has to produce.

    Optional keyword arguments:
        * ``latent_dim`` - size of the latent vector (default ``2``).
    """

    @property
    def name(self):
        """Return the name of the concrete class."""
        return self.__class__.__name__

    def __init__(self, dataParams, **kwargs):
        super(BasicVAE, self).__init__()
        self.dataParams = dataParams
        self.latent_dim = kwargs.get('latent_dim', 2)
        self.encoder = self._build_encoder()
        self.decoder = self._build_decoder(out_shape=self.dataParams['features'])
        # Two independent heads on top of the 10-dimensional encoder output.
        self.mu, self.logvar = Linear(10, self.latent_dim), Linear(10, self.latent_dim)

    def _build_encoder(self):
        """Build the encoder: per-step MLP -> GRU -> last GRU output.

        The input width is taken from ``dataParams['features']`` so that it
        always agrees with the decoder's output width (it used to be a
        hard-coded ``6``).
        """
        in_features = self.dataParams['features']
        linear_stack = Sequential(
            Linear(in_features, 100, bias=True),
            ReLU(),
            Linear(100, 10, bias=True),
            ReLU()
        )
        encoder = Sequential(
            # presumably applies the stack to every time step - see modules.TimeDistributed
            TimeDistributed(linear_stack),
            GRU(10, 10, batch_first=True),
            # Keep only the GRU output of the last time step.
            RNNOutputFilter(only_last=True),
        )
        return encoder

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * sigma`` with ``eps ~ N(0, 1)``.

        :param mu: mean of the latent distribution.
        :param logvar: log-variance of the latent distribution.
        :return: a tensor shaped like ``mu``.
        """
        # Lambda Layer, add gaussian noise
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def _build_decoder(self, out_shape):
        """Build the decoder: GRU -> GRU outputs -> per-step MLP.

        :param out_shape: number of features to emit per time step.
        """
        # NOTE(review): the trailing ReLU restricts reconstructions to values >= 0;
        # confirm that this matches the value range of the (transformed) input data.
        decoder = Sequential(
            Linear(10, 100, bias=True),
            ReLU(),
            Linear(100, out_shape, bias=True),
            ReLU()
        )
        sequential_decoder = Sequential(
            GRU(self.latent_dim, 10, batch_first=True),
            RNNOutputFilter(),
            TimeDistributed(decoder)
        )
        return sequential_decoder

    def forward(self, batch):
        """Encode ``batch``, sample a latent vector and decode it again.

        :param batch: input tensor; its first dimension is used as batch size
            (assumes a (batch, time, features) layout - TODO confirm with the dataset).
        :return: tuple ``(x_hat, mu, logvar)`` - in exactly this order.
        """
        encoding = self.encoder(batch)
        mu_logvar = self.mu(encoding), self.logvar(encoding)
        z = self.reparameterize(*mu_logvar)
        # Expand the latent sample to one copy per time step before decoding.
        repeat = Repeater((batch.shape[0], self.dataParams['size'], -1))
        x_hat = self.decoder(repeat(z))
        return (x_hat, *mu_logvar)
class VAELightningOverrides:
    """Mixin providing the Lightning ``training_step`` for VAE models.

    The host class has to implement ``forward(x)`` returning the tuple
    ``(x_hat, mu, logvar)``.
    """

    def training_step(self, x, batch_nb):
        """Compute the VAE loss (reconstruction error + KL divergence).

        :param x: input batch; also the reconstruction target.
        :param batch_nb: index of the batch (unused).
        :return: dict with the scalar training loss under the key ``'loss'``.
        """
        # forward() yields (x_hat, mu, logvar); unpacking in any other order
        # silently swaps mean and log-variance in the KL term below.
        x_hat, mu, logvar = self.forward(x)
        reconstruction_loss = mse_loss(x_hat, x, reduction='mean')

        # see Appendix B from VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return {'loss': reconstruction_loss + kl_divergence}
# This module only provides building blocks for import; executing it
# directly is rejected on purpose.
if __name__ == '__main__':
    raise PermissionError('Get out of here - never run this module')

View File

@ -90,13 +90,15 @@ class Repeater(Module):
class RNNOutputFilter(Module): class RNNOutputFilter(Module):
def __init__(self, return_output=True): def __init__(self, return_output=True, only_last=False):
super(RNNOutputFilter, self).__init__() super(RNNOutputFilter, self).__init__()
self.only_last = only_last
self.return_output = return_output self.return_output = return_output
def forward(self, x: tuple): def forward(self, x: tuple):
outputs, hidden = x outputs, hidden = x
return outputs if self.return_output else hidden out = outputs if self.return_output else hidden
return out if not self.only_last else out[:, -1, :]
if __name__ == '__main__': if __name__ == '__main__':

41
run_basic_ae.py Normal file
View File

@ -0,0 +1,41 @@
from networks.basic_ae import BasicAE, AELightningOverrides
from networks.modules import LightningModule
from torch.optim import Adam
from torch.utils.data import DataLoader
from pytorch_lightning import data_loader
from dataset import DataContainer
from torch.nn import BatchNorm1d
from pytorch_lightning import Trainer
class AEModel(AELightningOverrides, LightningModule):
    """Lightning module that wraps a ``BasicAE`` network.

    The training step is inherited from ``AELightningOverrides``; this class
    wires up the network, the optimizer and the training data.
    """

    def __init__(self, dataParams: dict):
        """Store the data parameters and build the network from them."""
        super().__init__()
        self.dataParams = dataParams
        # noinspection PyUnresolvedReferences
        self.network = BasicAE(self.dataParams)

    def forward(self, x):
        """Delegate to the wrapped network's ``forward``."""
        return self.network.forward(x)

    def configure_optimizers(self):
        """Return a single Adam optimizer over all parameters (lr=0.02)."""
        optimizer = Adam(self.parameters(), lr=0.02)
        return [optimizer]

    @data_loader
    def tng_dataloader(self):
        """Return a shuffled loader (batch size 100) over the 'data' directory."""
        dataset = DataContainer('data', **self.dataParams)
        return DataLoader(dataset, shuffle=True, batch_size=100)
# Script entry point: build the autoencoder model and train it.
if __name__ == '__main__':
    features = 6
    # Parameters shared by the dataset and the network.
    data_params = dict(
        refresh=False,
        size=5,
        step=5,
        features=features,
        transforms=[BatchNorm1d(features)],
    )
    ae = AEModel(dataParams=data_params)

    trainer = Trainer()
    trainer.fit(ae)

View File

@ -1,29 +1,28 @@
from networks.basic_ae import BasicAE from networks.basic_vae import BasicVAE, VAELightningOverrides
from networks.modules import LightningModule from networks.modules import LightningModule
import pytorch_lightning as pl import pytorch_lightning as pl
from torch.nn.functional import mse_loss from torch.nn.functional import mse_loss
from torch.optim import Adam from torch.optim import Adam
import torch
from torch.nn import BatchNorm1d
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from data.dataset import DataContainer from dataset import DataContainer
from pytorch_lightning import Trainer from pytorch_lightning import Trainer
class AEModel(LightningModule): class AEModel(VAELightningOverrides, LightningModule):
def __init__(self, dataParams: dict): def __init__(self, dataParams: dict):
super(AEModel, self).__init__() super(AEModel, self).__init__()
self.dataParams = dataParams self.dataParams = dataParams
self.network = BasicAE(self.dataParams) # noinspection PyUnresolvedReferences
self.network = BasicVAE(self.dataParams)
def forward(self, x): def forward(self, x):
return self.network.forward(x) return self.network.forward(x)
def training_step(self, x, batch_nb):
z, x_hat = self.forward(x)
return {'loss': mse_loss(x, x_hat)}
def configure_optimizers(self): def configure_optimizers(self):
# ToDo: Where do i get the Paramers from? # ToDo: Where do i get the Paramers from?
return [Adam(self.parameters(), lr=0.02)] return [Adam(self.parameters(), lr=0.02)]
@ -34,8 +33,9 @@ class AEModel(LightningModule):
if __name__ == '__main__': if __name__ == '__main__':
features = 6
ae = AEModel( ae = AEModel(
dict(refresh=False, size=5, step=5, features=6) dataParams=dict(refresh=False, size=5, step=5, features=features, transforms=[BatchNorm1d(features)])
) )
trainer = Trainer() trainer = Trainer()

0
viz/__init__.py Normal file
View File