diff --git a/dataset.py b/dataset.py index 19c4499..7987c99 100644 --- a/dataset.py +++ b/dataset.py @@ -1,9 +1,11 @@ import argparse +import bisect from collections import defaultdict from distutils.util import strtobool import os import ast from abc import ABC, abstractmethod +from torch.nn.modules import BatchNorm1d from tqdm import tqdm import numpy as np @@ -61,10 +63,9 @@ class AbstractDataset(ConcatDataset, ABC): def processed_paths(self): return [os.path.join(self.path, 'processed', x) for x in self.processed_filenames] - def __init__(self, path, refresh=False, transforms=None, **kwargs): + def __init__(self, path, refresh=False, **kwargs): self.path = path self.refresh = refresh - self.transforms = transforms or None self.maps = list(set([x.name.split('_')[0] for x in os.scandir(os.path.join(self.path, 'raw'))])) super(AbstractDataset, self).__init__(datasets=self._load_datasets()) @@ -139,7 +140,19 @@ class DataContainer(AbstractDataset): for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]): if attr not in ['inDoor']: dataDict[attr].append(ast.literal_eval(x)) - return Trajectories(self.size, self.step, headers, transforms=self.transforms, **dataDict) + return Trajectories(self.size, self.step, headers, **dataDict, normalize=True) + + def get_both_by_key(self, item): + if item < 0: + if -item > len(self): + raise ValueError("absolute value of index should not exceed dataset length") + item = len(self) + item + dataset_idx = bisect.bisect_right(self.cumulative_sizes, item) + if dataset_idx == 0: + sample_idx = item + else: + sample_idx = item - self.cumulative_sizes[dataset_idx - 1] + return self.datasets[dataset_idx].get_both_by_key(sample_idx) class Trajectories(Dataset): @@ -153,12 +166,12 @@ class Trajectories(Dataset): def features(self): return len(self.isovistMeasures) - def __init__(self, size, step, headers, transforms=None, **kwargs): + def __init__(self, size, step, headers, normalize=True, **kwargs): super(Trajectories, self).__init__() self.size: int = size self.step: int = step self.headers: list = headers - self.transforms: list = transforms or list() + self.normalize: bool = normalize self.data = self.__init_data_(**kwargs) pass @@ -170,9 +183,10 @@ class Trajectories(Dataset): # Check if all keys are of same length assert len(set(x.size()[0] for x in dataDict.values() if torch.is_tensor(x))) <= 1 data = torch.stack([dataDict[key] for key in self.isovistMeasures], dim=-1) - for transformation in self.transforms: + if self.normalize: # All but x,y - data[:, 2:] = transformation(data[:, 2:]) + std, mean = torch.std_mean(data[:, 2:], dim=0) + data[:, 2:] = (data[:, 2:] - mean) / std return data def __iter__(self): @@ -180,15 +194,18 @@ class Trajectories(Dataset): for i in range(len(self)): yield self[i] - def __getitem__(self, item, coords=False): - """ - Return a trajectory sample from the dataset by a specific key. - :param item: The index number of the trajectory to return. 
- :return: - """ - subList = self.data[item:item + self.size * self.step or None:self.step] - xy, tensor = subList[:, :2], subList[:, 2:] - return (xy, tensor) if coords else tensor + def __getitem__(self, item): + return self.data[item:item + self.size * self.step or None:self.step][:, 2:] + + def get_isovist_measures_by_key(self, item): + return self[item] + + def get_coordinates_by_key(self, item): + return self.data[item:item + self.size * self.step or None:self.step][:, :2] + + def get_both_by_key(self, item): + data = self.data[item:item + self.size * self.step or None:self.step] + return data def __len__(self): total_len = self.data.size()[0] @@ -224,18 +241,21 @@ class MapContainer(AbstractDataset): for attr, x in zip(headers, line.rstrip().split(delimiter)[None:None]): dataDict[attr].append(ast.literal_eval(x)) - return Map(np.asarray([dataDict[head] for head in headers])) + return Map(np.asarray([dataDict[head] for head in headers]), + name=os.path.splitext(os.path.basename(filepath))[0] + ) class Map(object): - def __init__(self, mapData: np.ndarray): + def __init__(self, mapData: np.ndarray, name='MapName'): """ This is a Container Class for triangulated basemaps in csv format. :param mapData: The map as np.ndarray, already read from disk. """ - self.map: np.ndarray = mapData + self.map: np.ndarray = np.transpose(mapData) + self.name = name self.minx, self.maxx = np.min(self.map[[0, 2, 4]]), np.max(self.map[[0, 2, 4]]) self.miny, self.maxy = np.min(self.map[[1, 3, 5]]), np.max(self.map[[1, 3, 5]]) diff --git a/eval/metrices.py b/eval/metrices.py new file mode 100644 index 0000000..7f9e1b8 --- /dev/null +++ b/eval/metrices.py @@ -0,0 +1,6 @@ +# ToDo: We need a metric that analyzes sequences of coordinates of arbitrary length and clusters them based +# on their embedded type of movement + +# ToDo: We need a function that compares the clustering outcome of our movement analysis with the AE output. + +# Do the variants of AE really adjust their latent space regarding the embedded movement type? 
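Editor's note: the new DataContainer.get_both_by_key above resolves a flat sample index into the right sub-dataset the same way torch.utils.data.ConcatDataset.__getitem__ does, so coordinates and isovist measures can be fetched together by one global index. A minimal sketch of that lookup, written outside the patch; resolve_index and dataset_lengths are illustrative names, not repository code:

import bisect
from itertools import accumulate

def resolve_index(item, dataset_lengths):
    # Cumulative sizes of the concatenated sub-datasets, e.g. [5, 8, 12].
    cumulative_sizes = list(accumulate(dataset_lengths))
    if item < 0:
        if -item > cumulative_sizes[-1]:
            raise ValueError("absolute value of index should not exceed dataset length")
        item = cumulative_sizes[-1] + item
    # bisect_right returns the first sub-dataset whose cumulative size exceeds item.
    dataset_idx = bisect.bisect_right(cumulative_sizes, item)
    sample_idx = item if dataset_idx == 0 else item - cumulative_sizes[dataset_idx - 1]
    return dataset_idx, sample_idx

# Global index 6 over sub-datasets of length 5, 3 and 4 lands in the
# second dataset at local index 1.
assert resolve_index(6, [5, 3, 4]) == (1, 1)

The patch delegates the actual sample retrieval to the selected Trajectories instance, which slices self.data with the same size/step stride used by __getitem__.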
diff --git a/networks/adverserial_auto_encoder.py b/networks/adverserial_auto_encoder.py index d4b10d1..3da06c9 100644 --- a/networks/adverserial_auto_encoder.py +++ b/networks/adverserial_auto_encoder.py @@ -9,10 +9,10 @@ import torch device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -class AdversarialAutoEncoder(AutoEncoder): +class AdversarialAE(AutoEncoder): def __init__(self, *args, **kwargs): - super(AdversarialAutoEncoder, self).__init__(*args, **kwargs) + super(AdversarialAE, self).__init__(*args, **kwargs) self.discriminator = Discriminator(self.latent_dim, self.features) def forward(self, batch): @@ -26,10 +26,10 @@ class AdversarialAutoEncoder(AutoEncoder): return z, x_hat -class AdversarialAELightningOverrides(LightningModuleOverrides): +class AdversarialAE_LO(LightningModuleOverrides): def __init__(self): - super(AdversarialAELightningOverrides, self).__init__() + super(AdversarialAE_LO, self).__init__() def training_step(self, batch, _, optimizer_i): if optimizer_i == 0: diff --git a/networks/attention_based_auto_enoder.py b/networks/attention_based_auto_enoder.py index 3ef456e..1cb12b1 100644 --- a/networks/attention_based_auto_enoder.py +++ b/networks/attention_based_auto_enoder.py @@ -7,11 +7,11 @@ from torch import Tensor ####################### # Basic AE-Implementation -class AutoEncoder(AbstractNeuralNetwork, ABC): +class AE_WithAttention(AbstractNeuralNetwork, ABC): def __init__(self, latent_dim: int=0, features: int = 0, **kwargs): assert latent_dim and features - super(AutoEncoder, self).__init__() + super(AE_WithAttention, self).__init__() self.latent_dim = latent_dim self.features = features self.encoder = Encoder(self.latent_dim) @@ -28,10 +28,10 @@ class AutoEncoder(AbstractNeuralNetwork, ABC): return z, x_hat -class AutoEncoderLightningOverrides(LightningModuleOverrides): +class AE_WithAttention_LO(LightningModuleOverrides): def __init__(self): - super(AutoEncoderLightningOverrides, self).__init__() + super(AE_WithAttention_LO, self).__init__() def training_step(self, x, batch_nb): # ToDo: We need a new loss function, fullfilling all attention needs diff --git a/networks/auto_encoder.py b/networks/auto_encoder.py index a834d1d..74f351a 100644 --- a/networks/auto_encoder.py +++ b/networks/auto_encoder.py @@ -28,10 +28,10 @@ class AutoEncoder(AbstractNeuralNetwork, ABC): return z, x_hat -class AutoEncoderLightningOverrides(LightningModuleOverrides): +class AutoEncoder_LO(LightningModuleOverrides): def __init__(self): - super(AutoEncoderLightningOverrides, self).__init__() + super(AutoEncoder_LO, self).__init__() def training_step(self, x, batch_nb): # z, x_hat diff --git a/networks/modules.py b/networks/modules.py index f3be740..cf95c55 100644 --- a/networks/modules.py +++ b/networks/modules.py @@ -1,9 +1,13 @@ import os +from operator import mul +from functools import reduce import torch +from torch import randn import pytorch_lightning as pl from pytorch_lightning import data_loader -from torch.nn import Module, Linear, ReLU, Tanh, Sigmoid, Dropout, GRU +from torch.nn import Module, Linear, ReLU, Sigmoid, Dropout, GRU +from torchvision.transforms import Normalize from abc import ABC, abstractmethod @@ -29,8 +33,16 @@ class LightningModuleOverrides: @data_loader def tng_dataloader(self): num_workers = 0 # os.cpu_count() // 2 - return DataLoader(DataContainer(os.path.join('data', 'training'), self.size, self.step), + return DataLoader(DataContainer(os.path.join('data', 'training'), + self.size, self.step, transforms=[Normalize]), shuffle=True, 
batch_size=10000, num_workers=num_workers) + """ + @data_loader + def val_dataloader(self): + num_workers = 0 # os.cpu_count() // 2 + return DataLoader(DataContainer(os.path.join('data', 'validation'), self.size, self.step), + shuffle=True, batch_size=100, num_workers=num_workers) + """ class AbstractNeuralNetwork(Module): @@ -82,6 +94,7 @@ class LightningModule(pl.LightningModule, ABC): # return DataLoader(MNIST(os.getcwd(), train=True, download=True, # transform=transforms.ToTensor()), batch_size=32) + """ @pl.data_loader def val_dataloader(self): # OPTIONAL @@ -91,7 +104,7 @@ class LightningModule(pl.LightningModule, ABC): def test_dataloader(self): # OPTIONAL pass - + """ ####################### # Utility Modules @@ -185,7 +198,7 @@ class DecoderLinearStack(Module): self.l1 = Linear(10, 100, bias=True) self.l2 = Linear(100, out_shape, bias=True) self.activation = ReLU() - self.activation_out = Tanh() + self.activation_out = Sigmoid() def forward(self, x): tensor = self.l1(x) @@ -197,30 +210,53 @@ class DecoderLinearStack(Module): class EncoderLinearStack(Module): - def __init__(self): + @property + def shape(self): + x = randn(self.features).unsqueeze(0) + output = self(x) + return output.shape[1:] + + def __init__(self, features=6, separated=False, use_bias=True): super(EncoderLinearStack, self).__init__() # FixMe: Get Hardcoded shit out of here - self.l1 = Linear(6, 100, bias=True) - self.l2 = Linear(100, 10, bias=True) + self.separated = separated + self.features = features + if self.separated: + self.l1s = [Linear(1, 10, bias=use_bias) for _ in range(self.features)] + self.l2s = [Linear(10, 5, bias=use_bias) for _ in range(self.features)] + else: + self.l1 = Linear(self.features, self.features * 10, bias=use_bias) + self.l2 = Linear(self.features * 10, self.features * 5, bias=use_bias) + self.l3 = Linear(self.features * 5, 10, use_bias) self.activation = ReLU() def forward(self, x): - tensor = self.l1(x) - tensor = self.activation(tensor) - tensor = self.l2(tensor) + if self.separated: + x = x.unsqueeze(-1) + tensors = [self.l1s[idx](x[:, idx, :]) for idx in range(len(self.l1s))] + tensors = [self.activation(tensor) for tensor in tensors] + tensors = [self.l2s[idx](tensors[idx]) for idx in range(len(self.l2s))] + tensors = [self.activation(tensor) for tensor in tensors] + tensor = torch.cat(tensors, dim=-1) + else: + tensor = self.l1(x) + tensor = self.activation(tensor) + tensor = self.l2(tensor) + tensor = self.l3(tensor) tensor = self.activation(tensor) return tensor class Encoder(Module): - def __init__(self, lat_dim, variational=False): + def __init__(self, lat_dim, variational=False, separate_features=False, with_dense=True, features=6): self.lat_dim = lat_dim + self.features = features self.variational = variational - super(Encoder, self).__init__() - self.l_stack = TimeDistributed(EncoderLinearStack()) - self.gru = GRU(10, 10, batch_first=True) + self.l_stack = TimeDistributed(EncoderLinearStack(separated=separate_features, + features=features)) if with_dense else False + self.gru = GRU(10 if with_dense else self.features, 10, batch_first=True) self.filter = RNNOutputFilter(only_last=True) if variational: self.mu = Linear(10, self.lat_dim) @@ -229,8 +265,9 @@ class Encoder(Module): self.lat_dim_layer = Linear(10, self.lat_dim) def forward(self, x): - tensor = self.l_stack(x) - tensor = self.gru(tensor) + if self.l_stack: + x = self.l_stack(x) + tensor = self.gru(x) tensor = self.filter(tensor) if self.variational: tensor = self.mu(tensor), self.logvar(tensor) @@ -262,10 
+299,10 @@ class PoolingEncoder(Module): self.p = AvgDimPool() self.l = EncoderLinearStack() if variational: - self.mu = Linear(10, self.lat_dim) - self.logvar = Linear(10, self.lat_dim) + self.mu = Linear(self.l.shape, self.lat_dim) + self.logvar = Linear(self.l.shape, self.lat_dim) else: - self.lat_dim_layer = Linear(10, self.lat_dim) + self.lat_dim_layer = Linear(reduce(mul, self.l.shape), self.lat_dim) def forward(self, x): tensor = self.p(x) diff --git a/networks/seperating_adversarial_auto_encoder.py b/networks/seperating_adversarial_auto_encoder.py index ad914d9..daae05b 100644 --- a/networks/seperating_adversarial_auto_encoder.py +++ b/networks/seperating_adversarial_auto_encoder.py @@ -4,15 +4,15 @@ from networks.modules import * import torch -class SeperatingAdversarialAutoEncoder(Module): +class SeperatingAAE(Module): def __init__(self, latent_dim, features): - super(SeperatingAdversarialAutoEncoder, self).__init__() + super(SeperatingAAE, self).__init__() self.latent_dim = latent_dim self.features = features self.spatial_encoder = PoolingEncoder(self.latent_dim) - self.temporal_encoder = Encoder(self.latent_dim) + self.temporal_encoder = Encoder(self.latent_dim, with_dense=False) self.decoder = Decoder(self.latent_dim * 2, self.features) self.spatial_discriminator = Discriminator(self.latent_dim, self.features) self.temporal_discriminator = Discriminator(self.latent_dim, self.features) @@ -29,10 +29,19 @@ class SeperatingAdversarialAutoEncoder(Module): return z_spatial, z_temporal, x_hat -class SeparatingAdversarialAELightningOverrides(LightningModuleOverrides): +class SuperSeperatingAAE(SeperatingAAE): + def __init__(self, *args): + super(SuperSeperatingAAE, self).__init__(*args) + self.temporal_encoder = Encoder(self.latent_dim, separate_features=True) + + def forward(self, batch): + return batch + + +class SeparatingAAE_LO(LightningModuleOverrides): def __init__(self): - super(SeparatingAdversarialAELightningOverrides, self).__init__() + super(SeparatingAAE_LO, self).__init__() def training_step(self, batch, _, optimizer_i): spatial_latent_fake, temporal_latent_fake, batch_hat = self.network.forward(batch) diff --git a/networks/variational_auto_encoder.py b/networks/variational_auto_encoder.py index aad4a54..31c61ba 100644 --- a/networks/variational_auto_encoder.py +++ b/networks/variational_auto_encoder.py @@ -6,7 +6,7 @@ from torch.nn.functional import mse_loss ####################### # Basic AE-Implementation -class VariationalAutoEncoder(AbstractNeuralNetwork, ABC): +class VariationalAE(AbstractNeuralNetwork, ABC): @property def name(self): @@ -14,7 +14,7 @@ class VariationalAutoEncoder(AbstractNeuralNetwork, ABC): def __init__(self, latent_dim=0, features=0, **kwargs): assert latent_dim and features - super(VariationalAutoEncoder, self).__init__() + super(VariationalAE, self).__init__() self.features = features self.latent_dim = latent_dim self.encoder = Encoder(self.latent_dim, variational=True) @@ -32,16 +32,16 @@ class VariationalAutoEncoder(AbstractNeuralNetwork, ABC): z = self.reparameterize(mu, logvar) repeat = Repeater((batch.shape[0], batch.shape[1], -1)) x_hat = self.decoder(repeat(z)) - return x_hat, mu, logvar + return mu, logvar, x_hat -class VariationalAutoEncoderLightningOverrides(LightningModuleOverrides): +class VAE_LO(LightningModuleOverrides): def __init__(self): - super(VariationalAutoEncoderLightningOverrides, self).__init__() + super(VAE_LO, self).__init__() def training_step(self, x, _): - x_hat, logvar, mu = self.forward(x) + mu, logvar, x_hat = 
self.forward(x) BCE = mse_loss(x_hat, x, reduction='mean') # see Appendix B from VAE paper: @@ -52,7 +52,7 @@ class VariationalAutoEncoderLightningOverrides(LightningModuleOverrides): return {'loss': BCE + KLD} def configure_optimizers(self): - return [Adam(self.parameters(), lr=0.02)] + return [Adam(self.parameters(), lr=0.004)] if __name__ == '__main__': diff --git a/run_models.py b/run_models.py index 4405a5b..294728a 100644 --- a/run_models.py +++ b/run_models.py @@ -1,30 +1,32 @@ from torch.distributions import Normal -from networks.auto_encoder import * import time -from networks.variational_auto_encoder import * -from networks.adverserial_auto_encoder import * -from networks.seperating_adversarial_auto_encoder import * -from networks.modules import LightningModule -from pytorch_lightning import Trainer -from test_tube import Experiment +import os from argparse import Namespace from argparse import ArgumentParser from distutils.util import strtobool +from networks.auto_encoder import AutoEncoder, AutoEncoder_LO +from networks.variational_auto_encoder import VariationalAE, VAE_LO +from networks.adverserial_auto_encoder import AdversarialAE_LO, AdversarialAE +from networks.seperating_adversarial_auto_encoder import SeperatingAAE, SeparatingAAE_LO, SuperSeperatingAAE +from networks.modules import LightningModule + +from pytorch_lightning import Trainer +from test_tube import Experiment + + args = ArgumentParser() -args.add_argument('--step', default=6) +args.add_argument('--step', default=5) args.add_argument('--features', default=6) args.add_argument('--size', default=9) -args.add_argument('--latent_dim', default=4) -args.add_argument('--model', default='Model') +args.add_argument('--latent_dim', default=2) +args.add_argument('--model', default='VAE_Model') args.add_argument('--refresh', type=strtobool, default=False) -# ToDo: How to implement this better? 
-# other_classes = [AutoEncoder, AutoEncoderLightningOverrides] -class Model(AutoEncoderLightningOverrides, LightningModule): +class AE_Model(AutoEncoder_LO, LightningModule): def __init__(self, parameters): assert all([x in parameters for x in ['step', 'size', 'latent_dim', 'features']]) @@ -32,11 +34,23 @@ class Model(AutoEncoderLightningOverrides, LightningModule): self.latent_dim = parameters.latent_dim self.features = parameters.features self.step = parameters.step - super(Model, self).__init__() + super(AE_Model, self).__init__() self.network = AutoEncoder(self.latent_dim, self.features) -class AdversarialModel(AdversarialAELightningOverrides, LightningModule): +class VAE_Model(VAE_LO, LightningModule): + + def __init__(self, parameters): + assert all([x in parameters for x in ['step', 'size', 'latent_dim', 'features']]) + self.size = parameters.size + self.latent_dim = parameters.latent_dim + self.features = parameters.features + self.step = parameters.step + super(VAE_Model, self).__init__() + self.network = VariationalAE(self.latent_dim, self.features) + + +class AAE_Model(AdversarialAE_LO, LightningModule): def __init__(self, parameters: Namespace): assert all([x in parameters for x in ['step', 'size', 'latent_dim', 'features']]) @@ -44,13 +58,13 @@ class AdversarialModel(AdversarialAELightningOverrides, LightningModule): self.latent_dim = parameters.latent_dim self.features = parameters.features self.step = parameters.step - super(AdversarialModel, self).__init__() + super(AAE_Model, self).__init__() self.normal = Normal(0, 1) - self.network = AdversarialAutoEncoder(self.latent_dim, self.features) + self.network = AdversarialAE(self.latent_dim, self.features) pass -class SeparatingAdversarialModel(SeparatingAdversarialAELightningOverrides, LightningModule): +class SAAE_Model(SeparatingAAE_LO, LightningModule): def __init__(self, parameters: Namespace): assert all([x in parameters for x in ['step', 'size', 'latent_dim', 'features']]) @@ -58,9 +72,23 @@ class SeparatingAdversarialModel(SeparatingAdversarialAELightningOverrides, Ligh self.latent_dim = parameters.latent_dim self.features = parameters.features self.step = parameters.step - super(SeparatingAdversarialModel, self).__init__() + super(SAAE_Model, self).__init__() self.normal = Normal(0, 1) - self.network = SeperatingAdversarialAutoEncoder(self.latent_dim, self.features) + self.network = SeperatingAAE(self.latent_dim, self.features) + pass + + +class SSAAE_Model(SeparatingAAE_LO, LightningModule): + + def __init__(self, parameters: Namespace): + assert all([x in parameters for x in ['step', 'size', 'latent_dim', 'features']]) + self.size = parameters.size + self.latent_dim = parameters.latent_dim + self.features = parameters.features + self.step = parameters.step + super(SSAAE_Model, self).__init__() + self.normal = Normal(0, 1) + self.network = SuperSeperatingAAE(self.latent_dim, self.features) pass @@ -84,8 +112,13 @@ if __name__ == '__main__': period=4 ) - trainer = Trainer(experiment=exp, max_nb_epochs=250, gpus=[0], - add_log_row_interval=1000, checkpoint_callback=checkpoint_callback) + trainer = Trainer(experiment=exp, + max_nb_epochs=250, + gpus=[0], + add_log_row_interval=1000, + # checkpoint_callback=checkpoint_callback + ) + trainer.fit(model) trainer.save_checkpoint(os.path.join(outpath, 'weights.ckpt')) diff --git a/viz/output.png b/viz/output.png new file mode 100644 index 0000000..2e3f1ad Binary files /dev/null and b/viz/output.png differ diff --git a/viz/print_movement_in_map.py 
b/viz/print_movement_in_map.py new file mode 100644 index 0000000..5d002a1 --- /dev/null +++ b/viz/print_movement_in_map.py @@ -0,0 +1,50 @@ +from argparse import ArgumentParser +import os + +from dataset import DataContainer +from viz.utils import MotionAnalyser, Printer, MapContainer, search_for_weights +import torch +from run_models import SAAE_Model, AAE_Model, VAE_Model, AE_Model + +arguments = ArgumentParser() +arguments.add_argument('--data', default='output') + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + +def load_and_viz(path_like_element): + # Define Loop to search for models and folder with visualizations + splitpath = path_like_element.split(os.sep) + base_dir = os.path.join(*splitpath[:4]) + model = globals()[splitpath[2]] + print(f'... loading model named: "{model.name}" from timestamp: {splitpath[3]}') + pretrained_model = model.load_from_metrics( + weights_path=path_like_element, + tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'), + on_gpu=True if torch.cuda.is_available() else False, + # map_location=None + ) + + # Init model and freeze its weights ( for faster inference) + pretrained_model = pretrained_model.to(device) + pretrained_model.eval() + pretrained_model.freeze() + + dataIndex = 0 + + datasets = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device) + dataset = datasets.datasets[dataIndex] + # ToDO: use dataloader for iteration instead! - dataloader = DataLoader(dataset, ) + + maps = MapContainer(os.path.join(os.pardir, 'data', 'validation')) + base_map = maps.datasets[dataIndex] + + p = Printer(pretrained_model) + p.print_trajec_on_basemap(dataset, base_map, save=os.path.join(base_dir, f'{base_map.name}_movement.png'), + color_by_movement=True) + return True + + +if __name__ == '__main__': + args = arguments.parse_args() + search_for_weights(load_and_viz, args.data, file_type='movement') diff --git a/viz/utils.py b/viz/utils.py index 0248b79..0407ed8 100644 --- a/viz/utils.py +++ b/viz/utils.py @@ -1,13 +1,30 @@ -import os +from typing import Union +from functools import reduce + +from statistics import stdev + +from sklearn.cluster import Birch, KMeans, DBSCAN +from sklearn.manifold import TSNE +from sklearn.decomposition import PCA + +from dataset import * +from networks.modules import AbstractNeuralNetwork + +from matplotlib import pyplot as plt +from matplotlib.patches import Polygon +from matplotlib.collections import LineCollection, PatchCollection +import matplotlib.colors as mcolors +import matplotlib.cm as cmaps + +from math import pi -def search_for_weights(func, folder): +def search_for_weights(func, folder, file_type='latent_space'): while not os.path.exists(folder): if len(os.path.split(folder)) >= 50: raise FileNotFoundError(f'The folder "{folder}" could not be found') folder = os.path.join(os.pardir, folder) - - if any([x.name.endswith('.png') for x in os.scandir(folder)]): + if any([file_type in x.name for x in os.scandir(folder)]): return if any(['.ckpt' in element.name and element.is_dir() for element in os.scandir(folder)]): @@ -19,12 +36,324 @@ def search_for_weights(func, folder): for element in os.scandir(folder): if os.path.exists(element): if element.is_dir(): - search_for_weights(func, element.path) + search_for_weights(func, element.path, file_type=file_type) elif element.is_file() and element.name.endswith('.ckpt'): - func(element) + func(element.path) else: continue +class Printer(object): + + def __init__(self, model: AbstractNeuralNetwork, ax=None): + 
self.norm = mcolors.Normalize(vmin=0, vmax=1) + self.colormap = cmaps.gist_rainbow + self.network = model + self.fig = plt.figure(dpi=300) + self.ax = ax if ax else plt.subplot(1, 1, 1) + pass + + def colorize(self, x, min_val: Union[float, None] = None, max_val: Union[float, None] = None, + colormap=cmaps.rainbow, **kwargs): + norm = mcolors.Normalize(vmin=min_val, vmax=max_val) + colored = colormap(norm(x)) + return colored + + @staticmethod + def project_to_2d(data: np.ndarray, method: str = 'tsne') -> np.ndarray: + projector = TSNE() if method.lower() == 'tsne' else PCA() + print('Starting TSNE Transformation') + projected_data = projector.fit_transform(data) + assert projected_data.shape[-1] == 2 + print('TSNE Projection Successfull') + return projected_data + + @staticmethod + def cluster_data(data: np.ndarray, cluster_center_file: str = None) -> np.ndarray: + print('Start Clustering with Birch') + if cluster_center_file: + with open(cluster_center_file, 'r') as f: + cluster_center_string = f.readlines()[0] + centers = ast.literal_eval(cluster_center_string) + clusterer = Birch(n_clusters=len(centers)) + clusterer.init = np.asarray(centers) + else: + # clusterer = Birch(n_clusters=None) + clusterer = Birch() + + labels = clusterer.fit_predict(data) + print('Birch Clustering Sucessfull') + return labels + + def print_possible_latent_spaces(self, data: Trajectories, n: Union[int, str] = 1000, **kwargs): + predictions, _ = self._gather_predictions(data, n) + if len(predictions) >= 2: + predictions += (torch.cat(predictions, dim=-1), ) + + labels = self.cluster_data(predictions[-1]) + for idx, prediction in enumerate(predictions): + self.print_latent_space(prediction, labels, running_index=idx, **kwargs) + + def print_latent_space(self, prediction, labels, running_index=0, save=None): + + self.colormap = cmaps.tab20 + + if isinstance(prediction, torch.Tensor): + prediction = prediction.numpy() + elif isinstance(prediction, np.ndarray): + pass + elif isinstance(prediction, list): + prediction = np.asarray(prediction) + else: + raise RuntimeError + + if prediction.shape[-1] > 2: + fig, axs = plt.subplots(ncols=2, nrows=1) + transformers = [TSNE(2), PCA(2)] + print('Starting Dimensional Reduction') + for idx, transformer in enumerate(transformers): + transformed = transformer.fit_transform(prediction) + print(f'{transformer.__class__.__name__} Projection Sucessfull') + colored = self.colormap(labels) + ax = axs[idx] + ax.scatter(x=transformed[:, 0], y=transformed[:, 1], c=colored) + ax.set_title(transformer.__class__.__name__) + ax.set_xlim(np.min(transformed[:, 0])*1.1, np.max(transformed[:, 0]*1.1)) + ax.set_ylim(np.min(transformed[:, 1]*1.1), np.max(transformed[:, 1]*1.1)) + elif prediction.shape[-1] == 2: + fig, axs = plt.subplots() + + # TODO: Build transformation for lat_dim_size >= 3 + print('All Predictions sucesfully Gathered and Shaped ') + axs.set_xlim(np.min(prediction[:, 0]), np.max(prediction[:, 0])) + axs.set_ylim(np.min(prediction[:, 1]), np.max(prediction[:, 1])) + # ToDo: Insert Normalization + colored = self.colormap(labels) + plt.scatter(prediction[:, 0], prediction[:, 1], c=colored) + else: + raise NotImplementedError("Latent Dimensions can not be one-dimensional (yet).") + + if save: + plt.savefig(f'{save}_{running_index}.png') + + def print_latent_density(self): # , data: DataContainer): + raise NotImplementedError("My Future Self has to come up with smth") + + # fig, ax = plt.subplots() + + # preds = [] + # for i in range(data.len - data.width * data.stepsize): + # 
for i in range(5000): + # + # seq = data.sub_trajectory_by_key(i, stepsize=data.stepsize) + # + # preds.append(self.nn.encoder([seq[None, ...]])[0]) + # + # TODO: Build transformation for lat_dim_size >= 3 + # pred_array = np.asarray(preds).reshape((-1, nn.latDim)) + # k = KernelDensity() + # k.fit(pred_array) + # z = np.exp(k.score_samples(pred_array)) + # + # levels = np.linspace(0, z.max(), 25) + # xgrid, ygrid = np.meshgrid(pred_array[::5, 0], pred_array[::5, 1]) + # xy = np.vstack([xgrid.ravel(), ygrid.ravel()]).T + # z = np.exp(k.score_samples(xy)).reshape(xgrid.shape) + # + # plt.contourf(xgrid, ygrid, z, levels=levels, cmap=plt.cm.Reds) + # plt.show() + + def _gather_predictions(self, data: Trajectories, n: int = 1000, + color_by_movement=False, **kwargs): + """ + Check if any value for n is given and gather some random datapoints from the dataset. In accordance with the + maximal possible trajectory amount that is given by stepsize * width. + Also retunr the keys for all possible predictions. + :param data: + :type data: Dataset + :param n: + :param tsne: + :param kwargs: + :return: + """ + print("Gathering Predictions") + + n = n if isinstance(n, int) and n else len(data) - (data.size * data.step) + idxs = np.random.choice(np.arange(len(data) - data.step * data.size), n, replace=False) + complete_data = torch.stack([data.get_both_by_key(idx) for idx in idxs], dim=0) + segment_coords, trajectories = complete_data[:, :, :2], complete_data[:, :, 2:] + if color_by_movement: + motion_analyser = MotionAnalyser() + predictions = (motion_analyser.cluster_motion(segment_coords), ) + + else: + with torch.no_grad(): + predictions = self.network(trajectories)[:-1] + + return predictions, segment_coords + + @staticmethod + def colorize_as_hsv(self, x, min_val: Union[float, None] = None, max_val: Union[float, None] = None, + colormap=cmaps.rainbow, **kwargs): + norm = mcolors.Normalize(vmin=min_val, vmax=max_val) + colored = colormap(norm(x)) + return colored + + def _build_trajectory_shapes(self, predictions: np.ndarray, segment_coordinates, + axis=None, transformation=TSNE, **kwargs): + if not isinstance(predictions, np.ndarray): + predictions = tuple((x if torch.is_tensor(x) else torch.from_numpy(x) for x in predictions)) + predictions = torch.cat(predictions, dim=-1) + + if axis is not None: + predictions = predictions[:, axis][..., None] + + if predictions.shape[-1] >= 4: + if True: + predictions = Birch(n_clusters=3).fit_predict(predictions).reshape(-1, 1) + else: + transformer = transformation(n_components=3, random_state=42) + predictions = transformer.fit_transform(predictions) + + if predictions.shape[-1] == 1: + colored = self.colorize(predictions.reshape(-1), **kwargs) + + elif predictions.shape[-1] == 2: + colored = self.colorize(predictions[:, 0], **kwargs) + + if kwargs.get('min_val', None): + lightning = mcolors.Normalize(vmin=kwargs.get('min_val', None), vmax=kwargs.get('max_val', None)) + else: + lightning = mcolors.Normalize() + alpha = lightning(predictions[:, 1]) + colored[:, -1] = alpha + + elif predictions.shape[-1] == 3: + norm = mcolors.Normalize() + colored = [(r, g, b) for r,g,b in norm(predictions)] + + else: + raise NotImplementedError('Full Prediction Shape was: {}'.format(predictions.shape)) + # TODO Build a isomap or tsne transformation here to get a two dimensional space + + segment_coordinates = segment_coordinates.cpu() if torch.is_tensor(segment_coordinates) else segment_coordinates + + return LineCollection(segment_coordinates, linewidths=(1, 1, 1, 1), + 
colors=colored, linestyle='solid') + + @staticmethod + def _build_map_shapes(base_map: Map): + # Base Map Plotting + # filled Triangle + patches = [Polygon(base_map[i], True, color='black') for i in range(len(base_map))] + return PatchCollection(patches, color='black') + + def print_trajec_on_basemap(self, data, base_map: Map, save=False, color_by_movement=False, **kwargs): + """ + + :rtype: object + """ + prediction_segments = self._gather_predictions(data, color_by_movement=color_by_movement, **kwargs) + trajectory_shapes = self._build_trajectory_shapes(*prediction_segments, **kwargs) + map_shapes = self._build_map_shapes(base_map) + self.ax.add_collection(trajectory_shapes) + self.ax.axis('auto') + self.ax.add_collection(map_shapes) + + self.ax.set_title('Trajectories on BaseMap') + if save: + if isinstance(save, str): + self.save(save) + else: + self.save(base_map.name) + pass + + @staticmethod + def show(): + plt.show() + return True + + @staticmethod + def save(filename): + plt.savefig(filename) + + +class MotionAnalyser(object): + + def __init__(self): + pass + + def _sequential_pairwise_map(self, func, xy_sequence, on_deltas=False): + zipped_list = [x for x in zip(xy_sequence[:-1], xy_sequence[1:])] + + if on_deltas: + zipped_list = [self.delta(*movement) for movement in zipped_list] + else: + pass + + return [func(*xy) for xy in zipped_list] + + @staticmethod + def delta(x1y1, x2y2): + x1, y1 = x1y1 + x2, y2 = x2y2 + return x2-x1, y2-y1 + + @staticmethod + def get_r(deltax, deltay): + # https://mathinsight.org/polar_coordinates + r = torch.sqrt(deltax**2 + deltay**2) + return r + + @staticmethod + def get_theta(deltax, deltay, rad=False): + # https://mathinsight.org/polar_coordinates + theta = torch.atan2(deltay, deltax) + return theta if rad else theta * 180 / pi + + def get_theta_for_sequence(self, xy_sequence): + ts = self._sequential_pairwise_map(self.get_theta, xy_sequence, on_deltas=True) + return ts + + def get_r_for_sequence(self, xy_sequence): + rs = self._sequential_pairwise_map(self.get_r, xy_sequence, on_deltas=True) + return rs + + def get_unique_seq_identifier(self, xy_sequence): + + # Globals + global_delta = self.delta(xy_sequence[0], xy_sequence[-1]) + global_theta = self.get_theta(*global_delta) + global_r = self.get_r(*global_delta) + + # For Each + theta_seq = self.get_theta_for_sequence(xy_sequence) + mean_theta = sum(theta_seq) / len(theta_seq) + theta_sum = sum([abs(theta) for theta in theta_seq]) + std_theta = stdev(map(float, theta_seq)) + + return torch.stack((global_r, torch.as_tensor(std_theta), mean_theta, global_theta)) + + def cluster_motion(self, trajectory_samples, cluster_class=KMeans): + cluster_class = cluster_class(3) + + std, mean = torch.std_mean(trajectory_samples, dim=0) + trajectory_samples = (trajectory_samples - mean) / std + + unique_seq_identifiers = torch.stack([self.get_unique_seq_identifier(trajectory) + for trajectory in trajectory_samples]) + + clustered_movement = cluster_class.fit_predict(unique_seq_identifiers) + if False: + from sklearn.decomposition import PCA + p = PCA(2) + t = p.fit_transform(unique_seq_identifiers) + f = plt.figure() + plt.scatter(t[:, 0], t[:,1]) + plt.show() + + return clustered_movement.reshape(-1, 1) + + if __name__ == '__main__': raise PermissionError('This file should not be called.') diff --git a/viz/viz_latent.py b/viz/viz_latent.py index ab16f3f..ceaa132 100644 --- a/viz/viz_latent.py +++ b/viz/viz_latent.py @@ -1,12 +1,12 @@ -from sklearn.manifold import TSNE -from sklearn.decomposition import 
PCA - -import seaborn as sns -import matplotlib.pyplot as plt - +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) +import torch +from dataset import DataContainer +from viz.utils import search_for_weights, Printer from run_models import * -sns.set() + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') def load_and_predict(path_like_element): @@ -14,73 +14,39 @@ def load_and_predict(path_like_element): splitpath = path_like_element.split(os.sep) base_dir = os.path.join(*splitpath[:4]) model = globals()[splitpath[2]] - print(f'... loading model named: "{Model.name}" from timestamp: {splitpath[3]}') pretrained_model = model.load_from_metrics( weights_path=path_like_element, tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'), on_gpu=True if torch.cuda.is_available() else False, - map_location=None + # map_location=None ) + print(f'... loading model named: "{model.name}" from timestamp: {splitpath[3]}') # Init model and freeze its weights ( for faster inference) pretrained_model = pretrained_model.to(device) pretrained_model.eval() pretrained_model.freeze() - with torch.no_grad(): + # Load the data for prediction - # Load the data for prediction + # TODO!!!!!!!!!: + # Hier müssen natürlich auch die date parameter geladen werden! + # Muss ich die val-sets automatisch neu setzen, also immer auf refresh haben, wenn ich validieren möchte? + # Was ist denn eigentlich mein Val Dataset? + # Hab ich irgendwo eine ganze karte? + # Wie sorge ich dafür, dass gewisse karten, also größenverhältnisse usw nicht überrepräsentiert sind? + dataset = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device) - # TODO!!!!!!!!!: - # Hier müssen natürlich auch die date parameter geladen werden! - # Muss ich die val-sets automatisch neu setzen, also immer auf refresh haben, wenn ich validieren möchte? - # Was ist denn eigentlich mein Val Dataset? - # Hab ich irgendwo eine ganze karte? - # Wie sorge ich dafür, dass gewisse karten, also größenverhältnisse usw nicht überrepräsentiert sind? - dataset = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device) - dataloader = DataLoader(dataset, shuffle=True, batch_size=len(dataset)) + # Do the inference + # test_pred = [pretrained_model(test_sample)[:-1] for test_sample in dataloader][0] - # Do the inference - test_pred = [pretrained_model(test_sample)[:-1] for test_sample in dataloader][0] - - for idx, prediction in enumerate(test_pred): - plot, _ = viz_latent(prediction) - plot.savefig(os.path.join(base_dir, f'latent_space_{idx}.png')) - - -def viz_latent(prediction): - try: - prediction = prediction.cpu() - prediction = prediction.numpy() - except AttributeError: - pass - - if prediction.shape[-1] <= 1: - raise ValueError('How did this happen?') - elif prediction.shape[-1] == 2: - ax = sns.scatterplot(x=prediction[:, 0], y=prediction[:, 1]) - try: - plt.show() - except: - pass - return ax.figure, (ax) - else: - fig, axs = plt.subplots(ncols=2) - plots = [] - for idx, dim_reducer in enumerate([PCA, TSNE]): - predictions_reduced = dim_reducer(n_components=2).fit_transform(prediction) - plot = sns.scatterplot(x=predictions_reduced[:, 0], y=predictions_reduced[:, 1], - ax=axs[idx]) - plot.set_title(dim_reducer.__name__) - plots.append(plot) - - try: - plt.show() - except: - pass - return fig, (*plots, ) + p = Printer(pretrained_model) + # Important: + # Use all given valdiation samples, even if they relate to differnt maps. 
This is important since we want to have a + # view on the complete latent space, not just in relation to a single basemap, which would be a major bias. + p.print_possible_latent_spaces(dataset, save=os.path.join(base_dir, f'latent_space')) if __name__ == '__main__': path = 'output' - search_for_weights(search_for_weights, path) \ No newline at end of file + search_for_weights(load_and_predict, path, file_type='latent') \ No newline at end of file diff --git a/viz/viz_map.py b/viz/viz_map.py deleted file mode 100644 index 4b5a6eb..0000000 --- a/viz/viz_map.py +++ /dev/null @@ -1,50 +0,0 @@ - -from dataset import * -# Plotting -# import matplotlib as mlp -from matplotlib import pyplot as plt -from matplotlib.patches import Polygon -from matplotlib.collections import LineCollection, PatchCollection -import matplotlib.colors as mcolors -import matplotlib.cm as cmaps - -from sklearn.manifold import TSNE -from sklearn.decomposition import PCA - -import seaborn as sns -from argparse import ArgumentParser - -from viz.utils import search_for_weights - -from run_models import * - -sns.set() - - -arguments = ArgumentParser() -arguments.add_argument('--data', default=os.path.join('data', 'validation')) - -dataset = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device) -dataloader = DataLoader(dataset, shuffle=True, batch_size=len(dataset)) - - - -def viz_map(self, base_map: MapContainer): - # Base Map Plotting - # filled Triangle - patches = [Polygon(base_map.get_triangle_by_key(i), True, color='k') for i in range(len(base_map))] - patch_collection = PatchCollection(patches, color='k') - - self.ax.add_collection(patch_collection) - print('Basemap Plotted') - - patches = [Polygon(base_map.get_triangle_by_key(i), True, color='k') for i in range(len(base_map))] - return PatchCollection(patches, color='k') - -def load_and_predict(folder): - pass - - -if __name__ == '__main__': - search_for_weights(load_and_predict, arguments.data) - # ToDo: THIS diff --git a/viz/viz_prediction_in_map.py b/viz/viz_prediction_in_map.py new file mode 100644 index 0000000..72e2975 --- /dev/null +++ b/viz/viz_prediction_in_map.py @@ -0,0 +1,55 @@ +from argparse import ArgumentParser +import os + +from dataset import DataContainer +from viz.utils import MotionAnalyser, Printer, MapContainer, search_for_weights +import torch + +arguments = ArgumentParser() +arguments.add_argument('--data', default='output') + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + +from viz.utils import * +from run_models import * + +arguments = ArgumentParser() +arguments.add_argument('--data', default='output') + +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + +def load_and_viz(path_like_element): + # Define Loop to search for models and folder with visualizations + splitpath = path_like_element.split(os.sep) + base_dir = os.path.join(*splitpath[:4]) + model = globals()[splitpath[2]] + print(f'... 
loading model named: "{model.name}" from timestamp: {splitpath[3]}') + pretrained_model = model.load_from_metrics( + weights_path=path_like_element, + tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'), + on_gpu=True if torch.cuda.is_available() else False, + # map_location=None + ) + + # Init model and freeze its weights ( for faster inference) + pretrained_model = pretrained_model.to(device) + pretrained_model.eval() + pretrained_model.freeze() + + dataIndex = 0 + + datasets = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device) + dataset = datasets.datasets[dataIndex] + # ToDO: use dataloader for iteration instead! - dataloader = DataLoader(dataset, ) + + maps = MapContainer(os.path.join(os.pardir, 'data', 'validation')) + base_map = maps.datasets[dataIndex] + + p = Printer(pretrained_model) + p.print_trajec_on_basemap(dataset, base_map, save=os.path.join(base_dir, f'{base_map.name}_map.png')) + + +if __name__ == '__main__': + args = arguments.parse_args() + search_for_weights(load_and_viz, args.data, file_type='map')
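Editor's note: the MotionAnalyser introduced in viz/utils.py condenses each (x, y) trajectory into a four-value descriptor (global travel distance, spread and mean of the per-step headings, and the overall heading) before clustering. A compact, vectorized sketch of that descriptor under assumed inputs; motion_descriptor and the example tensor are illustrative, not code from the repository:

import math
import torch

def motion_descriptor(xy):
    # xy: (T, 2) tensor of consecutive coordinates of one trajectory segment.
    deltas = xy[1:] - xy[:-1]                                   # per-step displacements
    step_theta = torch.atan2(deltas[:, 1], deltas[:, 0]) * 180 / math.pi
    global_delta = xy[-1] - xy[0]                               # start-to-end displacement
    global_r = torch.sqrt(global_delta[0] ** 2 + global_delta[1] ** 2)
    global_theta = torch.atan2(global_delta[1], global_delta[0]) * 180 / math.pi
    # Same four components as get_unique_seq_identifier: distance travelled,
    # turning spread, mean heading, overall heading.
    return torch.stack((global_r, step_theta.std(), step_theta.mean(), global_theta))

# A straight walk along the x-axis: no turning, heading 0 degrees.
straight = torch.tensor([[0., 0.], [1., 0.], [2., 0.], [3., 0.]])
print(motion_descriptor(straight))  # tensor([3., 0., 0., 0.])

In the patch, cluster_motion standardizes the raw coordinate samples, builds one such vector per trajectory, and feeds the stacked descriptors into KMeans with three clusters; print_trajec_on_basemap then colors the plotted segments by the resulting cluster label when color_by_movement is set.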