Visualization approach n

Si11ium committed 2019-09-29 09:37:30 +02:00
parent 1386cdfd33
commit a70c9b7fef
15 changed files with 652 additions and 197 deletions

viz/output.png — new binary file (55 KiB), not shown


@@ -0,0 +1,50 @@
from argparse import ArgumentParser
import os
from dataset import DataContainer
from viz.utils import MotionAnalyser, Printer, MapContainer, search_for_weights
import torch
from run_models import SAAE_Model, AAE_Model, VAE_Model, AE_Model
arguments = ArgumentParser()
arguments.add_argument('--data', default='output')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_and_viz(path_like_element):
# Called for each checkpoint found by the search; loads the model and writes visualizations beside it
splitpath = path_like_element.split(os.sep)
base_dir = os.path.join(*splitpath[:4])
model = globals()[splitpath[2]]
print(f'... loading model named: "{model.name}" from timestamp: {splitpath[3]}')
pretrained_model = model.load_from_metrics(
weights_path=path_like_element,
tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'),
on_gpu=True if torch.cuda.is_available() else False,
# map_location=None
)
# Init model and freeze its weights (for faster inference)
pretrained_model = pretrained_model.to(device)
pretrained_model.eval()
pretrained_model.freeze()
data_index = 0
datasets = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device)
dataset = datasets.datasets[data_index]
# TODO: use a DataLoader for iteration instead, e.g. dataloader = DataLoader(dataset, ...)
maps = MapContainer(os.path.join(os.pardir, 'data', 'validation'))
base_map = maps.datasets[dataIndex]
p = Printer(pretrained_model)
p.print_trajec_on_basemap(dataset, base_map, save=os.path.join(base_dir, f'{base_map.name}_movement.png'),
color_by_movement=True)
return True
if __name__ == '__main__':
args = arguments.parse_args()
search_for_weights(load_and_viz, args.data, file_type='movement')
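# Assuming checkpoints live under output/<experiment>/<ModelClass>/<timestamp>/...,
# a hypothetical invocation of this script would be:
#     python viz_movement.py --data output
# which writes one <map>_movement.png into each run's base directory.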


@@ -1,13 +1,30 @@
import os
import ast
from typing import Union
from functools import reduce
from statistics import stdev
import numpy as np
import torch
from sklearn.cluster import Birch, KMeans, DBSCAN
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from dataset import *
from networks.modules import AbstractNeuralNetwork
from matplotlib import pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import LineCollection, PatchCollection
import matplotlib.colors as mcolors
import matplotlib.cm as cmaps
from math import pi
def search_for_weights(func, folder):
def search_for_weights(func, folder, file_type='latent_space'):
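# Walk upward (prepending os.pardir) until `folder` exists, then recurse through it;
# skip any directory that already contains a file whose name includes `file_type`,
# and call func(path) on every checkpoint file ending in '.ckpt'.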
while not os.path.exists(folder):
if len(folder.split(os.sep)) >= 50:
raise FileNotFoundError(f'The folder "{folder}" could not be found')
folder = os.path.join(os.pardir, folder)
if any([x.name.endswith('.png') for x in os.scandir(folder)]):
if any([file_type in x.name for x in os.scandir(folder)]):
return
if any(['.ckpt' in element.name and element.is_dir() for element in os.scandir(folder)]):
@@ -19,12 +36,324 @@ def search_for_weights(func, folder):
for element in os.scandir(folder):
if os.path.exists(element):
if element.is_dir():
search_for_weights(func, element.path)
search_for_weights(func, element.path, file_type=file_type)
elif element.is_file() and element.name.endswith('.ckpt'):
func(element)
func(element.path)
else:
continue
class Printer(object):
def __init__(self, model: AbstractNeuralNetwork, ax=None):
self.norm = mcolors.Normalize(vmin=0, vmax=1)
self.colormap = cmaps.gist_rainbow
self.network = model
self.fig = plt.figure(dpi=300)
self.ax = ax if ax else plt.subplot(1, 1, 1)
pass
def colorize(self, x, min_val: Union[float, None] = None, max_val: Union[float, None] = None,
colormap=cmaps.rainbow, **kwargs):
norm = mcolors.Normalize(vmin=min_val, vmax=max_val)
colored = colormap(norm(x))
return colored
@staticmethod
def project_to_2d(data: np.ndarray, method: str = 'tsne') -> np.ndarray:
projector = TSNE() if method.lower() == 'tsne' else PCA()
print(f'Starting {projector.__class__.__name__} transformation')
projected_data = projector.fit_transform(data)
assert projected_data.shape[-1] == 2
print(f'{projector.__class__.__name__} projection successful')
return projected_data
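# Usage sketch (hypothetical data): Printer.project_to_2d(np.random.rand(500, 16), method='pca')
# reduces 16-dimensional latent codes to two dimensions for plotting.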
@staticmethod
def cluster_data(data: np.ndarray, cluster_center_file: str = None) -> np.ndarray:
print('Start Clustering with Birch')
if cluster_center_file:
with open(cluster_center_file, 'r') as f:
cluster_center_string = f.readlines()[0]
centers = ast.literal_eval(cluster_center_string)
clusterer = Birch(n_clusters=len(centers))
clusterer.init = np.asarray(centers)
else:
# clusterer = Birch(n_clusters=None)
clusterer = Birch()
labels = clusterer.fit_predict(data)
print('Birch clustering successful')
return labels
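# The optional cluster_center_file is assumed to hold a single line containing a
# Python literal list of centers, e.g. '[[0.1, 0.2], [0.8, 0.9]]'; it is parsed
# with ast.literal_eval and seeds Birch with len(centers) clusters.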
def print_possible_latent_spaces(self, data: Trajectories, n: Union[int, str] = 1000, **kwargs):
predictions, _ = self._gather_predictions(data, n)
if len(predictions) >= 2:
predictions += (torch.cat(predictions, dim=-1), )
labels = self.cluster_data(predictions[-1])
for idx, prediction in enumerate(predictions):
self.print_latent_space(prediction, labels, running_index=idx, **kwargs)
def print_latent_space(self, prediction, labels, running_index=0, save=None):
self.colormap = cmaps.tab20
if isinstance(prediction, torch.Tensor):
prediction = prediction.numpy()
elif isinstance(prediction, np.ndarray):
pass
elif isinstance(prediction, list):
prediction = np.asarray(prediction)
else:
raise RuntimeError(f'Unsupported prediction type: {type(prediction)}')
if prediction.shape[-1] > 2:
fig, axs = plt.subplots(ncols=2, nrows=1)
transformers = [TSNE(2), PCA(2)]
print('Starting Dimensional Reduction')
for idx, transformer in enumerate(transformers):
transformed = transformer.fit_transform(prediction)
print(f'{transformer.__class__.__name__} projection successful')
colored = self.colormap(labels)
ax = axs[idx]
ax.scatter(x=transformed[:, 0], y=transformed[:, 1], c=colored)
ax.set_title(transformer.__class__.__name__)
ax.set_xlim(np.min(transformed[:, 0]) * 1.1, np.max(transformed[:, 0]) * 1.1)
ax.set_ylim(np.min(transformed[:, 1]) * 1.1, np.max(transformed[:, 1]) * 1.1)
elif prediction.shape[-1] == 2:
fig, axs = plt.subplots()
# TODO: Build transformation for lat_dim_size >= 3
print('All predictions successfully gathered and shaped')
axs.set_xlim(np.min(prediction[:, 0]), np.max(prediction[:, 0]))
axs.set_ylim(np.min(prediction[:, 1]), np.max(prediction[:, 1]))
# ToDo: Insert Normalization
colored = self.colormap(labels)
plt.scatter(prediction[:, 0], prediction[:, 1], c=colored)
else:
raise NotImplementedError("Latent Dimensions can not be one-dimensional (yet).")
if save:
plt.savefig(f'{save}_{running_index}.png')
def print_latent_density(self): # , data: DataContainer):
raise NotImplementedError("My Future Self has to come up with smth")
# fig, ax = plt.subplots()
# preds = []
# for i in range(data.len - data.width * data.stepsize):
# for i in range(5000):
#
# seq = data.sub_trajectory_by_key(i, stepsize=data.stepsize)
#
# preds.append(self.nn.encoder([seq[None, ...]])[0])
#
# TODO: Build transformation for lat_dim_size >= 3
# pred_array = np.asarray(preds).reshape((-1, nn.latDim))
# k = KernelDensity()
# k.fit(pred_array)
# z = np.exp(k.score_samples(pred_array))
#
# levels = np.linspace(0, z.max(), 25)
# xgrid, ygrid = np.meshgrid(pred_array[::5, 0], pred_array[::5, 1])
# xy = np.vstack([xgrid.ravel(), ygrid.ravel()]).T
# z = np.exp(k.score_samples(xy)).reshape(xgrid.shape)
#
# plt.contourf(xgrid, ygrid, z, levels=levels, cmap=plt.cm.Reds)
# plt.show()
def _gather_predictions(self, data: Trajectories, n: int = 1000,
color_by_movement=False, **kwargs):
"""
Check if any value for n is given and gather some random datapoints from the dataset. In accordance with the
maximal possible trajectory amount that is given by stepsize * width.
Also retunr the keys for all possible predictions.
:param data:
:type data: Dataset
:param n:
:param tsne:
:param kwargs:
:return:
"""
print("Gathering Predictions")
n = n if isinstance(n, int) and n else len(data) - (data.size * data.step)
idxs = np.random.choice(np.arange(len(data) - data.step * data.size), n, replace=False)
complete_data = torch.stack([data.get_both_by_key(idx) for idx in idxs], dim=0)
segment_coords, trajectories = complete_data[:, :, :2], complete_data[:, :, 2:]
if color_by_movement:
motion_analyser = MotionAnalyser()
predictions = (motion_analyser.cluster_motion(segment_coords), )
else:
with torch.no_grad():
predictions = self.network(trajectories)[:-1]
return predictions, segment_coords
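# The returned predictions tuple holds either motion-cluster labels (when coloring
# by movement) or the network's outputs; segment_coords are the raw xy positions
# of each sampled segment, used later to draw the trajectories.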
@staticmethod
def colorize_as_hsv(x, min_val: Union[float, None] = None, max_val: Union[float, None] = None,
colormap=cmaps.rainbow, **kwargs):
norm = mcolors.Normalize(vmin=min_val, vmax=max_val)
colored = colormap(norm(x))
return colored
def _build_trajectory_shapes(self, predictions: np.ndarray, segment_coordinates,
axis=None, transformation=TSNE, **kwargs):
if not isinstance(predictions, np.ndarray):
predictions = tuple((x if torch.is_tensor(x) else torch.from_numpy(x) for x in predictions))
predictions = torch.cat(predictions, dim=-1)
if axis is not None:
predictions = predictions[:, axis][..., None]
if predictions.shape[-1] >= 4:
# The dimensionality-reduction branch is disabled for now; cluster instead.
predictions = Birch(n_clusters=3).fit_predict(predictions).reshape(-1, 1)
# transformer = transformation(n_components=3, random_state=42)
# predictions = transformer.fit_transform(predictions)
if predictions.shape[-1] == 1:
colored = self.colorize(predictions.reshape(-1), **kwargs)
elif predictions.shape[-1] == 2:
colored = self.colorize(predictions[:, 0], **kwargs)
if kwargs.get('min_val', None):
lightning = mcolors.Normalize(vmin=kwargs.get('min_val', None), vmax=kwargs.get('max_val', None))
else:
lightning = mcolors.Normalize()
alpha = lightning(predictions[:, 1])
colored[:, -1] = alpha
elif predictions.shape[-1] == 3:
norm = mcolors.Normalize()
colored = [(r, g, b) for r, g, b in norm(predictions)]
else:
raise NotImplementedError('Full Prediction Shape was: {}'.format(predictions.shape))
# TODO: Build an Isomap or t-SNE transformation here to get a two-dimensional space
segment_coordinates = segment_coordinates.cpu() if torch.is_tensor(segment_coordinates) else segment_coordinates
return LineCollection(segment_coordinates, linewidths=(1, 1, 1, 1),
colors=colored, linestyle='solid')
@staticmethod
def _build_map_shapes(base_map: Map):
# Base map plotting: filled triangles
patches = [Polygon(base_map[i], True, color='black') for i in range(len(base_map))]
return PatchCollection(patches, color='black')
def print_trajec_on_basemap(self, data, base_map: Map, save=False, color_by_movement=False, **kwargs):
"""
:rtype: object
"""
prediction_segments = self._gather_predictions(data, color_by_movement=color_by_movement, **kwargs)
trajectory_shapes = self._build_trajectory_shapes(*prediction_segments, **kwargs)
map_shapes = self._build_map_shapes(base_map)
self.ax.add_collection(trajectory_shapes)
self.ax.axis('auto')
self.ax.add_collection(map_shapes)
self.ax.set_title('Trajectories on BaseMap')
if save:
if isinstance(save, str):
self.save(save)
else:
self.save(base_map.name)
pass
@staticmethod
def show():
plt.show()
return True
@staticmethod
def save(filename):
plt.savefig(filename)
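# Minimal sketch of the intended flow, using only names defined in this file:
#     p = Printer(pretrained_model)
#     p.print_trajec_on_basemap(dataset, base_map, save='map.png', color_by_movement=True)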
class MotionAnalyser(object):
def __init__(self):
pass
def _sequential_pairwise_map(self, func, xy_sequence, on_deltas=False):
# Pair each point with its successor; optionally convert the pairs to (dx, dy) deltas first.
zipped_list = list(zip(xy_sequence[:-1], xy_sequence[1:]))
if on_deltas:
zipped_list = [self.delta(*movement) for movement in zipped_list]
return [func(*xy) for xy in zipped_list]
@staticmethod
def delta(x1y1, x2y2):
x1, y1 = x1y1
x2, y2 = x2y2
return x2-x1, y2-y1
@staticmethod
def get_r(deltax, deltay):
# https://mathinsight.org/polar_coordinates
r = torch.sqrt(deltax**2 + deltay**2)
return r
@staticmethod
def get_theta(deltax, deltay, rad=False):
# https://mathinsight.org/polar_coordinates
theta = torch.atan2(deltay, deltax)
return theta if rad else theta * 180 / pi
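# Taken together, get_r and get_theta convert a cartesian step (deltax, deltay) into
# polar form: e.g. a step from (0, 0) to (1, 1) has deltax = deltay = 1, so
# r = sqrt(1**2 + 1**2) ≈ 1.414 and theta = atan2(1, 1) = pi/4 rad = 45 degrees.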
def get_theta_for_sequence(self, xy_sequence):
ts = self._sequential_pairwise_map(self.get_theta, xy_sequence, on_deltas=True)
return ts
def get_r_for_sequence(self, xy_sequence):
rs = self._sequential_pairwise_map(self.get_r, xy_sequence, on_deltas=True)
return rs
def get_unique_seq_identifier(self, xy_sequence):
# Globals
global_delta = self.delta(xy_sequence[0], xy_sequence[-1])
global_theta = self.get_theta(*global_delta)
global_r = self.get_r(*global_delta)
# For Each
theta_seq = self.get_theta_for_sequence(xy_sequence)
mean_theta = sum(theta_seq) / len(theta_seq)
theta_sum = sum([abs(theta) for theta in theta_seq])
std_theta = stdev(map(float, theta_seq))
return torch.stack((global_r, torch.as_tensor(std_theta), mean_theta, global_theta))
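# The resulting descriptor is one 4-vector per trajectory:
# (global displacement r, std of step angles, mean step angle, global direction).
# Straight runs show a low angle spread, while circling motion combines large
# per-step angles with a near-zero global displacement.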
def cluster_motion(self, trajectory_samples, cluster_class=KMeans):
clusterer = cluster_class(3)
std, mean = torch.std_mean(trajectory_samples, dim=0)
trajectory_samples = (trajectory_samples - mean) / std
unique_seq_identifiers = torch.stack([self.get_unique_seq_identifier(trajectory)
for trajectory in trajectory_samples])
clustered_movement = clusterer.fit_predict(unique_seq_identifiers)
# Debug visualization of the descriptor space (disabled):
# from sklearn.decomposition import PCA
# p = PCA(2)
# t = p.fit_transform(unique_seq_identifiers)
# f = plt.figure()
# plt.scatter(t[:, 0], t[:, 1])
# plt.show()
return clustered_movement.reshape(-1, 1)
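# Usage sketch with hypothetical shapes: for a batch of 100 trajectories of
# 10 xy points each,
#     labels = MotionAnalyser().cluster_motion(torch.rand(100, 10, 2))
# returns a (100, 1) array of motion-cluster ids in {0, 1, 2}.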
if __name__ == '__main__':
raise PermissionError('This file should not be called.')


@@ -1,12 +1,12 @@
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
import torch
from dataset import DataContainer
from viz.utils import search_for_weights, Printer
from run_models import *
sns.set()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_and_predict(path_like_element):
@@ -14,73 +14,39 @@ def load_and_predict(path_like_element):
splitpath = path_like_element.split(os.sep)
base_dir = os.path.join(*splitpath[:4])
model = globals()[splitpath[2]]
print(f'... loading model named: "{Model.name}" from timestamp: {splitpath[3]}')
pretrained_model = model.load_from_metrics(
weights_path=path_like_element,
tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'),
on_gpu=True if torch.cuda.is_available() else False,
map_location=None
# map_location=None
)
print(f'... loading model named: "{model.name}" from timestamp: {splitpath[3]}')
# Init model and freeze its weights (for faster inference)
pretrained_model = pretrained_model.to(device)
pretrained_model.eval()
pretrained_model.freeze()
with torch.no_grad():
# Load the data for prediction
# TODO:
# The data parameters must of course be loaded here as well!
# Do I have to reset the val sets automatically, i.e. always refresh them when I want to validate?
# What actually is my val dataset?
# Do I have a whole map anywhere?
# How do I make sure that certain maps, i.e. size ratios etc., are not overrepresented?
dataset = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device)
dataloader = DataLoader(dataset, shuffle=True, batch_size=len(dataset))
# Do the inference
test_pred = [pretrained_model(test_sample)[:-1] for test_sample in dataloader][0]
for idx, prediction in enumerate(test_pred):
plot, _ = viz_latent(prediction)
plot.savefig(os.path.join(base_dir, f'latent_space_{idx}.png'))
def viz_latent(prediction):
try:
prediction = prediction.cpu()
prediction = prediction.numpy()
except AttributeError:
pass
if prediction.shape[-1] <= 1:
raise ValueError('How did this happen?')
elif prediction.shape[-1] == 2:
ax = sns.scatterplot(x=prediction[:, 0], y=prediction[:, 1])
try:
plt.show()
except Exception:
pass
return ax.figure, (ax)
else:
fig, axs = plt.subplots(ncols=2)
plots = []
for idx, dim_reducer in enumerate([PCA, TSNE]):
predictions_reduced = dim_reducer(n_components=2).fit_transform(prediction)
plot = sns.scatterplot(x=predictions_reduced[:, 0], y=predictions_reduced[:, 1],
ax=axs[idx])
plot.set_title(dim_reducer.__name__)
plots.append(plot)
try:
plt.show()
except Exception:
pass
return fig, (*plots, )
p = Printer(pretrained_model)
# Important:
# Use all given validation samples, even if they relate to different maps. This is important since we want a
# view of the complete latent space, not just one relative to a single base map, which would be a major bias.
p.print_possible_latent_spaces(dataset, save=os.path.join(base_dir, 'latent_space'))
if __name__ == '__main__':
path = 'output'
search_for_weights(search_for_weights, path)
search_for_weights(load_and_predict, path, file_type='latent')


@@ -1,50 +0,0 @@
from dataset import *
# Plotting
# import matplotlib as mlp
from matplotlib import pyplot as plt
from matplotlib.patches import Polygon
from matplotlib.collections import LineCollection, PatchCollection
import matplotlib.colors as mcolors
import matplotlib.cm as cmaps
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import seaborn as sns
from argparse import ArgumentParser
from viz.utils import search_for_weights
from run_models import *
sns.set()
arguments = ArgumentParser()
arguments.add_argument('--data', default=os.path.join('data', 'validation'))
dataset = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device)
dataloader = DataLoader(dataset, shuffle=True, batch_size=len(dataset))
def viz_map(self, base_map: MapContainer):
# Base Map Plotting
# filled Triangle
patches = [Polygon(base_map.get_triangle_by_key(i), True, color='k') for i in range(len(base_map))]
patch_collection = PatchCollection(patches, color='k')
self.ax.add_collection(patch_collection)
print('Basemap Plotted')
patches = [Polygon(base_map.get_triangle_by_key(i), True, color='k') for i in range(len(base_map))]
return PatchCollection(patches, color='k')
def load_and_predict(folder):
pass
if __name__ == '__main__':
search_for_weights(load_and_predict, arguments.data)
# ToDo: THIS


@@ -0,0 +1,55 @@
from argparse import ArgumentParser
import os
from dataset import DataContainer
from viz.utils import MotionAnalyser, Printer, MapContainer, search_for_weights
import torch
from viz.utils import *
from run_models import *
arguments = ArgumentParser()
arguments.add_argument('--data', default='output')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_and_viz(path_like_element):
# Called for each checkpoint found by the search; loads the model and writes visualizations beside it
splitpath = path_like_element.split(os.sep)
base_dir = os.path.join(*splitpath[:4])
model = globals()[splitpath[2]]
print(f'... loading model named: "{model.name}" from timestamp: {splitpath[3]}')
pretrained_model = model.load_from_metrics(
weights_path=path_like_element,
tags_csv=os.path.join(base_dir, 'default', 'version_0', 'meta_tags.csv'),
on_gpu=True if torch.cuda.is_available() else False,
# map_location=None
)
# Init model and freeze its weights (for faster inference)
pretrained_model = pretrained_model.to(device)
pretrained_model.eval()
pretrained_model.freeze()
data_index = 0
datasets = DataContainer(os.path.join(os.pardir, 'data', 'validation'), 9, 6).to(device)
dataset = datasets.datasets[data_index]
# TODO: use a DataLoader for iteration instead, e.g. dataloader = DataLoader(dataset, ...)
maps = MapContainer(os.path.join(os.pardir, 'data', 'validation'))
base_map = maps.datasets[dataIndex]
p = Printer(pretrained_model)
p.print_trajec_on_basemap(dataset, base_map, save=os.path.join(base_dir, f'{base_map.name}_map.png'))
if __name__ == '__main__':
args = arguments.parse_args()
search_for_weights(load_and_viz, args.data, file_type='map')