Add files via upload

This commit is contained in:
Steffen Illium 2017-07-22 21:13:27 +02:00 committed by GitHub
parent cd5bf9dc72
commit 9ff4153dc1
19 changed files with 2546 additions and 0 deletions

314
PCHA.py Normal file
View File

@ -0,0 +1,314 @@
"""Principal Convex Hull Analysis (PCHA) / Archetypal Analysis."""
from __future__ import division
import numpy as np
from numpy.matlib import repmat
from scipy.sparse import csr_matrix
from datetime import datetime as dt
import time
#from furthest_sum import furthest_sum
def PCHA(X, noc, I=None, U=None, delta=0, verbose=False, conv_crit=1E-6, maxiter=500):
"""Return archetypes of dataset.
Note: Commonly data is formatted to have shape (examples, dimensions).
This function takes input and returns output of the transposed shape,
(dimensions, examples).
Parameters
----------
X : numpy.2darray
Data matrix in which to find archetypes
noc : int
Number of archetypes to find
I : 1d-array
Entries of X to use for dictionary in C (optional)
U : 1d-array
Entries of X to model in S (optional)
Output
------
XC : numpy.2darray
I x noc feature matrix (i.e. XC=X[:,I]*C forming the archetypes)
S : numpy.2darray
noc x length(U) matrix, S>=0 |S_j|_1=1
C : numpy.2darray
noc x length(U) matrix, S>=0 |S_j|_1=1
SSE : float
Sum of Squared Errors
varexlp : float
Percent variation explained by the model
"""
def S_update(S, XCtX, CtXtXC, muS, SST, SSE, niter):
"""Update S for one iteration of the algorithm."""
noc, J = S.shape
e = np.ones((noc, 1))
for k in range(niter):
SSE_old = SSE
g = (np.dot(CtXtXC, S) - XCtX) / (SST / J)
g = g - e * np.sum(g.A * S.A, axis=0)
S_old = S
while True:
S = (S_old - g * muS).clip(min=0)
S = S / np.dot(e, np.sum(S, axis=0))
SSt = S * S.T
SSE = SST - 2 * np.sum(XCtX.A * S.A) + np.sum(CtXtXC.A * SSt.A)
if SSE <= SSE_old * (1 + 1e-9):
muS = muS * 1.2
break
else:
muS = muS / 2
return S, SSE, muS, SSt
def C_update(X, XSt, XC, SSt, C, delta, muC, mualpha, SST, SSE, niter=1):
"""Update C for one iteration of the algorithm."""
J, nos = C.shape
if delta != 0:
alphaC = np.sum(C, axis=0).A[0]
C = np.dot(C, np.diag(1 / alphaC))
e = np.ones((J, 1))
XtXSt = np.dot(X.T, XSt)
for k in range(niter):
# Update C
SSE_old = SSE
g = (np.dot(X.T, np.dot(XC, SSt)) - XtXSt) / SST
if delta != 0:
g = np.dot(g, np.diag(alphaC))
g = g.A - e * np.sum(g.A * C.A, axis=0)
C_old = C
while True:
C = (C_old - muC * g).clip(min=0)
nC = np.sum(C, axis=0) + np.finfo(float).eps
C = np.dot(C, np.diag(1 / nC.A[0]))
if delta != 0:
Ct = C * np.diag(alphaC)
else:
Ct = C
XC = np.dot(X, Ct)
CtXtXC = np.dot(XC.T, XC)
SSE = SST - 2 * np.sum(XC.A * XSt.A) + np.sum(CtXtXC.A * SSt.A)
if SSE <= SSE_old * (1 + 1e-9):
muC = muC * 1.2
break
else:
muC = muC / 2
# Update alphaC
SSE_old = SSE
if delta != 0:
g = (np.diag(CtXtXC * SSt).T / alphaC - np.sum(C.A * XtXSt.A)) / (SST * J)
alphaC_old = alphaC
while True:
alphaC = alphaC_old - mualpha * g
alphaC[alphaC < 1 - delta] = 1 - delta
alphaC[alphaC > 1 + delta] = 1 + delta
XCt = np.dot(XC, np.diag(alphaC / alphaC_old))
CtXtXC = np.dot(XCt.T, XCt)
SSE = SST - 2 * np.sum(XCt.A * XSt.A) + np.sum(CtXtXC.A * SSt.A)
if SSE <= SSE_old * (1 + 1e-9):
mualpha = mualpha * 1.2
XC = XCt
break
else:
mualpha = mualpha / 2
if delta != 0:
C = C * np.diag(alphaC)
return C, SSE, muC, mualpha, CtXtXC, XC
N, M = X.shape
if I is None:
I = range(M)
if U is None:
U = range(M)
SST = np.sum(X[:, U] * X[:, U])
# Initialize C
try:
i = furthest_sum(X[:, I], noc, [int(np.ceil(len(I) * np.random.rand()))])
except IndexError:
class InitializationException(Exception): pass
raise InitializationException("Initialization does not converge. Too few examples in dataset.")
j = range(noc)
C = csr_matrix((np.ones(len(i)), (i, j)), shape=(len(I), noc)).todense()
XC = np.dot(X[:, I], C)
muS, muC, mualpha = 1, 1, 1
# Initialise S
XCtX = np.dot(XC.T, X[:, U])
CtXtXC = np.dot(XC.T, XC)
S = -np.log(np.random.random((noc, len(U))))
S = S / np.dot(np.ones((noc, 1)), np.mat(np.sum(S, axis=0)))
SSt = np.dot(S, S.T)
SSE = SST - 2 * np.sum(XCtX.A * S.A) + np.sum(CtXtXC.A * SSt.A)
S, SSE, muS, SSt = S_update(S, XCtX, CtXtXC, muS, SST, SSE, 25)
# Set PCHA parameters
iter_ = 0
dSSE = np.inf
t1 = dt.now()
varexpl = (SST - SSE) / SST
if verbose:
print('\nPrincipal Convex Hull Analysis / Archetypal Analysis')
print('A ' + str(noc) + ' component model will be fitted')
print('To stop algorithm press control C\n')
dheader = '%10s | %10s | %10s | %10s | %10s | %10s | %10s | %10s' % ('Iteration', 'Expl. var.', 'Cost func.', 'Delta SSEf.', 'muC', 'mualpha', 'muS', ' Time(s) ')
dline = '-----------+------------+------------+-------------+------------+------------+------------+------------+'
while np.abs(dSSE) >= conv_crit * np.abs(SSE) and iter_ < maxiter and varexpl < 0.9999:
if verbose and iter_ % 100 == 0:
print(dline)
print(dheader)
print(dline)
told = t1
iter_ += 1
SSE_old = SSE
# C (and alpha) update
XSt = np.dot(X[:, U], S.T)
C, SSE, muC, mualpha, CtXtXC, XC = C_update(
X[:, I], XSt, XC, SSt, C, delta, muC, mualpha, SST, SSE, 10
)
# S update
XCtX = np.dot(XC.T, X[:, U])
S, SSE, muS, SSt = S_update(
S, XCtX, CtXtXC, muS, SST, SSE, 10
)
# Evaluate and display iteration
dSSE = SSE_old - SSE
t1 = dt.now()
if iter_ % 1 == 0:
time.sleep(0.000001)
varexpl = (SST - SSE) / SST
if verbose:
print('%10.0f | %10.4f | %10.4e | %10.4e | %10.4e | %10.4e | %10.4e | %10.4f \n' % (iter_, varexpl, SSE, dSSE/np.abs(SSE), muC, mualpha, muS, (t1-told).seconds))
# Display final iteration
varexpl = (SST - SSE) / SST
if verbose:
print(dline)
print(dline)
print('%10.0f | %10.4f | %10.4e | %10.4e | %10.4e | %10.4e | %10.4e | %10.4f \n' % (iter_, varexpl, SSE, dSSE/np.abs(SSE), muC, mualpha, muS, (t1-told).seconds))
# Sort components according to importance
ind, vals = zip(
*sorted(enumerate(np.sum(S, axis=1)), key=lambda x: x[0], reverse=1)
)
S = S[ind, :]
C = C[:, ind]
XC = XC[:, ind]
return XC, S, C, SSE, varexpl
"""FurthestSum algorithm to efficiently generate initial seeds/archetypes."""
def furthest_sum(K, noc, i, exclude=[]):
"""Furthest sum algorithm, to efficiently generat initial seed/archetypes.
Note: Commonly data is formatted to have shape (examples, dimensions).
This function takes input and returns output of the transposed shape,
(dimensions, examples).
Parameters
----------
K : numpy 2d-array
Either a data matrix or a kernel matrix.
noc : int
Number of candidate archetypes to extract.
i : int
inital observation used for to generate the FurthestSum.
exclude : numpy.1darray
Entries in K that can not be used as candidates.
Output
------
i : int
The extracted candidate archetypes
"""
def max_ind_val(l):
return max(zip(range(len(l)), l), key=lambda x: x[1])
I, J = K.shape
index = np.array(range(J))
index[exclude] = 0
index[i] = -1
ind_t = i
sum_dist = np.zeros((1, J), np.complex128)
if J > noc * I:
Kt = K
Kt2 = np.sum(Kt ** 2, axis=0)
for k in range(1, noc + 11):
if k > noc - 1:
Kq = np.dot(Kt[:, i[0]], Kt)
sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[i[0]])
index[i[0]] = i[0]
i = i[1:]
t = np.where(index != -1)[0]
Kq = np.dot(Kt[:, ind_t].T, Kt)
sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[ind_t])
ind, val = max_ind_val(sum_dist[:, t][0].real)
ind_t = t[ind]
i.append(ind_t)
index[ind_t] = -1
else:
if I != J or np.sum(K - K.T) != 0: # Generate kernel if K not one
Kt = K
K = np.dot(Kt.T, Kt)
K = np.lib.scimath.sqrt(
repmat(np.diag(K), J, 1) - 2 * K + \
repmat(np.mat(np.diag(K)).T, 1, J)
)
Kt2 = np.diag(K) # Horizontal
for k in range(1, noc + 11):
if k > noc - 1:
sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * K[i[0], :] + Kt2[i[0]])
index[i[0]] = i[0]
i = i[1:]
t = np.where(index != -1)[0]
sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * K[ind_t, :] + Kt2[ind_t])
ind, val = max_ind_val(sum_dist[:, t][0].real)
ind_t = t[ind]
i.append(ind_t)
index[ind_t] = -1
return i

582
Trainer.py Normal file
View File

@ -0,0 +1,582 @@
# Keras Utility Imports
from keras import backend as K
from keras.utils import plot_model
from collections import defaultdict
from random import shuffle
# Numpy
import numpy as np
# Maths
from math import sqrt
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.manifold.t_sne import TSNE
from sklearn.decomposition import PCA
# Plotting
import matplotlib.pyplot as plt
from PIL import ImageDraw, Image
import matplotlib.cm as cm
"""
UseFull Links:
Keras Timedistributed Wrapper is applysing the SAME Layer, which means same WEIGHTS:
http://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
"""
class Trainer(object):
def __init__(self, mode, trackCollection, classes, categorical_distribution,
batchSize=400, timesteps=5, filters=0, rotating=False):
if mode.lower() not in ['gumble', 'vae', 'refined']:
raise ValueError('Needs to be eather "gumble", "vae" or "refined"')
self.mode = mode.lower()
self.tc = trackCollection if isinstance(trackCollection, list) else list(trackCollection)
self.rotating = rotating
self.timesteps = timesteps
self.classes = classes
self.cD = categorical_distribution
self.batchSize = batchSize
self.epochs = 100
self.epsilon_std = 0.01
self.tau = K.variable(5.0, name="temperature")
self.anneal_rate = 0.0003
self.min_temperature = 0.5
_, _, height, width, _ = self.tc[0].as_n_sample_4D(
self.timesteps, for_track=list(self.tc[0].keys())[0]).shape
self.height = height
self.width = width
self.original_dim = self.timesteps * self.width * self.height * 1 # = 5*30*30 = 4500px
self.filters = int(sqrt(self.width ** 2 + self.height ** 2)) // 2 if not filters else filters
self.trained = False
self.model = None
self.encoder = None
self.generator = None
def set_model(self, model, loss, optimizer='adagrad'):
self.model = model
self.model.compile(optimizer=optimizer, loss=loss)
self.model.summary()
def set_generator(self, generator):
self.generator = generator
def set_encoder(self, encoder):
self.encoder = encoder
def load_weights(self, fileName):
self.model.load_weights(fileName)
self.trained = True
def save_weights(self, fileName):
self.model.save_weights(fileName)
def train(self, fileName=None):
for i in range(self.epochs):
tracklists = [list(x.keys()) for x in self.tc] # TODO: add additional shuffeling!!!
for keys in zip(*tracklists):
data = np.empty(0)
while data.shape[0] < self.batchSize:
tempData = [self.tc[idx].as_n_sample_4D(self.timesteps, in_walk_dir=self.rotating, for_track=key)
for idx, key in enumerate(keys)]
tempData = np.row_stack(tempData)
if data.shape[0] == 0:
data = tempData
else:
data = np.row_stack((data, tempData))
np.random.shuffle(data)
smoothing = data.shape[0] // self.batchSize * self.batchSize
if smoothing:
data = data[:smoothing]
self.model.fit(data, data, shuffle=True, epochs=1, batch_size=self.batchSize)
data = None
K.set_value(self.tau,
np.max([K.get_value(self.tau) * np.exp(- self.anneal_rate * i),
self.min_temperature]))
if fileName:
self.save_weights(fileName)
def plot_model(self, filename, show_shapes=True, show_layer_names=True):
plot_model(self.model, filename, show_shapes=show_shapes, show_layer_names=show_layer_names)
def color_random_track(self, completeSequence=False, show=True, fileName='', primaryTC=0,
nClusters=0, multiPath=False, cMode='kmeans', aMode='none'):
if not self.trained:
raise EnvironmentError('Please train this Model first!')
self.tc[primaryTC].map.refresh_random_clock()
track = self.tc[primaryTC].map.return_random_path()
if fileName:
fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
return self.color_track(track, show=show, completeSequence=completeSequence, primaryTC=primaryTC,
fileName=fileName, nClusters=nClusters, multiPath=multiPath, cMode=cMode, aMode=aMode)
def color_track(self, track, completeSequence=False, show=True, fileName='', nClusters=0, multiPath=False,
cMode='kmeans', aMode='none', primaryTC=0):
if not self.trained:
raise EnvironmentError('Please train this Model first!')
isoArray = self.tc[primaryTC].map.isovists.get_items_for_track(track, dim='full', in_walk_dir=True).swapaxes(0, 2)[
..., None].transpose((0, 2, 1, 3))
smoothing = (isoArray.shape[0] // self.timesteps) * self.timesteps
isoArray = isoArray[:smoothing]
sequenceArray = np.array([isoArray[i:i+self.timesteps] for i in range(len(isoArray)-self.timesteps)])
dummyData = self.tc[primaryTC].as_n_sample_4D(self.timesteps).astype(int)
dummyData = dummyData.reshape((-1, self.timesteps, self.height, self.width, 1))
testdata = np.row_stack((dummyData, sequenceArray))[-1000:]
keys = [track[i+self.timesteps//2] for i in range(len(sequenceArray))]
tsneArray = self.reduce_and_color(testData=testdata, nClusters=nClusters,
primaryTC=primaryTC, aMode=aMode,
cMode=cMode)[-len(sequenceArray):] + 1 # This is for color correction
npmax = np.max(tsneArray[:, -1])+1
figure = np.where(self.tc[primaryTC].map.imgArray > 0, npmax, 0)
for i in range(len(sequenceArray)):
figure[keys[i]] = tsneArray[i, -1]
if multiPath:
return keys, tsneArray[-1]
else:
self.print_n_show(figure, 'img', npmax, fileName=fileName, show=show)
self.print_n_show(tsneArray, 'scatter', npmax, fileName=fileName, show=show)
if completeSequence:
if fileName:
fileName = '%s_sequence.tif' % fileName[:fileName.find('.')]
self._Trainer__colored_sequence(tsneArray, isoArray, maxVal=npmax, show=show, fileName=fileName)
def __colored_sequence(self, tsneArray, isoArray, maxVal=0, show=True, fileName=''):
"""
Returns all the Isovist sequences for a Track, next to its class color.
:param tsneArray:
:type tsneArray:
:param isoArray:
:type isoArray:
:param maxVal:
:type maxVal:
:param show:
:type show:
:param fileName:
:type fileName:
:return:
:rtype:
"""
if not self.trained:
raise EnvironmentError('Please train this Model first!')
if maxVal == 0:
maxVal, np.max(tsneArray[:, -1])
spacing = 2
figure = np.full(((spacing + len(tsneArray) * (self.height + spacing)), (self.timesteps + 1) * self.width), 2)
# Iterate through a 4 by 4 grid with 100 spacing, to place the image
for i in range(len(tsneArray)):
backGround = np.full((self.height, self.width * (self.timesteps + 1)), tsneArray[i, -1])
sequence = isoArray[i:i + self.timesteps].swapaxes(0, 1).reshape((
self.height, self.timesteps * self.width))
sequence = np.where(sequence > 0, maxVal, 0)
backGround[:, 0:-self.width] = sequence
figure[i * self.height + i * spacing: (i + 1)*self.height + i*spacing, :] = backGround
if fileName:
fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
self.print_n_show(figure, 'img', maxVal, show=show, fileName=fileName)
@staticmethod
def print_n_show(x, mode, maxValue, show=True, fileName=''):
# Scatterplot with Classes
fig, ax = plt.subplots()
# make the picture
if mode == 'img' or mode == 'fig':
pic = ax.imshow(x, cmap='gist_ncar', vmin=0, vmax=maxValue)
cb = plt.colorbar(pic, spacing='proportional', ticks=np.linspace(0, maxValue, maxValue + 1))
elif mode == 'rgba':
pic = ax.imshow(x, cmap='gist_ncar', vmax=255)
cb = plt.colorbar(pic, spacing='proportional', ticks=np.linspace(0, 255, maxValue + 1))
elif mode == 'scatter':
scat = ax.scatter(x[:, 0], x[:, 1], c=x[:, -1], )
cb = plt.colorbar(scat, spacing='proportional', ticks=np.linspace(0, maxValue, maxValue + 1))
elif mode == 'bars':
objects = list(x.keys())
y_pos = list(range(len(objects)))
performance = [x[key] for key in objects]
bar = ax.bar(y_pos, performance, align='center')
# fig.xticks(y_pos, objects)
# fig.ylabel('Usage')
# fig.title('Programming language usage')
else:
raise ValueError('Mode needs to be "img", "fig", "bars", "rgba" or "scatter".')
fig.tight_layout()
if show:
plt.show()
if fileName:
plt.savefig(fileName)
return True
def reduce_and_color(self, testData=None, aMode='tsne', nClusters=0, cMode='kmeans', eps=5, primaryTC=0):
"""
:param testData: Numpy Arraym, shape (n, timesteps, height, width, 1)
:type testData: sdf
:param eps: When using cMode=DBSCAN,
:type eps: int
:param aMode: Dimensonal reduction mode, default = 'pca', other='tsne'
:type aMode: str
:param cMode: Clustering mode, default='kmeans', other='DBSCAN'
:type cMode: str
:param nClusters: Number of Clusters for kmeans-clustering if 0 nClusters=self.classes
:type nClusters: int
:param primaryTC: Index Number of the TrackCollection used for Basemap etc.
:type primaryTC: int
:return: Numpy Array (n, X, Y, Labels)
:rtype: np.ndarray
"""
if isinstance(testData, np.ndarray):
if testData.shape[1:] != (self.timesteps, self.height, self.width, 1):
raise ValueError('Shape must be (n, timesteps, height, width, 1), but was ', testData.shape)
if not isinstance(testData, np.ndarray):
testData = self.tc[primaryTC].as_n_sample_4D(self.timesteps)
testData = testData.reshape((-1, self.timesteps, self.height, self.width, 1))
n = testData.shape[0]
C = np.zeros((n, self.cD * self.classes if self.mode == 'gumble' else self.classes))
for i in range(0, n, 100):
c = self.encoder([testData[i:i + 100]])[0]
C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes)
if aMode == 'tsne':
array = TSNE(metric='hamming').fit_transform(C.reshape(n, -1))
elif aMode == 'pca':
array = PCA(n_components=self.classes).fit_transform(C.reshape(n, -1))
elif aMode.lower() == 'none':
array = C.reshape(n, -1)
else:
raise ValueError('"aMode" needs to be either "pca" or "tsne".')
if cMode.lower() == 'dbscan':
labels = DBSCAN(eps=eps, min_samples=10).fit_predict(array)
elif cMode.lower() in ['kmeans', 'kmean', 'k-mean', 'k-means']:
nClusters = nClusters if nClusters > 0 else self.classes
labels = KMeans(n_clusters=nClusters).fit_predict(array)
else:
raise ValueError('"cMode" needs to be either "kmeans" or "dbscan".')
if len(np.unique(labels)) > 1:
color = labels
else:
# Color Generating
X = array[:, 0] + np.min(array[:, 0])
Y = (np.min(array[:, 1]) + array[:, 1]) // (np.max(array[:, 1]) + np.min(array[:, 1]))
color = X * Y
return np.column_stack((array, color))
def viz_clusters(self, aMode='pca', cMode='kmeans', testdata=None, fileName=''):
dataArray = self.reduce_and_color(aMode=aMode, cMode=cMode, testData=testdata)
self.print_n_show(dataArray, 'scatter', np.max(dataArray[:, -1]), fileName=fileName)
return True
# THIS WORKS in all modes
def show_prediction(self, n, dataArray=None, show=True, fileName='', startI=0, primaryTC=0):
if not fileName and not show:
raise ValueError('Why are you doing this? Print smth or show it!')
if self.mode in ['gumble', 'vae', 'refined']:
if not isinstance(dataArray, np.ndarray):
dataArray = self.tc[primaryTC].as_n_sample_4D(self.timesteps)
seqWidth = self.width * self.timesteps
spacing = 1
sqrtDim = int(sqrt(n)) + 1
fullwidth = (seqWidth + spacing) * sqrtDim
fullheight = (self.height*2 + spacing) * sqrtDim
figure = np.zeros((fullheight, fullwidth))
for i in range(n):
array = dataArray[i+startI]
arr_h = self.model.predict(array.reshape((1, self.timesteps, self.height, self.width, 1)))
f = np.ones((self.height*2, seqWidth))
f[:self.height, :seqWidth] = array.reshape(seqWidth, self.height).swapaxes(0, 1)
f[self.height:self.height*2, : seqWidth] = arr_h.reshape(seqWidth, self.height).swapaxes(0, 1)
try:
y, x = divmod(i, sqrtDim)
except ZeroDivisionError:
x, y = 0, 0
figure[y*self.height*2 + y*spacing: (y+1)*self.height*2 + y*spacing,
x*seqWidth + x*spacing: (x+1)*seqWidth + x*spacing] = f
if fileName:
fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
self.print_n_show(figure, 'img', maxValue=np.max(figure), fileName=fileName)
if show:
self.print_n_show(figure, 'img', maxValue=np.max(figure))
return True
def sample_latent(self, nSamples, show=True, fileName=''):
if self.mode not in ['gumble', 'vae', 'refined']:
raise ValueError('Needs to be either of "gumble", "vae", "refined"')
if self.classes >= self.height:
raise NotImplementedError('This cannot be shown, edit the Funciton!')
seqWidth = self.width * self.timesteps
spacing = 1
sqrtDim = int(sqrt(nSamples)) + 1
fullwidth = (seqWidth + spacing) * sqrtDim
if self.mode == 'gumble':
if self.cD >= fullwidth:
raise NotImplementedError('This cannot be shown, please edit the Function!!!')
lHSpace = (self.height - self.classes) // 2
lWSpace = (seqWidth - self.cD) // 2
lShape = (self.classes, self.cD)
else:
if self.classes >= seqWidth:
raise NotImplementedError('To many Samples, this cannot be displayed, please edit the Function!!!')
lHSpace = (self.height - 1) // 2
lWSpace = (seqWidth - self.classes) // 2
lShape = (-1, self.classes)
fullheight = (self.height*2 + spacing) * sqrtDim
figure = np.zeros((fullheight, fullwidth))
for i in range(nSamples):
f = np.ones((self.height * 2, seqWidth))
if self.mode == 'gumble':
# https://stackoverflow.com/a/42874726/7746808
oneHot = np.eye(self.classes)[np.random.randint(0, self.classes, self.cD)]
sample = oneHot.reshape((-1, self.classes*self.cD))
f[lHSpace:lHSpace + self.classes, lWSpace:lWSpace + self.cD] = sample.swapaxes(0, 1).reshape(lShape)
else:
if self.mode == 'vae':
sampleSpace = np.random.randn(nSamples, self.classes)
elif self.mode == 'refined':
sampleSpace = np.random.rand(nSamples, self.classes)
else:
raise ValueError('Needs to be either of "gumble", "vae", "refined"')
sample = sampleSpace[i].reshape(lShape)
f[lHSpace:lHSpace+1, lWSpace:lWSpace + self.classes] = sample
arr_h = self.generator.predict(sample)
f[self.height:self.height*2, : seqWidth] = arr_h.reshape(seqWidth, self.height).swapaxes(0, 1)
try:
y, x = divmod(i, sqrtDim)
except ZeroDivisionError:
x, y = 0, 0
figure[y * self.height * 2 + y*spacing: (y + 1) * self.height * 2 + y*spacing,
x * seqWidth + x*spacing: (x + 1) * seqWidth + x*spacing] = f
if fileName:
fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
self.print_n_show(figure, 'img', maxValue=np.max(figure), fileName=fileName)
if show:
self.print_n_show(figure, 'img', maxValue=np.max(figure))
return True
def multi_path_coloring(self, nClusters, fileName='', state='', primaryTC=0, uncertainty=False, rgba=False):
if nClusters <= 2:
raise ValueError('More than 2 Classes are needed')
if fileName and state.lower() == 'load':
import pickle
with open(fileName, 'rb') as file:
patchDict = pickle.load(file)
else:
patchDict = defaultdict(list)
for key in self.tc[primaryTC].keys():
tempKeys, tempSequence = self.tc[primaryTC].as_n_sample_4D(self.timesteps,
in_walk_dir=True,
keys=True,
moving_window=True,
for_track=key)
C = np.zeros((len(tempSequence), self.cD * self.classes if self.mode == 'gumble' else self.classes))
for i in range(0, len(tempSequence), 100):
c = self.encoder([tempSequence[i:i + 100]])[0]
C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes)
for i, tempKey in enumerate(tempKeys):
patchDict[tempKey].append(list(C[i]))
if fileName and state.lower() == 'dump':
with open(fileName, 'wb') as f:
import pickle
pickle.dump(patchDict, f, pickle.HIGHEST_PROTOCOL)
l = list()
for x in patchDict.keys():
for elem in patchDict[x]:
l.append(elem + list(x))
a = np.array(l)
k = KMeans(nClusters).fit_predict(a[:, :-2]) + 1 # Color Correction
s = np.zeros((a.shape[0], a.shape[1] + 1))
s[:, :-1] = a
s[:, -1] = k
patchDict = defaultdict(list)
for i in range(s.shape[0]):
key = int(s[i][-3]), int(s[i][-2])
patchDict[key].append(int(s[i][-1]))
from collections import Counter
c = Counter()
for key in patchDict.keys():
c[len(set(patchDict[key]))] += 1
npmax = np.max(s[:, -1]) + 1
# npmax = 4
self.print_n_show(c, 'bars', npmax)
if rgba:
figure = Image.fromarray(np.where(self.tc[primaryTC].map.imgArray > 0, 255, 0)).convert('RGBA')
draw = ImageDraw.Draw(figure, 'RGBA')
from matplotlib import cm
cmap = cm.get_cmap('gist_ncar', 12) # 12 discrete colors
for key in patchDict.keys():
for value in patchDict[key]:
color = [int(x*255) for x in cmap(int(value), alpha=0.3)]
draw.point((key[1], key[0]), fill=tuple(color))
self.print_n_show(figure, 'rgba', npmax)
else:
figure = np.where(self.tc[primaryTC].map.imgArray > 0, npmax, 0)
for key in patchDict.keys():
c = Counter(patchDict[key])
figure[key] = c.most_common(1)[0][0]
self.print_n_show(figure, 'img', npmax)
if uncertainty:
uncertainfig = np.where(self.tc[primaryTC].map.imgArray > 0, npmax + 1, 0)
for key in patchDict.keys():
uncertainfig[key] = len(set(patchDict[key])) * 2
self.print_n_show(uncertainfig, 'img', npmax+1)
return
def show_silhouette_score(self, k_list, primaryTC=0):
X = None
for key in list(self.tc[primaryTC].keys())[:100]:
tempSequence = self.tc[primaryTC].as_n_sample_4D(self.timesteps,
in_walk_dir=True,
keys=False,
moving_window=True,
for_track=key)
C = np.zeros((len(tempSequence), self.cD * self.classes if self.mode == 'gumble' else self.classes))
for i in range(0, len(tempSequence), 100):
c = self.encoder([tempSequence[i:i + 100]])[0]
C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes)
if isinstance(X, np.ndarray):
X = np.row_stack((X, C))
else:
X = C
for n_clusters in k_list:
# Create a subplot with 1 row and 2 columns
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 7)
# The 1st subplot is the silhouette plot
# The silhouette coefficient can range from -1, 1 but in this example all
# lie within [-0.1, 1]
ax1.set_xlim([-0.1, 1])
# The (n_clusters+1)*10 is for inserting blank space between silhouette
# plots of individual clusters, to demarcate them clearly.
ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10])
# Initialize the clusterer with n_clusters value and a random generator
# seed of 10 for reproducibility.
clusterer = KMeans(n_clusters=n_clusters, random_state=10)
cluster_labels = clusterer.fit_predict(X)
# The silhouette_score gives the average value for all the samples.
# This gives a perspective into the density and separation of the formed
# clusters
silhouette_avg = silhouette_score(X, cluster_labels)
print("For n_clusters =", n_clusters,
"The average silhouette_score is :", silhouette_avg)
# Compute the silhouette scores for each sample
sample_silhouette_values = silhouette_samples(X, cluster_labels)
y_lower = 10
for i in range(n_clusters):
# Aggregate the silhouette scores for samples belonging to
# cluster i, and sort them
ith_cluster_silhouette_values = \
sample_silhouette_values[cluster_labels == i]
ith_cluster_silhouette_values.sort()
size_cluster_i = ith_cluster_silhouette_values.shape[0]
y_upper = y_lower + size_cluster_i
color = cm.spectral(float(i) / n_clusters)
ax1.fill_betweenx(np.arange(y_lower, y_upper),
0, ith_cluster_silhouette_values,
facecolor=color, edgecolor=color, alpha=0.7)
# Label the silhouette plots with their cluster numbers at the middle
ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
# Compute the new y_lower for next plot
y_lower = y_upper + 10 # 10 for the 0 samples
ax1.set_title("The silhouette plot for the various clusters.")
ax1.set_xlabel("The silhouette coefficient values")
ax1.set_ylabel("Cluster label")
# The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")
ax1.set_yticks([]) # Clear the yaxis labels / ticks
ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
# 2nd Plot showing the actual clusters formed
colors = cm.spectral(cluster_labels.astype(float) / n_clusters)
ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7,
c=colors)
# Labeling the clusters
centers = clusterer.cluster_centers_
# Draw white circles at cluster centers
ax2.scatter(centers[:, 0], centers[:, 1],
marker='o', c="white", alpha=1, s=200)
for i, c in enumerate(centers):
ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50)
ax2.set_title("The visualization of the clustered data.")
ax2.set_xlabel("Feature space for the 1st feature")
ax2.set_ylabel("Feature space for the 2nd feature")
plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
"with n_clusters = %d" % n_clusters),
fontsize=14, fontweight='bold')
plt.show()

146
VAETraining.py Normal file
View File

@ -0,0 +1,146 @@
from keras.layers import (Input, TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, MaxPooling2D,
Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda)
from keras.models import Model, Sequential
from keras import backend as K
from keras.metrics import binary_crossentropy
import numpy as np
import pickle
from math import sqrt
from Trainer import Trainer
def get_batch(X, size):
a = np.random.choice(len(X), size, replace=False)
return X[a]
def load_preprocesseed_data(filename):
if not filename.endswith('.pik'):
raise TypeError('input File needs to be a Pickle object ".pik"!')
with open(filename, 'rb') as f:
data = pickle.load(f)
return data
# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15
# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf
def slerp(val, low, high):
"""Spherical interpolation. val has a range of 0 to 1."""
if val <= 0:
return low
elif val >= 1:
return high
elif np.allclose(low, high):
return low
# noinspection PyTypeChecker
omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15))
so = np.sin(omega)
re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high
return re * np.random.rand(low.shape[-1])
if __name__ == '__main__':
K.set_image_dim_ordering('tf')
# Data PreProcessing
trackCollection = load_preprocesseed_data('test_track.pik') # Tate_10000
'''HERE IS THE TRAINING!!!!!'''
T = Trainer('vae', trackCollection, 10, categorical_distribution=0,
batchSize=1, timesteps=5, filters=10) # BatchSize: 400 for Training / 1 for Testing
# PreStage 1: Encoder Input
enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input')
# Stage 1: Encoding
enc_seq = Sequential(name='Encoder')
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1),
name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1)))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1'))
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1),
name='Conv2'))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2'))
enc_seq.add(TimeDistributed(Flatten(), name='Flatten'))
enc_seq.add(LSTM(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode'))
encoding = enc_seq(enc_input)
# Stage 2: Bottleneck
out_z_mean = Dense(T.classes, name='Dense_Mean')(encoding)
out_z_log_var = Dense(T.classes, name='Std_Dev')(encoding)
def sampling(args, batch_size=500, classes=3, epsilon_std=0.01):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(batch_size, classes), mean=0., stddev=epsilon_std)
return z_mean + K.exp(z_log_var / 2) * epsilon
z = Lambda(sampling, name='Sampling', arguments={'batch_size': T.batchSize,
'classes': T.classes,
'epsilon_std': 0.01})([out_z_mean, out_z_log_var])
# Stage 3: Decoding
dec_seq = Sequential(name='Decoder')
dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,)))
dec_seq.add(LSTM(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='LSTM_Decode'))
reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters))
dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1),
name='DeConv1'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1),
name='DeConv2'))
dec_output = dec_seq(z)
# Loss function minimized by autoencoder
def vae_objective(true, pred):
true = K.reshape(true, (-1, T.original_dim)) # !
pred = K.reshape(pred, (-1, T.original_dim)) # !
loss = binary_crossentropy(true, pred)
kl_regu = -.5 * K.sum(1. + out_z_log_var - K.square(
out_z_mean) - K.exp(out_z_log_var), axis=-1)
return loss + kl_regu
# Model
T.set_model(Model(inputs=enc_input, outputs=dec_output), vae_objective)
# Separate encoder from input to latent space
encoder = K.function([enc_input], [out_z_mean])
T.set_encoder(encoder)
# Generatorfrom latent to input space
decoder_input = Input(shape=(T.classes,))
decoder_output = dec_seq(decoder_input)
T.set_generator(Model(inputs=decoder_input, outputs=decoder_output))
if False:
T.train('VAEweights')
T.save_weights('VAEweights')
if True:
T.load_weights('VAEweights')
if False:
T.plot_model('vae.png', show_shapes=True, show_layer_names=True)
if False:
T.color_track(trackCollection[list(trackCollection.keys())[2200]],
nClusters=10, cMode='kmeans', aMode='None') # 2600
# T.color_random_track(completeSequence=False, nClusters=10, cMode='kmeans', aMode='None')
if False:
T.show_prediction(200)
if False:
T.sample_latent(200)
if False:
T.multi_path_coloring(10, fileName='', state='')
if True:
T.show_silhouette_score([120])

BIN
VAE_Gumble_Softmax.pdf Normal file

Binary file not shown.

159
VAEtate.py Normal file
View File

@ -0,0 +1,159 @@
from keras.layers import (Input, TimeDistributed, Dense, GRU, UpSampling2D, RepeatVector, MaxPooling2D,
Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda)
from keras.models import Model, Sequential
from keras import backend as K
from keras.metrics import binary_crossentropy
import numpy as np
import pickle
from math import sqrt
from Trainer import Trainer
def get_batch(X, size):
a = np.random.choice(len(X), size, replace=False)
return X[a]
def load_preprocesseed_data(filename):
"""
:param filename: The Filename of the pikled Dataset to lod
:type filename: str
:return: Trackcollection
:rtype: tools.TrackCollection
"""
if not filename.endswith('.pik'):
raise TypeError('input File needs to be a Pickle object ".pik"!')
with open(filename, 'rb') as f:
data = pickle.load(f)
return data
# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15
# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf
def slerp(val, low, high):
"""Spherical interpolation. val has a range of 0 to 1."""
if val <= 0:
return low
elif val >= 1:
return high
elif np.allclose(low, high):
return low
# noinspection PyTypeChecker
omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15))
so = np.sin(omega)
re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high
return re * np.random.rand(low.shape[-1])
if __name__ == '__main__':
K.set_image_dim_ordering('tf')
# Data PreProcessing
tate = load_preprocesseed_data('Tate_1000.pik')
maze = load_preprocesseed_data('Maze_1000.pik')
oet = load_preprocesseed_data('Oet_1000.pik')
tum = load_preprocesseed_data('Tum_1000.pik')
doom = load_preprocesseed_data('Doom_1000.pik')
priz = load_preprocesseed_data('Priz_1000.pik')
cross = load_preprocesseed_data('crossing.pik')
'''HERE IS THE TRAINING!!!!!'''
# T = Trainer('vae', [maze, tate, oet, tum, doom, priz], 10, categorical_distribution=0,
T = Trainer('vae', [tate, maze, oet, tum, doom, priz], 10, categorical_distribution=0,
batchSize=1, timesteps=9, filters=0, rotating=True) # BatchSize: 1600 for Training / 1 for Testing
# PreStage 1: Encoder Input
enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input')
# Stage 1: Encoding
enc_seq = Sequential(name='Encoder')
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1),
name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1)))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1'))
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1),
name='Conv2'))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2'))
enc_seq.add(TimeDistributed(Flatten(), name='Flatten'))
enc_seq.add(GRU(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='GRU_Encode'))
encoding = enc_seq(enc_input)
# Stage 2: Bottleneck
out_z_mean = Dense(T.classes, name='Dense_Mean')(encoding)
out_z_log_var = Dense(T.classes, name='Std_Dev')(encoding)
def sampling(args, batch_size=500, classes=3, epsilon_std=0.01):
z_mean, z_log_var = args
epsilon = K.random_normal(shape=(batch_size, classes), mean=0., stddev=epsilon_std)
return z_mean + K.exp(z_log_var / 2) * epsilon
z = Lambda(sampling, name='Sampling', arguments={'batch_size': T.batchSize,
'classes': T.classes,
'epsilon_std': 0.01})([out_z_mean, out_z_log_var])
# Stage 3: Decoding
dec_seq = Sequential(name='Decoder')
dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,)))
dec_seq.add(GRU(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='GRU_Decode'))
reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters))
dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters, kernel_size=(4, 4), strides=1),
name='DeConv1'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1),
name='DeConv2'))
dec_output = dec_seq(z)
# Loss function minimized by autoencoder
def vae_objective(true, pred):
true = K.reshape(true, (-1, T.original_dim)) # !
pred = K.reshape(pred, (-1, T.original_dim)) # !
loss = binary_crossentropy(true, pred)
kl_regu = -.5 * K.sum(1. + out_z_log_var - K.square(
out_z_mean) - K.exp(out_z_log_var), axis=-1)
return loss + kl_regu
# Model
T.set_model(Model(inputs=enc_input, outputs=dec_output), vae_objective)
# Separate encoder from input to latent space
encoder = K.function([enc_input], [out_z_mean])
T.set_encoder(encoder)
# Generatorfrom latent to input space
decoder_input = Input(shape=(T.classes,))
decoder_output = dec_seq(decoder_input)
T.set_generator(Model(inputs=decoder_input, outputs=decoder_output))
if False:
# T.load_weights('VAEall_1k_9t_GRU')
T.train('VAEall_1k_9t_GRU')
T.save_weights('VAEall_1k_9t_GRU')
if True:
T.load_weights('VAEall_1k_9t_GRU')
if False:
T.plot_model('vae.png', show_shapes=True, show_layer_names=True)
if False:
T.color_track(cross[list(cross.keys())[0]], completeSequence=True,
nClusters=10, cMode='kmeans', aMode='None') # 2600 for the 10k dataset
# T.color_random_track(completeSequence=False, nClusters=10, cMode='kmeans', aMode='None')
if False:
T.show_prediction(200)
if False:
T.sample_latent(200)
if True:
T.multi_path_coloring(10, fileName='all', state='dump', primaryTC=-2, uncertainty=True)

135
VaeGumble.py Normal file
View File

@ -0,0 +1,135 @@
from keras.layers import (Input, TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, MaxPooling2D,
Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda)
from keras.models import Model, Sequential
from keras import backend as K
from keras.metrics import binary_crossentropy
from keras.activations import softmax
import numpy as np
import pickle
from math import sqrt
from Trainer import Trainer
def get_batch(X, size):
a = np.random.choice(len(X), size, replace=False)
return X[a]
def load_preprocesseed_data(filename):
if not filename.endswith('.pik'):
raise TypeError('input File needs to be a Pickle object ".pik"!')
with open(filename, 'rb') as f:
data = pickle.load(f)
return data
if __name__ == '__main__':
K.set_image_dim_ordering('tf')
'''HERE IS THE TRAINING!!!!!'''
# Paper From https://github.com/nzw0301/keras-examples/blob/master/gumbel_softmax_vae_MNIST.ipynb
# https://arxiv.org/pdf/1611.01144.pdf
# Data PreProcessing, keep the Batchsize Shmall because of Small memory 500 Should do, rerun the fitting!
trackCollection = load_preprocesseed_data('test_track.pik')
T = Trainer('gumble', trackCollection, 10, 30, filters=10)
# PreStage 1: Encoder Input
enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='main_input')
# Stage 1: Encoding
enc_seq = Sequential(name='Encoder')
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1),
name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1)))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1'))
enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1),
name='Conv2'))
enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2'))
enc_seq.add(TimeDistributed(Flatten(), name='Flatten'))
enc_seq.add(LSTM(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode'))
encoding = enc_seq(enc_input)
# Stage 2: Bottleneck
logits_y = Dense(T.classes * T.cD)(encoding) # activation='softmax' ICh denke nicht
# Sampling Function
def sampling(logits):
U = K.random_uniform(K.shape(logits), 0, 1)
y = logits - K.log(-K.log(U + 1e-20) + 1e-20) # logits + gumbel noise
y = softmax(K.reshape(y, (-1, T.cD, T.classes)) / T.tau)
y = K.reshape(y, (-1, T.cD * T.classes))
return y
z = Lambda(sampling,)(logits_y)
# Stage 3: Decoding
dec_seq = Sequential(name='Decoder')
dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes * T.cD,)))
dec_seq.add(LSTM(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='LSTM_Decode'))
reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters))
dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1),
name='DeConv1'))
dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2'))
dec_seq.add(TimeDistributed(Deconvolution2D(activation='hard_sigmoid', filters=1, kernel_size=(5, 5), strides=1),
name='DeConv2'))
dec_output = dec_seq(z)
# Gumble Loss Function
def gumbel_loss(x, x_hat):
# N = T.cD; M = T.classes
q_y = K.reshape(logits_y, (-1, T.cD, T.classes))
q_y = softmax(q_y)
log_q_y = K.log(q_y + 1e-20)
kl_tmp = q_y * (log_q_y - K.log(1.0 / T.classes))
KL = K.sum(kl_tmp, axis=(1, 2))
x = K.reshape(x, (-1, T.original_dim)) # !
x_hat = K.reshape(x_hat, (-1, T.original_dim)) # !
elbo = T.original_dim * binary_crossentropy(x, x_hat) - KL
return elbo
T.set_model(Model(inputs=enc_input, outputs=dec_output), gumbel_loss, optimizer='adagrad')
# Generatorfrom latent to input space
decoder_input = Input(shape=(T.classes * T.cD,))
decoder_output = dec_seq(decoder_input)
T.set_generator(Model(inputs=decoder_input, outputs=decoder_output))
# Separate encoder from input to latent space
argmax_y = K.max(K.reshape(logits_y, (-1, T.cD, T.classes)), axis=-1, keepdims=True)
argmax_y = K.equal(K.reshape(logits_y, (-1, T.cD, T.classes)), argmax_y)
encoder = K.function([enc_input], [argmax_y])
T.set_encoder(encoder)
if False:
T.train('GumbleLSTMWeights')
T.save_weights('GumbleLSTMWeights')
if True:
T.load_weights('GumbleWeights')
if False:
T.plot_model('GumbleLSTM.png', show_shapes=True, show_layer_names=True)
if False:
# T.color_track(trackCollection[list(trackCollection.keys())[2200]], nClusters=4) # 2600
T.color_random_track(completeSequence=False, nClusters=4)
if False:
T.show_prediction(200, startI=200)
if False:
T.sample_latent(200)
if False:
T.multi_path_coloring(10)
if True:
T.show_silhouette_score([2,4,6,8,10,12,14])

Binary file not shown.

107
indoor_Navigation_master.py Normal file
View File

@ -0,0 +1,107 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import
from PIL import Image
import numpy as np
from pylab import imshow, savefig # , show, get_cmap
from tools import Worker, Isovist, TrackCollection, IndoorToolset # , Track, IsovistCollection
# import tensorflow as tf
if __name__ == '__main__':
walkableTiles = 255
w = Worker()
# file = 'maps\Tate.bmp'
# file = 'maps\Map.bmp'
# file = 'maps\Maze.bmp'
# file = 'maps\oet.bmp'
# file = 'maps\doom.bmp'
# file = 'maps\priz.bmp'
# file = 'maps\\tum.bmp'
file = 'maps\crossing.bmp'
with Image.open(file) as f:
baseToolset = IndoorToolset(np.array(f), walkableTiles, worker=w, isoVistSize=30)
baseToolset.refresh_random_clock()
"""HEATMAP, Random"""
# Heatmap with random starts/targets
if False:
pCol = TrackCollection(baseToolset, worker=w)
pCol.add_n_bunch_random(100, penalty=2, safe=True)
pCol.show(graphUpdate=True, saveIMG=True)
"""Homotroph Analysis"""
# Homotrphic Colored Pixels with same Starts/Targets, vertices iterating over all walkable pixels.
if False:
st = baseToolset.getRandomPos()
baseToolset.refresh_random_clock()
ta = baseToolset.getRandomPos()
pCol = TrackCollection(baseToolset)
pCol.add_n_bunch_tracks('all', st, ta)
pCol.homotopic_classification()
pCol.show(hClass=True, saveIMG=True)
"""Principal Convex Hull Analysis (PCHA) / Archetypal Analysis."""
# refer to https://github.com/ulfaslak/py_pcha for further Information and original Editor
if False:
if True:
st = baseToolset.getRandomPos()
baseToolset.refresh_random_clock()
ta = baseToolset.getRandomPos()
if False: # NOTE: use this with Maze.bmp!!!!!!
st = (98, 6)
ta = (98, 195)
if False: # NOTE: use this with Map.bmp!!!!!!
st = (15, 15)
ta = (190, 280)
pCol = TrackCollection(baseToolset)
# ToDo: Print the k value as chart
# noinspection PyUnboundLocalVariable
pCol.add_n_bunch_tracks(20, st, ta, penalty=2)
pCol.homotopic_classification()
if False:
ArchetypeList = pCol.return_archetypes(2)
pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True)
ArchetypeList = pCol.return_archetypes(4)
pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True)
ArchetypeList = pCol.return_archetypes(5)
pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True)
ArchetypeList = pCol.return_archetypes(3)
pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True)
"""Isovists Testing"""
# Some Explanation text here
if False:
point = baseToolset.getRandomPos()
print(point)
i = Isovist(*point, array=baseToolset.imgArray, walkable=walkableTiles, rangeLimit=30)
i.saveImg()
baseToolset.imgArray[point] = 160
imshow(baseToolset.imgArray)
savefig('startpos.tif', interpolation='none', cmap='gray')
"""Synthesyse n-bunch Tracks/ minimum length / storage for TensorFlow"""
# Some Explenation text here
if True:
pCol = TrackCollection(baseToolset)
pCol.add_single_track((67, 103), (67, 64), qhull=False)
pCol.add_single_track((47, 82), (88, 82), qhull=False)
#pCol.add_n_bunch_random(
# 1000, penalty=None, safe=False,
# minLen=50
# # minLen=int(sqrt(baseToolset.width * baseToolset.height) / 4)
#)
pCol.save_to_disc('crossing')
# pCol3 = TrackCollection(baseToolset)
# pCol3.recover_from_disc('synthetic_tracks_sizeDIV4')
print('Success!')
pass
pass

BIN
maps/Map.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

BIN
maps/Maze.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

BIN
maps/Tate.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 102 KiB

BIN
maps/crossing.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
maps/doom.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

BIN
maps/moep.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

BIN
maps/oet.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 248 KiB

BIN
maps/priz.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 198 KiB

BIN
maps/tum.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 159 KiB

117
refinedTraining.py Normal file
View File

@ -0,0 +1,117 @@
from keras.layers import TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, \
MaxPooling2D, Convolution2D, Deconvolution2D, Flatten, Reshape, Input
from keras.models import Sequential, Model
from keras import backend as K
import numpy as np
import pickle
from math import sqrt
from Trainer import Trainer
def get_batch(X, size):
a = np.random.choice(len(X), size, replace=False)
return X[a]
def load_preprocesseed_data(filename):
if not filename.endswith('.pik'):
raise TypeError('input File needs to be a Pickle object ".pik"!')
with open(filename, 'rb') as f:
data = pickle.load(f)
return data
# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15
# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf
def slerp(val, low, high):
"""Spherical interpolation. val has a range of 0 to 1."""
if val <= 0:
return low
elif val >= 1:
return high
elif np.allclose(low, high):
return low
# noinspection PyTypeChecker
omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15))
so = np.sin(omega)
re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high
return re * np.random.rand(low.shape[-1])
if __name__ == '__main__':
'''HERE IS THE TRAINING!!!!!'''
trackCollection = load_preprocesseed_data('test_track.pik')
K.set_image_dim_ordering('tf')
T = Trainer('refined', trackCollection, 10, categorical_distribution=0, batchSize=400, filters=10)
# PreStage 1: Encoder Input
enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input')
enc = Sequential()
enc.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1,
name='Conv1'), input_shape=(T.timesteps, T.width, T.height, 1)))
enc.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2, name='MaxPool1')))
enc.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1,
name='Conv2')))
enc.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2, name='MaxPool2')))
enc.add(TimeDistributed(Flatten(name='Flatten')))
enc.add(LSTM(int(enc.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode'))
encoding = enc(enc_input)
# Stage 2: Bottleneck
z = Dense(T.classes, activation='softmax', name='Clustering')(encoding)
#
dec = Sequential()
dec.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,)))
dec.add(LSTM(enc.layers[-2].output_shape[-1], return_sequences=True, name='LSTM_Decode'))
reValue = int(sqrt(dec.layers[-1].output_shape[-1]//T.filters))
dec.add(TimeDistributed(Reshape((reValue, reValue, T.filters))))
dec.add(TimeDistributed(UpSampling2D(2, name='Up1')))
dec.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1,
name='DeConv1')))
dec.add(TimeDistributed(UpSampling2D(2, name='Up2')))
dec.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1,
name='DeConv2')))
dec_output = dec(z)
T.set_model(Model(inputs=enc_input, outputs=dec_output), optimizer='adagrad', loss='binary_crossentropy')
decoder_input = Input(shape=(T.classes,))
decoded = dec(decoder_input)
T.set_generator(Model(inputs=decoder_input, outputs=decoded))
encoder = K.Function([enc_input], [z])
T.set_encoder(encoder)
if False:
T.train('refinedWeights')
T.save_weights('refinedWeights')
if True:
T.load_weights('refinedWeights')
if False:
T.plot_model('refined.png', show_shapes=True, show_layer_names=True)
if False:
# T.color_track(trackCollection[list(trackCollection.keys())[2200]]) # 2600
T.color_track(trackCollection[list(trackCollection.keys())[2200]], nClusters=4) # 2600
# T.color_random_track(completeSequence=True)
if False:
T.show_prediction(200)
if False:
T.sample_latent(200)
if False:
T.multi_path_coloring(10)
if True:
T.show_silhouette_score([120])

986
tools.py Normal file
View File

@ -0,0 +1,986 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import
import pickle
from multiprocessing import Pool
import networkx as nx
from PIL import Image, ImageDraw
from math import sqrt, hypot, degrees, atan2
import random
import numpy as np
import time
from pylab import imshow, show, get_cmap, savefig
from collections import UserList, UserDict
import scipy.spatial as sp
from scipy import ndimage
from PCHA import PCHA
# from py_pcha.PCHA import PCHA
from operator import itemgetter
from dtw import dtw
workercount = 4
class Worker(object):
def __init__(self, n=workercount):
self.pool = Pool(processes=n)
self.timer = time.clock()
def do_work_onClass(self, itemList, taskName, *args):
results = []
for item in itemList:
task = getattr(item, taskName)
results.append(self.pool.apply_async(task, args=(*args,)))
self.pool.close()
self.pool.join()
return [r.get() for r in results]
def do_work(self, itemList, task):
results = []
for item in itemList:
results.append((self.pool.apply_async(task, args=(item, ))))
self.pool.close()
self.pool.join()
return [r.get() for r in results]
def do_2_work(self, itemList1, itemList2, task):
if len(itemList1) != len(itemList2):
raise ValueError('ItemLists need to be of same shape!')
results = []
for item1, item2 in itemList1, itemList2:
results.append((self.pool.apply_async(task, args=(item1, item2, ))))
self.pool.close()
self.pool.join()
return [r.get() for r in results]
def init_many(self, classObject, argsList):
results = self.pool.starmap(classObject, argsList)
self.pool.close()
self.pool.join()
return results
class IsovistCollection(UserDict):
def __init__(self, walkable, rangeLimit, tileArray, worker=None):
super(IsovistCollection, self).__init__()
if not isinstance(worker, Worker):
raise TypeError
self.data = dict()
self.walkable = walkable
self.tileArray = tileArray
self.rangeLimit = rangeLimit
self.lfr = None
if isinstance(self.tileArray, np.ndarray):
workerResult = worker.init_many(
Isovist, [(*npIdx, self.tileArray, self.walkable, self.rangeLimit)
for npIdx, value in np.ndenumerate(self.tileArray) if value == self.walkable])
self.data = {isovist.vertex: isovist for isovist in workerResult}
# The following would be a non multithreaded approach, maybe activate it for smaller blueprints later
# TODO: Activate this for smaller Blueprints, when multithreading would lead to overhead
# for ndIndex, value in np.ndenumerate(self.tileArray):
# if value == self.walkable:
# self.addIsovist(*ndIndex)
else:
pass
# TODO Nachträglich mehrere Isovisten durch multiprozesse hinzufügen
def add_isovist(self, x, y):
"""
Generate and add Isovist for specif coordinate.
:param x: X-Coordinate
:type x: int
:param y: Y-Coordinate
:type y: int
:return: Just a boolean as control-function.
:rtype: bool
"""
self[(x, y)] = (Isovist(x, y, self.tileArray, self.walkable, self.rangeLimit))
return True
@staticmethod
def get_angle(point, target):
"""
Calculate the angle between two points in degrees
https://stackoverflow.com/questions/9970281/java-calculating-the-angle-between-two-points-in-degrees
:param point: The point from where to measure.
:type point: (int, int)
:param target: The point to where to measure.
:type target: (int, int)
:return: Angle between two points in degree.
:rtype: int
"""
angle = degrees(atan2(target[1] - point[1], target[0] - point[0]))
if angle < 0:
angle += 360
return int(angle)
def rotateIsovist(self, isovist, prev, curr):
"""
Rotate an numpy-array or Isovist class object regarding the angle between the two points.
How-to-rotate-a-matrix-by-45-degrees:
https://math.stackexchange.com/questions/732679/how-to-rotate-a-matrix-by-45-degrees
https://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.ndimage.interpolation.rotate.html
:param isovist: Numpy array or Isovist class object.
:type isovist: np.ndarray or Isovist
:param prev: T-1 point comming from.
:type prev: (int, int)
:param curr: T-0 Point currently standing on.
:type curr: (int, int)
:return:
:rtype: np.ndarray or Isovist
"""
if isinstance(isovist, Isovist):
array = isovist.visArray
elif isinstance(isovist, np.ndarray):
array = isovist
else:
raise TypeError('Must be either np.ndarray or Isovist class object, but was: ', type(isovist))
# Calculate how many times to rotate by 90°
cur_angle = self.get_angle(prev, curr)
if self.lfr is not None:
if abs(self.lfr - cur_angle) == 45:
rotation = abs(self.lfr // 90)
else:
self.lfr = cur_angle
rotation = abs(cur_angle // 90)
else:
self.lfr = cur_angle
rotation = abs(cur_angle // 90)
if isinstance(isovist, Isovist):
isovist.visArray = np.rot90(array, rotation)
return isovist
else:
return np.rot90(array, rotation)
def get_items_for_track(self, track, dim='flat', in_walk_dir=False):
"""
:param track: Track class Object or list of int holding path coordinates.
:type track: Track or list of (int, int)
:param dim: Either 'flat' for a flattened numpy array or 'full' for all dimensions.
:type dim: str
:param in_walk_dir: Determine if the Isovist should rotate in walking direktion.
:type in_walk_dir: bool
:return: An array of Isovist arrays, one for each coordinate in track.
:rtype: np.ndarray
"""
if dim not in ['flat', 'full']:
raise ValueError('Dim can either be "flat" or "full".')
if not isinstance(track, Track):
raise TypeError('Please provide a Track object')
if not in_walk_dir:
if dim == 'flat':
return np.dstack([self[point].get_1D_array() for point in track])
else:
return np.dstack([self[point].visArray for point in track])
elif in_walk_dir and dim == 'full':
self.lfr = None
return np.dstack([self.rotateIsovist(self[currP].visArray, prevP, currP)
for prevP, currP in zip(track[:-1], track[1:])])
else:
raise ValueError('For a rotation in walking direction, please choose "full" output-mode.')
# [nb_samples, nb_frames, width, height, channels]
class Isovist(object):
def __init__(self, x, y, array, walkable, rangeLimit):
"""
"Calculate lit squares from the given location and radius through 'Shadow Casting'"
Source:
http://www.roguebasin.com/index.php?title=FOV_using_recursive_shadowcasting
http://www.roguebasin.com/index.php?title=PythonShadowcastingImplementation
:param x: y-part of the center coordinate from where the 'light' travels
:type x: int
:param y: X-part of the center coordinate from where the 'light' travels
:type y: int
:param array: Numpy Array holding the background image
:type array: np.ndarray
:param walkable: The value which identifies positions in the array through which light can travel
:type walkable: int or (int, int, int)
:param rangeLimit: Determine the radius in which pixels of the shadow needs to be calculated
:type rangeLimit: int
"""
mult = [[1, 0, 0, -1, -1, 0, 0, 1],
[0, 1, -1, 0, 0, -1, 1, 0],
[0, 1, 1, 0, 0, -1, -1, 0],
[1, 0, 0, 1, -1, 0, 0, -1]]
self.x = x
self.y = y
self.vertex = (self.x, self.y)
self.rangeLimit = rangeLimit if rangeLimit else int(sqrt(array.shape[0] * array.shape[1]))
self.visArray = np.zeros(array.shape, dtype=bool)
for octant in range(8):
self.__cast_light(self.x, self.y, 1, 1.0, 0.0, self.rangeLimit,
mult[0][octant], mult[1][octant],
mult[2][octant], mult[3][octant], 0, array, walkable)
offset = int(rangeLimit/2)
# self.visArray = self.visArray[
# max(int(x-self.rangeLimit), 0):min(self.visArray.shape[0], int(x+self.rangeLimit)),
# max(int(y-self.rangeLimit), 0):min(self.visArray.shape[1], int(y+self.rangeLimit))]
self.visArray = np.pad(self.visArray, ((offset, offset), (offset, offset)),
mode='constant')[self.x:self.x+rangeLimit, self.y:self.y+rangeLimit]
self.size = np.sum(self.visArray)
centroid = ndimage.measurements.center_of_mass(self.visArray.astype(int))
# centroid = np.average(self.visArray[:,:2], axis=0, weights=self.visArray[:,2]) # TODO: Baue eine np.Lösung
# https://stackoverflow.com/questions/29356825/python-calculate-center-of-mass
self.Xcent = centroid[0]
self.Ycent = centroid[1]
@staticmethod
def __blocksLight(x, y, array, walkable):
if x < 0 or y < 0:
return True
try:
return False if array[x, y] == walkable else True
except IndexError:
return True
def __setVisible(self, x, y):
if x > 0 or y > 0:
try:
self.visArray[x, y] = True
return
except IndexError:
return
def __isVisible(self, x, y):
return self.visArray[x, y]
def __cast_light(self, cx, cy, row, start, end, radius, xx, xy, yx, yy, idx, array, walkable):
"""Recursive lightcasting function"""
if start < end:
return
radius_squared = radius * radius
for j in range(row, radius + 1):
dx, dy = -j - 1, -j
blocked = False
while dx <= 0:
dx += 1
# Translate the dx, dy coordinates into map coordinates:
X, Y = cx + dx * xx + dy * xy, cy + dx * yx + dy * yy
# l_slope and r_slope store the slopes of the left and right
# extremities of the square we're considering:
l_slope, r_slope = (dx - 0.5) / (dy + 0.5), (dx + 0.5) / (dy - 0.5)
if start < r_slope:
continue
elif end > l_slope:
break
else:
# Our light beam is touching this square; light it:
if dx * dx + dy * dy < radius_squared:
self.__setVisible(X, Y)
if blocked:
# we're scanning a row of blocked squares:
if self.__blocksLight(X, Y, array, walkable):
new_start = r_slope
continue
else:
blocked = False
start = new_start
else:
if self.__blocksLight(X, Y, array, walkable) and j < radius:
# This is a blocking square, start a child scan:
blocked = True
self.__cast_light(cx, cy, j + 1, start, l_slope,
radius, xx, xy, yx, yy, idx + 1, array, walkable)
new_start = r_slope
# Row is scanned; do next row unless last square was blocked:
if blocked:
break
def saveImg(self, filename='Isovist.tif'):
filename = filename if filename.endswith('.tif') else '%s.tif' % filename
imshow(self.visArray, interpolation='none', cmap='gray')
savefig(filename)
def get_1D_array(self):
return self.visArray.ravel()
class TrackCollection(UserDict):
def __init__(self, indoorToolset, worker=None):
"""
:param indoorToolset: An indoorToolset with baseMap holding cell values
:type indoorToolset: indoor_Toolset
:rtype: TrackCollection
"""
if not isinstance(indoorToolset, IndoorToolset):
raise TypeError
super(UserDict, self).__init__()
self.data = dict()
self.map = indoorToolset
self.hDict = dict()
self.archeArray = None
self.maxLen = self.__update_maxLen()
self.worker = worker if worker else Worker(n=workercount)
def __update_maxLen(self):
if self:
return max([len(self[track]) for track in self.keys()])
else:
return 0
def homotopic_classification(self):
print('Homotopic Classification started!')
baseArray = self.map.imgArray.astype(bool).astype(int)
massNames = list(self.keys())
for track in massNames.copy():
found = False
for key in reversed(list(self.hDict.keys())):
if self[key].isHomotopic(self[track], self.map, baseArray=baseArray):
self.hDict[key].append(track)
self[track].group = key
found = True
break
if not found:
self.hDict[track] = [track]
self[track].group = track
print('All Done\n%i Classes could be observed.' % len(self.hDict))
return True
def add_single_track(self, start, target, penalty=None, qhull=True):
track = self.map.calculate_path(start, target, penalty=penalty, qhull=qhull)
key = self.__find_list_middle(track)
track.vertex = key
self[key] = track
return True
def add_n_bunch_tracks(self, n, start, target, nbunch=None, penalty=None):
def build_track(segment1, segment2):
combined = list()
combined.extend(segment1 + list(reversed(segment2)))
return Track(combined, self.map.walkableTile)
if isinstance(penalty, int) or isinstance(penalty, float):
for i in range(n):
track = self.map.calculate_path(start, target, penalty=penalty)
key = self.__find_list_middle(track)
track.vertex = key
self[key] = track
else:
if nbunch and not n:
if isinstance(nbunch, list):
if all([isinstance(track, Track) for track in nbunch]):
for i, track in enumerate(nbunch):
key = self.__find_list_middle(track)
track.vertex = key
self[key] = track
n = i
else:
singleSourceDij_S = nx.single_source_dijkstra_path(self.map.graph, start, weight='weight')
print('Start-Tree Created!')
singleSourceDij_T = nx.single_source_dijkstra_path(self.map.graph, target, weight='weight')
print('Target-Tree Created!')
if isinstance(n, str):
if n == 'all':
n = len(self.map.graph)
if n > 10:
allNodes = self.map.graph.nodes()
[allNodes.remove(p) for p in [start, target]]
if len(allNodes) > n:
rand = random.Random()
while len(allNodes) > n:
rand.seed(time.clock())
allNodes.remove(rand.randrange(len(allNodes)))
allTracks = self.worker.do_2_work(
[singleSourceDij_S[point] for point in allNodes if
singleSourceDij_S.get(point, None) and singleSourceDij_T.get(point, None)],
[singleSourceDij_T[point] for point in allNodes if
singleSourceDij_S.get(point, None) and singleSourceDij_T.get(point, None)],
build_track)
for track in allTracks:
self[track.vertex] = track
else:
for i in range(n):
point = self.map.getRandomPos()
segmentS = singleSourceDij_S[point]
segmentT = singleSourceDij_T[point]
self[point] = build_track(segmentS, segmentT)
self[point].vertex = point
self.maxLen = self.__update_maxLen()
print('%i tracks added!!' % n)
return True
@staticmethod
def __find_list_middle(input_list):
middle = float(len(input_list)) / 2
if middle % 2 != 0:
return input_list[int(middle - .5)]
else:
return input_list[int(middle - 1)]
def add_n_bunch_random(self, n, penalty=None, safe=True, minLen=0):
"""
:type n: int
:type penalty: float or int or None
:type safe: bool
:type minLen: int
:rtype: bool
"""
if not isinstance(n, int) or not isinstance(minLen, int):
raise TypeError
if n >= 50:
results = self.worker.do_work([None] * n, getattr(self.map, 'return_random_path'))
# TODO: Hier geht es weiter --> Pool object in self.map class maybe change the multiprocess call
for track in results:
while minLen and len(track) <= minLen:
print('Was too Small..')
track = self.map.return_random_path(penalty=penalty, safe=safe)
mid = self.__find_list_middle(track)
track.vertex = mid
self[mid] = track
else:
for i in range(n):
track = self.map.return_random_path(penalty=penalty, safe=safe)
while minLen and len(track) <= minLen:
track = self.map.return_random_path(penalty=penalty, safe=safe)
mid = self.__find_list_middle(track)
track.vertex = mid
self[mid] = track
self.maxLen = self.__update_maxLen()
print('returning %i Tracks' % n)
return True
def show(self, graphUpdate=False, saveIMG=False, hClass=False, trackList=None, allTracks=False):
"""
:param graphUpdate: Determine whether Node Values in Connected Graph are used.
:type graphUpdate: bool
:param saveIMG: Additionally save it as ".tif"-File
:type saveIMG: bool
:param hClass: Draw the hClasses
:type hClass: bool
:param trackList: None or list
:param allTracks: Show all Tracks grey in Background
:type allTracks: bool
:return : Show or Print the Bitmap
:rtype : None
"""
if allTracks:
allTracks = self.values()
if hClass:
self.map.show(self.hDict, hClass=True, graphUpdate=graphUpdate, saveIMG=saveIMG,
trackList=trackList, allTracks=allTracks)
else:
self.map.show(graphUpdate=graphUpdate, saveIMG=saveIMG, trackList=trackList, allTracks=allTracks)
def fill_archeArray(self):
idxLenList = [[idx, trck.tracklen] for idx, trck in zip(self.keys(), self.values())]
idxLenList.sort(key=itemgetter(1))
p = Pool()
# TODO: WAS mache ich denn eigentlich hier?
# TODO: Für sowas habe ich doch einen Worker Pool und brauche keinen neuen zu deployen
poolResults = []
t = time.clock()
print('All_Core_MultiProcessing_Pool started\n'
'Set Up & Loaded with %i tasks \ntime is %f' % ((len(self)), t))
for track in self.keys():
poolResults.append(p.apply_async(extract_arch_attributes,
args=(self[track], self[idxLenList[0][0]], list(self.hDict.keys()),)
))
p.close()
p.join()
print('Pool closed, merging... time is %F\n%f seconds taken...' % (time.clock(), time.clock() - t))
for result in poolResults:
if self.archeArray is not None:
self.archeArray = np.vstack([self.archeArray, result.get()])
if self.archeArray is None:
self.archeArray = result.get()
print(len(self.archeArray) if self.archeArray is not None else 0, ' Attributes Added!\n')
return True
def return_archetypes(self, k):
if not self.hDict:
self.homotopic_classification()
if self.archeArray is None:
self.fill_archeArray()
if self.archeArray is not None:
data = self.archeArray.copy()[:, 1:].T.astype(np.float64)
# https: // github.com / ulfaslak / py_pcha
# X = np.random.random((dimensions, examples)) Transpose array with array.T
# needed to change the data-dtype
XC, S, C, SSE, varexpl = PCHA(data, noc=k, delta=0.1, verbose=False)
print(' Arc #1, Arc #2, Arc #3 Arc #4\n\n', XC,
'\n\n Variables explained: ', varexpl)
else:
print('not yet implemented')
return False
# Ref -- http://stackoverflow.com/questions/1401712/how-can-the-euclidean-distance-be-calculated-with-numpy
# Ref2 -- http://stackoverflow.com/questions/8049798/understanding-nested-list-comprehension
# noinspection PyTypeChecker
idxArray = np.argmin([[np.linalg.norm(canidate - arche)
for canidate in self.archeArray[:, 1:]] for arche in XC.T], axis=1)
return [self[key] for key in [self.archeArray[:, 0][idx] for idx in idxArray]]
def return_walkableTileList(self):
return [npIndex for npIndex, value in np.ndenumerate(self.map.imgArray) if value == self.map.walkableTile]
def save_to_disc(self, filename):
filename = filename if filename.endswith('.pik') else '%s%s' % (filename, '.pik')
self.worker = None
with open(filename, 'wb') as output:
pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
self.worker = Worker(n=workercount)
def recover_from_disc(self, filename):
filename = filename if filename.endswith('.pik') else '%s%s' % (filename, '.pik')
with open(filename, 'rb') as file:
pick = pickle.load(file)
self.data = pick.data
self.map = pick.map
self.hDict = pick.hDict
self.archeArray = pick.archeArray
self.maxLen = self.__update_maxLen()
def as_n_sample_4D(self, timessteps, moving_window=False, stackSize=0, start=0,
in_walk_dir=False, keys=False, for_track=None):
stackList, keyList = list(), list()
if moving_window:
for i, key in enumerate(self.keys()):
if for_track:
track = self[for_track]
else:
track = self[key]
if stackSize and len(stackList) > stackSize:
stackList = stackList[:stackSize]
if keys:
keyList = keyList[:stackSize]
break
if i >= start:
isoArray = self.map.isovists.get_items_for_track(track, dim='full', in_walk_dir=in_walk_dir)
isoArray = isoArray.swapaxes(0, 2)
tempSequence = [isoArray[j:j + timessteps] for j in range(len(isoArray) - (timessteps - 1))]
if keys:
tempKeys = [track[(j + 1) + timessteps // 2] for j in range(len(tempSequence))]
keyList.extend(tempKeys)
stackList.extend(tempSequence)
if for_track:
break
else:
for i, key in enumerate(self.keys()):
if for_track:
track = self[for_track]
else:
track = self[key]
if stackSize and i > stackSize*(start+1):
break
if i >= start*stackSize:
diaStack = self.map.isovists.get_items_for_track(track, dim='full',
in_walk_dir=in_walk_dir).swapaxes(0, 2)
x = diaStack.shape[0] // timessteps
tempSequence = np.array_split(diaStack[:timessteps*x], x)
if keys:
tempKeys = [track[(j * timessteps) + 1 + (timessteps // 2)] for j in range(len(tempSequence))]
keyList.extend(tempKeys)
stackList.extend(tempSequence)
if for_track:
break
if keys:
return keyList, np.stack(stackList).astype(int)[..., None]
else:
return np.stack(stackList).astype(int)[..., None]
def as_flatTfArray(self, maxLen=0):
if maxLen == -1: # Ignore the maxLen Parameter
return np.dstack([self.map.isovists.get_items_for_track(self[key]) for key in self.keys()])
if not maxLen: # Default maxLen of longest track in collection, not calles when: maxLen >= 1
maxLen = self.maxLen
resultArray = None
for track in self:
sizedArray = np.dstack([self.map.isovists[self[track][-1]].get_1D_array() for _ in range(maxLen)])
isoArray = self.map.isovists.get_items_for_track(self[track])
sizedArray[:isoArray.shape[0], :isoArray.shape[1], :isoArray.shape[2]] = isoArray
if resultArray is not None:
resultArray = np.vstack([resultArray, sizedArray.ravel()])
elif resultArray is None:
resultArray = sizedArray.ravel()
return resultArray
def draw_track(self, key, saveIMG=''):
imArray = self.map.imgArray.copy()
for pixel in self[key]:
imArray[pixel] = 15
imshow(self.map.imgArray)
if saveIMG:
if not isinstance(saveIMG, str):
raise TypeError('Needs to be a String or Basestring as Name')
saveIMG = saveIMG if saveIMG.endswith('.tif') else '%s.tif' % saveIMG
savefig(saveIMG)
class Track(UserList):
def __init__(self, NodeList, walkableTile, qhull=True):
if not isinstance(NodeList, list):
raise TypeError
super(UserList, self).__init__()
self.walkableTile = walkableTile
self.data = NodeList.copy()
self.group = None
self.vertex = None
self.hull = sp.ConvexHull(np.array(self)) if qhull else None
self.tracklen = self.length()
def __setitem__(self, i, item):
self.data[i] = item
self.tracklen = self.length()
def __delitem__(self, i):
del self.data[i]
self.tracklen = self.length()
def length(self, *args):
"""
:param args: Pass a foreign list if Points list(tuple(x,y)), this function then acts as @static method
:type args: [(int,int)]
:return: Sum of distance between every following point.
:rtype: float
Reference:
http://stackoverflow.com/questions/21216841/length-of-a-list-of-points/21217048
"""
if len(args) == 0:
return sum([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in zip(self[:-1], self[1:])])
else:
return sum([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in zip(args[0][:-1], args[0][1:])])
def areaCHull(self):
# http://stackoverflow.com/questions/21727199/
# python-convex-hull-with-scipy-spatial-delaunay-how-to-eleminate-points-inside-t
if not self.hull:
self.hull = sp.ConvexHull(np.array(self))
xList, yList = [self.hull.points[i][0] for i in self.hull.vertices], \
[self.hull.points[i][1] for i in self.hull.vertices]
# http: // stackoverflow.com / questions / 19873596 / convex - hull - area - in -python
return 0.5 * np.abs(np.dot(xList, np.roll(yList, 1)) - np.dot(yList, np.roll(xList, 1)))
def getStart(self):
return self[0]
def getTarget(self):
return self[-1]
def isHomotopic(self, track, indoorToolset, baseArray=None):
if not isinstance(track, Track):
raise TypeError
l, l2 = self.data.copy(), track.data.copy()
l.extend(reversed(l2))
img = Image.new('L', (indoorToolset.width, indoorToolset.height), 0)
ImageDraw.Draw(img).polygon(l, outline=1, fill=1)
binPoly = np.array(img)
if baseArray is None:
baseArray = indoorToolset.imgArray
a = (binPoly * baseArray).sum()
if a >= 1:
return False
else:
return True
def mergeWith(self, track):
"""
:param track: A track to merge with
:type track: Track or list
:return: Two merged tracks
:rtype: Track
"""
if isinstance(track, Track):
l2 = track.data
elif isinstance(track, list):
l2 = track
else:
typ, prefix = str(type(track)), ('a', 'e', 'i', 'o', 'u')
raise TypeError('The has to be a List or a Track, but was %s: "%s"' %
('an' if typ.startswith(prefix) else 'a', typ))
l = self.data
l.extend(l2)
return Track(l, self.walkableTile)
def return_isovists(self, trackCollection=None, indoorToolset=None):
if isinstance(trackCollection, TrackCollection):
return [trackCollection.map.isovists[point] for point in self]
elif isinstance(indoorToolset, IndoorToolset):
return [trackCollection.isovists[point] for point in self]
else:
print('Please provide a TrackCollection or a Indoortoolset that holds the Isovist reference.')
return False
class IndoorToolset(object):
def __init__(self, imageArray, walkableTile, graph=None, worker=None, isoVistSize=25):
"""
:param graph: An optional Graph
:type graph: nx.Graph
"""
if not isinstance(imageArray, np.ndarray) or not isinstance(worker, Worker):
raise TypeError
self.walkableTile = walkableTile
self.imgArray = imageArray
self.shape = self.imgArray.shape
self.height = self.shape[0]
self.width = self.shape[1]
if graph is not None and isinstance(graph, nx.Graph):
self.graph = graph.copy()
else:
self.graph = self.translate_to_graph()
self.__rand = random.Random()
self.isovists = IsovistCollection(self.walkableTile, isoVistSize, self.imgArray, worker=worker)
def refresh_random_clock(self):
self.__rand.seed(time.clock())
def copy(self):
return IndoorToolset(self.imgArray.copy(), self.walkableTile, graph=self.graph.copy())
def __len__(self):
return self.width * self.height
def show(self, *args, graphUpdate=False, saveIMG=False, hClass=False, trackList=None, allTracks=None):
def print_n_show(img, name, hot=False):
# http://stackoverflow.com/questions/9406400/how-can-i-use-a-pre-made-color-map-for-my-heat-map-in-matplotlib
if hot:
imshow(img, cmap=get_cmap("hot")) # , interpolation='nearest') # Additional Option
else:
imshow(img)
if saveIMG:
savefig(name)
show()
if graphUpdate:
maX = max(nx.get_node_attributes(self.graph, 'count').values())
for node in self.graph.nodes():
maX = maX if maX >= 1 else 1
color = int(self.graph.node[node]['count'] / maX * 100)
self.imgArray[node] = color if color is not 0 else 255
print_n_show(self.imgArray, "heatMap.tif", hot=True)
if hClass:
if not args[0]:
print('Not Classified!')
pass
else:
hDict = args[0].copy()
for i, hClassKey in enumerate(hDict.keys()):
color = i + 1 * 255 / len(hDict)
for track in hDict[hClassKey]:
self.imgArray[track] = int(color)
print_n_show(self.imgArray.copy(), 'hClass.tif')
if allTracks:
for track in allTracks:
for pixel in track:
self.imgArray[pixel] = 7
if trackList:
if isinstance(trackList, list):
for i, track in enumerate(trackList):
color = i + 1 * 255 / len(trackList)
for pixel in track:
self.imgArray[pixel] = int(color)
print_n_show(self.imgArray, 'tracks.tif')
def getRandomPos(self, verbose=False):
"""
:param verbose: Print more infromation
:type verbose: bool
:return: A random walkable position in the graph
:rtype: (int, int)
"""
self.refresh_random_clock()
rs = self.graph.nodes()[self.__rand.randrange(len(self.graph))]
if verbose:
print(rs, ' is random Position -> checking accessibiliy')
notWalkable = False
if self.imgArray[rs] != self.walkableTile:
notWalkable = True
while notWalkable:
self.refresh_random_clock()
rs = self.graph.nodes()[self.__rand.randrange(len(self.graph))]
if verbose:
print(rs, 'needed to be computed, upper was not walkable')
if verbose:
print('is valid!')
return rs
def translate_to_graph(self):
graph = nx.Graph()
for idx, value in np.ndenumerate(self.imgArray):
if value == self.walkableTile:
x, y = idx
graph.add_node((x, y), count=0)
# up
if graph.has_node((x, y - 1)):
graph.add_edge((x, y), (x, y - 1), weight=1)
# upLeft
if graph.has_node((x - 1, y - 1)):
graph.add_edge((x, y), (x - 1, y - 1), weight=sqrt(2))
# lowerLeft
if graph.has_node((x - 1, y + 1)):
graph.add_edge((x, y), (x - 1, y + 1), weight=sqrt(2))
# left
if graph.has_node((x - 1, y)):
graph.add_edge((x, y), (x - 1, y), weight=1)
return graph
def calculate_path(self, source, target, alg='dijkstra', path=None, penalty=None, qhull=True):
"""
Calculate a path through the graph, based on the Bitmap you imported.
:param source: (X,Y) Positions Tuple to route from
:type source: (int, int)
:param target: (X,Y) Positions Tuple to route to
:type target: (int, int)
:param alg: Define the Routing Algorithm through the graph
:type alg: basestring
:param path: Use this for Updating edge weights for an allready given Path
in form of a (X,Y) Position Tuple List
:type path: ((int, int))
:param penalty: Set a Nummer for applying edge weights
:type penalty: None or float
:return: Calculates an Path with the given algorithm, default: 'Dijkstra'
:rtype: Track
"""
dij_s_p = list()
if not path and not isinstance(path, list):
if alg == 'dijkstra':
dij_s_p = nx.dijkstra_path(self.graph, source, target, weight='weight')
elif alg == 'bi_dijkstra':
dij_s_p = nx.bidirectional_dijkstra(self.graph, source, target, weight='weight')
else:
dij_s_p = path
if penalty and (isinstance(penalty, float) or isinstance(penalty, int)):
for node in dij_s_p.copy():
oldCount = self.graph.node[node]['count']
self.graph.add_node(node, count=oldCount + penalty)
for currNode, nextNode in zip(dij_s_p[:-1], dij_s_p[1:]):
oldWeight = self.graph.edge[currNode][nextNode]['weight']
self.graph.add_edge(currNode, nextNode, weight=oldWeight + penalty)
track = Track(dij_s_p, self.walkableTile, qhull=qhull)
print(len(dij_s_p), ' Len Path generated -> Nodes: ', len(self.graph), ' -> Edges: ', len(self.graph.edges()))
return track
def return_random_path(self, penalty=None, safe=False):
"""
Calculate a single random shortest path
:param penalty: Set a Nummer for applying edge weights
:type penalty: None or float
:param safe: Apply a connectivity check before returning.
Affects the performance, only usefull when dealing with multiple non connected components.
:type safe: bool
:return: A Random shortest Path.
:rtype: Track
"""
p, p2 = (0, 0), (0, 0)
# while angle in degree modulo 45 == 0 or distance <= sqrt(2)
while degrees(atan2(p[1] - p2[1], p[0] - p2[0])) % 45 == 0 or hypot(p[0] - p2[0], p[1] - p2[1]) <= sqrt(2):
p, p2 = self.getRandomPos(), self.getRandomPos()
print('source: ', p, 'target: ', p2, ' - generated')
if safe:
while not nx.has_path(self.graph, p, p2):
p, p2 = self.getRandomPos(), self.getRandomPos()
print('unconnected -> New Try: S=', p, ' T=', p2)
return self.calculate_path(p, p2, penalty=penalty)
# Extraction Function - had to be static because of multiprocessing
def extract_arch_attributes(track, shortestT, hClassList):
attributes = list()
# ▪ TrackID - do not use as real attribute for analysis input!!!!!!!!!!
# remove by: dataArray[:, 1:] Subset without first Attribute
attributes.append(track.vertex)
# ▪ Convex hulls area,
attributes.append(track.areaCHull())
# ▪ length,
attributes.append(track.length(track.hull.points))
#
# Longest Distance between two vertives of the Convex Hull, this is probably the euclidian distance
# between start and target - ups
# cLD = max([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in combinations(track.hull.points, 2)]))
# attributes.append(cLD))
#
# ▪ vertices,
attributes.append(len(track))
# ▪ ,centroid - (distance to mipoint between start und target)
# http://stackoverflow.com/questions/31562534/scipy-centroid-of-convex-hull
centroid = list((np.mean(track.hull.vertices[0]), np.mean(track.hull.vertices[1])))
midpoint = list(((track[0][0] + track[-1][0]) / 2, (track[0][1] + track[-1][1]) / 2))
cMidDist = hypot(centroid[0] - midpoint[0], centroid[1] - midpoint[1])
attributes.append(cMidDist)
# ▪ and orientation --http://stackoverflow.com/questions/31735499/calculate-angle-clockwise-between-two-points
# https://docs.python.org/dev/reference/expressions.html#calls
ang1 = np.arctan2(*centroid[::-1])
ang2 = np.arctan2(*midpoint[::-1])
attributes.append(np.rad2deg((ang1 - ang2) % (2 * np.pi)))
# ▪ Length
attributes.append(track.tracklen)
# ▪ Angular sum(cancelling / positive)
# Not yet implemented
# ▪ Relative length Regarding the shortest route
attributes.append(track.tracklen / shortestT.tracklen)
# ▪ DTW distance
dist, cost, acc, path = \
dtw(np.array(track), np.array(shortestT), dist=lambda x, y: np.linalg.norm(x - y, ord=1))
attributes.append(dist)
# ▪ Pixels average / min “heat”
# Not yet implemented
# ▪ Homotrophic class - remap (x,y)Coord-Tuple-Keys to int representation
attributes.append(hClassList.index(track.group))
return np.array(attributes)