diff --git a/PCHA.py b/PCHA.py new file mode 100644 index 0000000..e032f3b --- /dev/null +++ b/PCHA.py @@ -0,0 +1,314 @@ +"""Principal Convex Hull Analysis (PCHA) / Archetypal Analysis.""" + +from __future__ import division +import numpy as np +from numpy.matlib import repmat +from scipy.sparse import csr_matrix +from datetime import datetime as dt +import time + +#from furthest_sum import furthest_sum + + +def PCHA(X, noc, I=None, U=None, delta=0, verbose=False, conv_crit=1E-6, maxiter=500): + """Return archetypes of dataset. + + Note: Commonly data is formatted to have shape (examples, dimensions). + This function takes input and returns output of the transposed shape, + (dimensions, examples). + + Parameters + ---------- + X : numpy.2darray + Data matrix in which to find archetypes + + noc : int + Number of archetypes to find + + I : 1d-array + Entries of X to use for dictionary in C (optional) + + U : 1d-array + Entries of X to model in S (optional) + + + Output + ------ + XC : numpy.2darray + I x noc feature matrix (i.e. XC=X[:,I]*C forming the archetypes) + + S : numpy.2darray + noc x length(U) matrix, S>=0 |S_j|_1=1 + + C : numpy.2darray + noc x length(U) matrix, S>=0 |S_j|_1=1 + + SSE : float + Sum of Squared Errors + + varexlp : float + Percent variation explained by the model + """ + def S_update(S, XCtX, CtXtXC, muS, SST, SSE, niter): + """Update S for one iteration of the algorithm.""" + noc, J = S.shape + e = np.ones((noc, 1)) + for k in range(niter): + SSE_old = SSE + g = (np.dot(CtXtXC, S) - XCtX) / (SST / J) + g = g - e * np.sum(g.A * S.A, axis=0) + + S_old = S + while True: + S = (S_old - g * muS).clip(min=0) + S = S / np.dot(e, np.sum(S, axis=0)) + SSt = S * S.T + SSE = SST - 2 * np.sum(XCtX.A * S.A) + np.sum(CtXtXC.A * SSt.A) + if SSE <= SSE_old * (1 + 1e-9): + muS = muS * 1.2 + break + else: + muS = muS / 2 + + return S, SSE, muS, SSt + + def C_update(X, XSt, XC, SSt, C, delta, muC, mualpha, SST, SSE, niter=1): + """Update C for one iteration of the algorithm.""" + J, nos = C.shape + + if delta != 0: + alphaC = np.sum(C, axis=0).A[0] + C = np.dot(C, np.diag(1 / alphaC)) + + e = np.ones((J, 1)) + XtXSt = np.dot(X.T, XSt) + + for k in range(niter): + + # Update C + SSE_old = SSE + g = (np.dot(X.T, np.dot(XC, SSt)) - XtXSt) / SST + + if delta != 0: + g = np.dot(g, np.diag(alphaC)) + g = g.A - e * np.sum(g.A * C.A, axis=0) + + C_old = C + while True: + C = (C_old - muC * g).clip(min=0) + nC = np.sum(C, axis=0) + np.finfo(float).eps + C = np.dot(C, np.diag(1 / nC.A[0])) + + if delta != 0: + Ct = C * np.diag(alphaC) + else: + Ct = C + + XC = np.dot(X, Ct) + CtXtXC = np.dot(XC.T, XC) + SSE = SST - 2 * np.sum(XC.A * XSt.A) + np.sum(CtXtXC.A * SSt.A) + + if SSE <= SSE_old * (1 + 1e-9): + muC = muC * 1.2 + break + else: + muC = muC / 2 + + # Update alphaC + SSE_old = SSE + if delta != 0: + g = (np.diag(CtXtXC * SSt).T / alphaC - np.sum(C.A * XtXSt.A)) / (SST * J) + alphaC_old = alphaC + while True: + alphaC = alphaC_old - mualpha * g + alphaC[alphaC < 1 - delta] = 1 - delta + alphaC[alphaC > 1 + delta] = 1 + delta + + XCt = np.dot(XC, np.diag(alphaC / alphaC_old)) + CtXtXC = np.dot(XCt.T, XCt) + SSE = SST - 2 * np.sum(XCt.A * XSt.A) + np.sum(CtXtXC.A * SSt.A) + + if SSE <= SSE_old * (1 + 1e-9): + mualpha = mualpha * 1.2 + XC = XCt + break + else: + mualpha = mualpha / 2 + + if delta != 0: + C = C * np.diag(alphaC) + + return C, SSE, muC, mualpha, CtXtXC, XC + + N, M = X.shape + + + if I is None: + I = range(M) + if U is None: + U = range(M) + + SST = np.sum(X[:, U] * X[:, U]) + + 
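+    # SST is the total sum of squares of the modelled entries; the loop below tracks
+    # SSE against it, so varexpl = (SST - SSE) / SST is the fraction of variance explained.
+    # The algorithm then alternates projected-gradient updates of C (archetypes as convex
+    # combinations of data points) and S (data points as convex combinations of
+    # archetypes), each with its own adaptive step size (muC, mualpha, muS).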
# Initialize C + try: + i = furthest_sum(X[:, I], noc, [int(np.ceil(len(I) * np.random.rand()))]) + except IndexError: + class InitializationException(Exception): pass + raise InitializationException("Initialization does not converge. Too few examples in dataset.") + + j = range(noc) + C = csr_matrix((np.ones(len(i)), (i, j)), shape=(len(I), noc)).todense() + + XC = np.dot(X[:, I], C) + + muS, muC, mualpha = 1, 1, 1 + + # Initialise S + XCtX = np.dot(XC.T, X[:, U]) + CtXtXC = np.dot(XC.T, XC) + S = -np.log(np.random.random((noc, len(U)))) + S = S / np.dot(np.ones((noc, 1)), np.mat(np.sum(S, axis=0))) + SSt = np.dot(S, S.T) + SSE = SST - 2 * np.sum(XCtX.A * S.A) + np.sum(CtXtXC.A * SSt.A) + S, SSE, muS, SSt = S_update(S, XCtX, CtXtXC, muS, SST, SSE, 25) + + # Set PCHA parameters + iter_ = 0 + dSSE = np.inf + t1 = dt.now() + varexpl = (SST - SSE) / SST + + if verbose: + print('\nPrincipal Convex Hull Analysis / Archetypal Analysis') + print('A ' + str(noc) + ' component model will be fitted') + print('To stop algorithm press control C\n') + + dheader = '%10s | %10s | %10s | %10s | %10s | %10s | %10s | %10s' % ('Iteration', 'Expl. var.', 'Cost func.', 'Delta SSEf.', 'muC', 'mualpha', 'muS', ' Time(s) ') + dline = '-----------+------------+------------+-------------+------------+------------+------------+------------+' + + while np.abs(dSSE) >= conv_crit * np.abs(SSE) and iter_ < maxiter and varexpl < 0.9999: + if verbose and iter_ % 100 == 0: + print(dline) + print(dheader) + print(dline) + told = t1 + iter_ += 1 + SSE_old = SSE + + # C (and alpha) update + XSt = np.dot(X[:, U], S.T) + C, SSE, muC, mualpha, CtXtXC, XC = C_update( + X[:, I], XSt, XC, SSt, C, delta, muC, mualpha, SST, SSE, 10 + ) + + # S update + XCtX = np.dot(XC.T, X[:, U]) + S, SSE, muS, SSt = S_update( + S, XCtX, CtXtXC, muS, SST, SSE, 10 + ) + + # Evaluate and display iteration + dSSE = SSE_old - SSE + t1 = dt.now() + if iter_ % 1 == 0: + time.sleep(0.000001) + varexpl = (SST - SSE) / SST + if verbose: + print('%10.0f | %10.4f | %10.4e | %10.4e | %10.4e | %10.4e | %10.4e | %10.4f \n' % (iter_, varexpl, SSE, dSSE/np.abs(SSE), muC, mualpha, muS, (t1-told).seconds)) + + # Display final iteration + varexpl = (SST - SSE) / SST + if verbose: + print(dline) + print(dline) + print('%10.0f | %10.4f | %10.4e | %10.4e | %10.4e | %10.4e | %10.4e | %10.4f \n' % (iter_, varexpl, SSE, dSSE/np.abs(SSE), muC, mualpha, muS, (t1-told).seconds)) + + # Sort components according to importance + ind, vals = zip( + *sorted(enumerate(np.sum(S, axis=1)), key=lambda x: x[0], reverse=1) + ) + S = S[ind, :] + C = C[:, ind] + XC = XC[:, ind] + + return XC, S, C, SSE, varexpl + + +"""FurthestSum algorithm to efficiently generate initial seeds/archetypes.""" +def furthest_sum(K, noc, i, exclude=[]): + """Furthest sum algorithm, to efficiently generat initial seed/archetypes. + + Note: Commonly data is formatted to have shape (examples, dimensions). + This function takes input and returns output of the transposed shape, + (dimensions, examples). + + Parameters + ---------- + K : numpy 2d-array + Either a data matrix or a kernel matrix. + + noc : int + Number of candidate archetypes to extract. + + i : int + inital observation used for to generate the FurthestSum. + + exclude : numpy.1darray + Entries in K that can not be used as candidates. 
+ + Output + ------ + i : int + The extracted candidate archetypes + """ + + def max_ind_val(l): + return max(zip(range(len(l)), l), key=lambda x: x[1]) + + I, J = K.shape + index = np.array(range(J)) + index[exclude] = 0 + index[i] = -1 + ind_t = i + sum_dist = np.zeros((1, J), np.complex128) + + if J > noc * I: + Kt = K + Kt2 = np.sum(Kt ** 2, axis=0) + for k in range(1, noc + 11): + if k > noc - 1: + Kq = np.dot(Kt[:, i[0]], Kt) + sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[i[0]]) + index[i[0]] = i[0] + i = i[1:] + t = np.where(index != -1)[0] + Kq = np.dot(Kt[:, ind_t].T, Kt) + sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[ind_t]) + ind, val = max_ind_val(sum_dist[:, t][0].real) + ind_t = t[ind] + i.append(ind_t) + index[ind_t] = -1 + else: + if I != J or np.sum(K - K.T) != 0: # Generate kernel if K not one + Kt = K + K = np.dot(Kt.T, Kt) + K = np.lib.scimath.sqrt( + repmat(np.diag(K), J, 1) - 2 * K + \ + repmat(np.mat(np.diag(K)).T, 1, J) + ) + + Kt2 = np.diag(K) # Horizontal + for k in range(1, noc + 11): + if k > noc - 1: + sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * K[i[0], :] + Kt2[i[0]]) + index[i[0]] = i[0] + i = i[1:] + t = np.where(index != -1)[0] + sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * K[ind_t, :] + Kt2[ind_t]) + ind, val = max_ind_val(sum_dist[:, t][0].real) + ind_t = t[ind] + i.append(ind_t) + index[ind_t] = -1 + + return i diff --git a/Trainer.py b/Trainer.py new file mode 100644 index 0000000..77fb5d6 --- /dev/null +++ b/Trainer.py @@ -0,0 +1,582 @@ +# Keras Utility Imports +from keras import backend as K +from keras.utils import plot_model + +from collections import defaultdict +from random import shuffle + +# Numpy +import numpy as np + +# Maths +from math import sqrt +from sklearn.cluster import DBSCAN, KMeans +from sklearn.metrics import silhouette_samples, silhouette_score +from sklearn.manifold.t_sne import TSNE +from sklearn.decomposition import PCA + +# Plotting +import matplotlib.pyplot as plt +from PIL import ImageDraw, Image +import matplotlib.cm as cm +""" +UseFull Links: + +Keras Timedistributed Wrapper is applysing the SAME Layer, which means same WEIGHTS: +http://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/ + +""" + + +class Trainer(object): + def __init__(self, mode, trackCollection, classes, categorical_distribution, + batchSize=400, timesteps=5, filters=0, rotating=False): + if mode.lower() not in ['gumble', 'vae', 'refined']: + raise ValueError('Needs to be eather "gumble", "vae" or "refined"') + + self.mode = mode.lower() + self.tc = trackCollection if isinstance(trackCollection, list) else list(trackCollection) + + self.rotating = rotating + self.timesteps = timesteps + + self.classes = classes + self.cD = categorical_distribution + self.batchSize = batchSize + self.epochs = 100 + self.epsilon_std = 0.01 + self.tau = K.variable(5.0, name="temperature") + self.anneal_rate = 0.0003 + self.min_temperature = 0.5 + _, _, height, width, _ = self.tc[0].as_n_sample_4D( + self.timesteps, for_track=list(self.tc[0].keys())[0]).shape + + self.height = height + self.width = width + self.original_dim = self.timesteps * self.width * self.height * 1 # = 5*30*30 = 4500px + self.filters = int(sqrt(self.width ** 2 + self.height ** 2)) // 2 if not filters else filters + + self.trained = False + self.model = None + self.encoder = None + self.generator = None + + def set_model(self, model, loss, optimizer='adagrad'): + self.model = model + self.model.compile(optimizer=optimizer, loss=loss) + 
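+        # summary() below echoes the compiled layer stack so input/output shapes can be sanity-checked.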
self.model.summary() + + def set_generator(self, generator): + self.generator = generator + + def set_encoder(self, encoder): + self.encoder = encoder + + def load_weights(self, fileName): + self.model.load_weights(fileName) + self.trained = True + + def save_weights(self, fileName): + self.model.save_weights(fileName) + + def train(self, fileName=None): + for i in range(self.epochs): + tracklists = [list(x.keys()) for x in self.tc] # TODO: add additional shuffeling!!! + for keys in zip(*tracklists): + data = np.empty(0) + while data.shape[0] < self.batchSize: + tempData = [self.tc[idx].as_n_sample_4D(self.timesteps, in_walk_dir=self.rotating, for_track=key) + for idx, key in enumerate(keys)] + tempData = np.row_stack(tempData) + if data.shape[0] == 0: + data = tempData + else: + data = np.row_stack((data, tempData)) + np.random.shuffle(data) + smoothing = data.shape[0] // self.batchSize * self.batchSize + if smoothing: + data = data[:smoothing] + self.model.fit(data, data, shuffle=True, epochs=1, batch_size=self.batchSize) + data = None + + K.set_value(self.tau, + np.max([K.get_value(self.tau) * np.exp(- self.anneal_rate * i), + self.min_temperature])) + if fileName: + self.save_weights(fileName) + + def plot_model(self, filename, show_shapes=True, show_layer_names=True): + plot_model(self.model, filename, show_shapes=show_shapes, show_layer_names=show_layer_names) + + def color_random_track(self, completeSequence=False, show=True, fileName='', primaryTC=0, + nClusters=0, multiPath=False, cMode='kmeans', aMode='none'): + if not self.trained: + raise EnvironmentError('Please train this Model first!') + + self.tc[primaryTC].map.refresh_random_clock() + track = self.tc[primaryTC].map.return_random_path() + if fileName: + fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName + + return self.color_track(track, show=show, completeSequence=completeSequence, primaryTC=primaryTC, + fileName=fileName, nClusters=nClusters, multiPath=multiPath, cMode=cMode, aMode=aMode) + + def color_track(self, track, completeSequence=False, show=True, fileName='', nClusters=0, multiPath=False, + cMode='kmeans', aMode='none', primaryTC=0): + if not self.trained: + raise EnvironmentError('Please train this Model first!') + isoArray = self.tc[primaryTC].map.isovists.get_items_for_track(track, dim='full', in_walk_dir=True).swapaxes(0, 2)[ + ..., None].transpose((0, 2, 1, 3)) + smoothing = (isoArray.shape[0] // self.timesteps) * self.timesteps + isoArray = isoArray[:smoothing] + sequenceArray = np.array([isoArray[i:i+self.timesteps] for i in range(len(isoArray)-self.timesteps)]) + dummyData = self.tc[primaryTC].as_n_sample_4D(self.timesteps).astype(int) + dummyData = dummyData.reshape((-1, self.timesteps, self.height, self.width, 1)) + testdata = np.row_stack((dummyData, sequenceArray))[-1000:] + + keys = [track[i+self.timesteps//2] for i in range(len(sequenceArray))] + + tsneArray = self.reduce_and_color(testData=testdata, nClusters=nClusters, + primaryTC=primaryTC, aMode=aMode, + cMode=cMode)[-len(sequenceArray):] + 1 # This is for color correction + npmax = np.max(tsneArray[:, -1])+1 + figure = np.where(self.tc[primaryTC].map.imgArray > 0, npmax, 0) + + for i in range(len(sequenceArray)): + figure[keys[i]] = tsneArray[i, -1] + + if multiPath: + return keys, tsneArray[-1] + else: + self.print_n_show(figure, 'img', npmax, fileName=fileName, show=show) + self.print_n_show(tsneArray, 'scatter', npmax, fileName=fileName, show=show) + + if completeSequence: + if fileName: + fileName = '%s_sequence.tif' 
% fileName[:fileName.find('.')]
+            self.__colored_sequence(tsneArray, isoArray, maxVal=npmax, show=show, fileName=fileName)
+
+    def __colored_sequence(self, tsneArray, isoArray, maxVal=0, show=True, fileName=''):
+        """
+        Renders all the isovist sequences for a track, next to their class color.
+        :param tsneArray: Reduced and colored data, one row per sequence.
+        :type tsneArray: np.ndarray
+        :param isoArray: Isovist frames for the track.
+        :type isoArray: np.ndarray
+        :param maxVal: Maximum color value; 0 means it is derived from tsneArray.
+        :type maxVal: int
+        :param show: Whether to display the figure.
+        :type show: bool
+        :param fileName: Output file name; empty string disables saving.
+        :type fileName: str
+        :return: None
+        """
+        if not self.trained:
+            raise EnvironmentError('Please train this Model first!')
+        if maxVal == 0:
+            maxVal = np.max(tsneArray[:, -1])
+
+        spacing = 2
+        figure = np.full(((spacing + len(tsneArray) * (self.height + spacing)), (self.timesteps + 1) * self.width), 2)
+
+        # Stack one row per sequence: a class-colored background with the isovist frames on top
+        for i in range(len(tsneArray)):
+            backGround = np.full((self.height, self.width * (self.timesteps + 1)), tsneArray[i, -1])
+
+            sequence = isoArray[i:i + self.timesteps].swapaxes(0, 1).reshape((
+                self.height, self.timesteps * self.width))
+            sequence = np.where(sequence > 0, maxVal, 0)
+            backGround[:, 0:-self.width] = sequence
+            figure[i * self.height + i * spacing: (i + 1)*self.height + i*spacing, :] = backGround
+        if fileName:
+            fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
+        self.print_n_show(figure, 'img', maxVal, show=show, fileName=fileName)
+
+    @staticmethod
+    def print_n_show(x, mode, maxValue, show=True, fileName=''):
+        # Scatterplot with classes
+        fig, ax = plt.subplots()
+        # make the picture
+        if mode == 'img' or mode == 'fig':
+            pic = ax.imshow(x, cmap='gist_ncar', vmin=0, vmax=maxValue)
+            cb = plt.colorbar(pic, spacing='proportional', ticks=np.linspace(0, maxValue, maxValue + 1))
+        elif mode == 'rgba':
+            pic = ax.imshow(x, cmap='gist_ncar', vmax=255)
+            cb = plt.colorbar(pic, spacing='proportional', ticks=np.linspace(0, 255, maxValue + 1))
+        elif mode == 'scatter':
+            scat = ax.scatter(x[:, 0], x[:, 1], c=x[:, -1], )
+            cb = plt.colorbar(scat, spacing='proportional', ticks=np.linspace(0, maxValue, maxValue + 1))
+        elif mode == 'bars':
+            objects = list(x.keys())
+            y_pos = list(range(len(objects)))
+            performance = [x[key] for key in objects]
+
+            bar = ax.bar(y_pos, performance, align='center')
+            # fig.xticks(y_pos, objects)
+            # fig.ylabel('Usage')
+            # fig.title('Programming language usage')
+        else:
+            raise ValueError('Mode needs to be "img", "fig", "bars", "rgba" or "scatter".')
+        fig.tight_layout()
+        if show:
+            plt.show()
+        if fileName:
+            plt.savefig(fileName)
+
+        return True
+
+    def reduce_and_color(self, testData=None, aMode='tsne', nClusters=0, cMode='kmeans', eps=5, primaryTC=0):
+        """
+        :param testData: Numpy array, shape (n, timesteps, height, width, 1)
+        :type testData: np.ndarray
+        :param eps: Neighbourhood radius used when cMode='dbscan'.
+        :type eps: int
+        :param aMode: Dimensionality-reduction mode, default='tsne', other='pca' or 'none'
+        :type aMode: str
+        :param cMode: Clustering mode, default='kmeans', other='dbscan'
+        :type cMode: str
+        :param nClusters: Number of clusters for k-means clustering; if 0, nClusters=self.classes
+        :type nClusters: int
+        :param primaryTC: Index number of the TrackCollection used for the base map etc.
+ :type primaryTC: int + :return: Numpy Array (n, X, Y, Labels) + :rtype: np.ndarray + """ + if isinstance(testData, np.ndarray): + if testData.shape[1:] != (self.timesteps, self.height, self.width, 1): + raise ValueError('Shape must be (n, timesteps, height, width, 1), but was ', testData.shape) + + if not isinstance(testData, np.ndarray): + testData = self.tc[primaryTC].as_n_sample_4D(self.timesteps) + testData = testData.reshape((-1, self.timesteps, self.height, self.width, 1)) + + n = testData.shape[0] + + C = np.zeros((n, self.cD * self.classes if self.mode == 'gumble' else self.classes)) + + for i in range(0, n, 100): + c = self.encoder([testData[i:i + 100]])[0] + C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes) + + if aMode == 'tsne': + array = TSNE(metric='hamming').fit_transform(C.reshape(n, -1)) + elif aMode == 'pca': + array = PCA(n_components=self.classes).fit_transform(C.reshape(n, -1)) + elif aMode.lower() == 'none': + array = C.reshape(n, -1) + else: + raise ValueError('"aMode" needs to be either "pca" or "tsne".') + + if cMode.lower() == 'dbscan': + labels = DBSCAN(eps=eps, min_samples=10).fit_predict(array) + + elif cMode.lower() in ['kmeans', 'kmean', 'k-mean', 'k-means']: + nClusters = nClusters if nClusters > 0 else self.classes + labels = KMeans(n_clusters=nClusters).fit_predict(array) + else: + raise ValueError('"cMode" needs to be either "kmeans" or "dbscan".') + + if len(np.unique(labels)) > 1: + color = labels + else: + # Color Generating + X = array[:, 0] + np.min(array[:, 0]) + Y = (np.min(array[:, 1]) + array[:, 1]) // (np.max(array[:, 1]) + np.min(array[:, 1])) + color = X * Y + + return np.column_stack((array, color)) + + def viz_clusters(self, aMode='pca', cMode='kmeans', testdata=None, fileName=''): + dataArray = self.reduce_and_color(aMode=aMode, cMode=cMode, testData=testdata) + + self.print_n_show(dataArray, 'scatter', np.max(dataArray[:, -1]), fileName=fileName) + return True + + # THIS WORKS in all modes + def show_prediction(self, n, dataArray=None, show=True, fileName='', startI=0, primaryTC=0): + if not fileName and not show: + raise ValueError('Why are you doing this? 
Print something or show it!')
+        if self.mode in ['gumble', 'vae', 'refined']:
+            if not isinstance(dataArray, np.ndarray):
+                dataArray = self.tc[primaryTC].as_n_sample_4D(self.timesteps)
+        seqWidth = self.width * self.timesteps
+        spacing = 1
+        sqrtDim = int(sqrt(n)) + 1
+        fullwidth = (seqWidth + spacing) * sqrtDim
+        fullheight = (self.height*2 + spacing) * sqrtDim
+
+        figure = np.zeros((fullheight, fullwidth))
+        for i in range(n):
+
+            array = dataArray[i+startI]
+            arr_h = self.model.predict(array.reshape((1, self.timesteps, self.height, self.width, 1)))
+            f = np.ones((self.height*2, seqWidth))
+            f[:self.height, :seqWidth] = array.reshape(seqWidth, self.height).swapaxes(0, 1)
+            f[self.height:self.height*2, : seqWidth] = arr_h.reshape(seqWidth, self.height).swapaxes(0, 1)
+
+            try:
+                y, x = divmod(i, sqrtDim)
+            except ZeroDivisionError:
+                x, y = 0, 0
+
+            figure[y*self.height*2 + y*spacing: (y+1)*self.height*2 + y*spacing,
+                   x*seqWidth + x*spacing: (x+1)*seqWidth + x*spacing] = f
+        if fileName:
+            fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
+            self.print_n_show(figure, 'img', maxValue=np.max(figure), fileName=fileName)
+        if show:
+            self.print_n_show(figure, 'img', maxValue=np.max(figure))
+
+        return True
+
+    def sample_latent(self, nSamples, show=True, fileName=''):
+        if self.mode not in ['gumble', 'vae', 'refined']:
+            raise ValueError('Needs to be either of "gumble", "vae", "refined"')
+        if self.classes >= self.height:
+            raise NotImplementedError('This cannot be shown, edit the function!')
+
+        seqWidth = self.width * self.timesteps
+        spacing = 1
+        sqrtDim = int(sqrt(nSamples)) + 1
+        fullwidth = (seqWidth + spacing) * sqrtDim
+        if self.mode == 'gumble':
+            if self.cD >= fullwidth:
+                raise NotImplementedError('This cannot be shown, please edit the function!')
+            lHSpace = (self.height - self.classes) // 2
+            lWSpace = (seqWidth - self.cD) // 2
+            lShape = (self.classes, self.cD)
+
+        else:
+            if self.classes >= seqWidth:
+                raise NotImplementedError('Too many samples; this cannot be displayed, please edit the function!')
+            lHSpace = (self.height - 1) // 2
+            lWSpace = (seqWidth - self.classes) // 2
+            lShape = (-1, self.classes)
+
+        fullheight = (self.height*2 + spacing) * sqrtDim
+        figure = np.zeros((fullheight, fullwidth))
+
+        for i in range(nSamples):
+            f = np.ones((self.height * 2, seqWidth))
+
+            if self.mode == 'gumble':
+                # https://stackoverflow.com/a/42874726/7746808
+                oneHot = np.eye(self.classes)[np.random.randint(0, self.classes, self.cD)]
+                sample = oneHot.reshape((-1, self.classes*self.cD))
+                f[lHSpace:lHSpace + self.classes, lWSpace:lWSpace + self.cD] = sample.swapaxes(0, 1).reshape(lShape)
+            else:
+                if self.mode == 'vae':
+                    sampleSpace = np.random.randn(nSamples, self.classes)
+                elif self.mode == 'refined':
+                    sampleSpace = np.random.rand(nSamples, self.classes)
+                else:
+                    raise ValueError('Needs to be either of "gumble", "vae", "refined"')
+
+                sample = sampleSpace[i].reshape(lShape)
+                f[lHSpace:lHSpace+1, lWSpace:lWSpace + self.classes] = sample
+
+            arr_h = self.generator.predict(sample)
+            f[self.height:self.height*2, : seqWidth] = arr_h.reshape(seqWidth, self.height).swapaxes(0, 1)
+
+            try:
+                y, x = divmod(i, sqrtDim)
+            except ZeroDivisionError:
+                x, y = 0, 0
+
+            figure[y * self.height * 2 + y*spacing: (y + 1) * self.height * 2 + y*spacing,
+                   x * seqWidth + x*spacing: (x + 1) * seqWidth + x*spacing] = f
+
+        if fileName:
+            fileName = fileName if fileName.endswith('.tif') else '%s.tif' % fileName
+            self.print_n_show(figure, 'img', maxValue=np.max(figure),
fileName=fileName) + if show: + self.print_n_show(figure, 'img', maxValue=np.max(figure)) + return True + + def multi_path_coloring(self, nClusters, fileName='', state='', primaryTC=0, uncertainty=False, rgba=False): + if nClusters <= 2: + raise ValueError('More than 2 Classes are needed') + + if fileName and state.lower() == 'load': + import pickle + with open(fileName, 'rb') as file: + patchDict = pickle.load(file) + else: + patchDict = defaultdict(list) + for key in self.tc[primaryTC].keys(): + + tempKeys, tempSequence = self.tc[primaryTC].as_n_sample_4D(self.timesteps, + in_walk_dir=True, + keys=True, + moving_window=True, + for_track=key) + + C = np.zeros((len(tempSequence), self.cD * self.classes if self.mode == 'gumble' else self.classes)) + + for i in range(0, len(tempSequence), 100): + c = self.encoder([tempSequence[i:i + 100]])[0] + C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes) + + for i, tempKey in enumerate(tempKeys): + patchDict[tempKey].append(list(C[i])) + + if fileName and state.lower() == 'dump': + with open(fileName, 'wb') as f: + import pickle + pickle.dump(patchDict, f, pickle.HIGHEST_PROTOCOL) + + l = list() + for x in patchDict.keys(): + for elem in patchDict[x]: + l.append(elem + list(x)) + + a = np.array(l) + k = KMeans(nClusters).fit_predict(a[:, :-2]) + 1 # Color Correction + + s = np.zeros((a.shape[0], a.shape[1] + 1)) + s[:, :-1] = a + s[:, -1] = k + + patchDict = defaultdict(list) + for i in range(s.shape[0]): + key = int(s[i][-3]), int(s[i][-2]) + patchDict[key].append(int(s[i][-1])) + + from collections import Counter + c = Counter() + for key in patchDict.keys(): + c[len(set(patchDict[key]))] += 1 + + npmax = np.max(s[:, -1]) + 1 + # npmax = 4 + self.print_n_show(c, 'bars', npmax) + + if rgba: + figure = Image.fromarray(np.where(self.tc[primaryTC].map.imgArray > 0, 255, 0)).convert('RGBA') + draw = ImageDraw.Draw(figure, 'RGBA') + from matplotlib import cm + cmap = cm.get_cmap('gist_ncar', 12) # 12 discrete colors + + for key in patchDict.keys(): + for value in patchDict[key]: + color = [int(x*255) for x in cmap(int(value), alpha=0.3)] + draw.point((key[1], key[0]), fill=tuple(color)) + + self.print_n_show(figure, 'rgba', npmax) + else: + figure = np.where(self.tc[primaryTC].map.imgArray > 0, npmax, 0) + for key in patchDict.keys(): + c = Counter(patchDict[key]) + figure[key] = c.most_common(1)[0][0] + self.print_n_show(figure, 'img', npmax) + + if uncertainty: + uncertainfig = np.where(self.tc[primaryTC].map.imgArray > 0, npmax + 1, 0) + for key in patchDict.keys(): + uncertainfig[key] = len(set(patchDict[key])) * 2 + self.print_n_show(uncertainfig, 'img', npmax+1) + return + + def show_silhouette_score(self, k_list, primaryTC=0): + X = None + for key in list(self.tc[primaryTC].keys())[:100]: + tempSequence = self.tc[primaryTC].as_n_sample_4D(self.timesteps, + in_walk_dir=True, + keys=False, + moving_window=True, + for_track=key) + C = np.zeros((len(tempSequence), self.cD * self.classes if self.mode == 'gumble' else self.classes)) + + for i in range(0, len(tempSequence), 100): + c = self.encoder([tempSequence[i:i + 100]])[0] + C[i:i + 100] = c.reshape(-1, self.cD * self.classes if self.mode == 'gumble' else self.classes) + if isinstance(X, np.ndarray): + X = np.row_stack((X, C)) + else: + X = C + + for n_clusters in k_list: + # Create a subplot with 1 row and 2 columns + fig, (ax1, ax2) = plt.subplots(1, 2) + fig.set_size_inches(18, 7) + + # The 1st subplot is the silhouette plot + # The silhouette 
coefficient can range from -1, 1 but in this example all + # lie within [-0.1, 1] + ax1.set_xlim([-0.1, 1]) + # The (n_clusters+1)*10 is for inserting blank space between silhouette + # plots of individual clusters, to demarcate them clearly. + ax1.set_ylim([0, len(X) + (n_clusters + 1) * 10]) + + # Initialize the clusterer with n_clusters value and a random generator + # seed of 10 for reproducibility. + clusterer = KMeans(n_clusters=n_clusters, random_state=10) + cluster_labels = clusterer.fit_predict(X) + + # The silhouette_score gives the average value for all the samples. + # This gives a perspective into the density and separation of the formed + # clusters + silhouette_avg = silhouette_score(X, cluster_labels) + print("For n_clusters =", n_clusters, + "The average silhouette_score is :", silhouette_avg) + + # Compute the silhouette scores for each sample + sample_silhouette_values = silhouette_samples(X, cluster_labels) + + y_lower = 10 + for i in range(n_clusters): + # Aggregate the silhouette scores for samples belonging to + # cluster i, and sort them + ith_cluster_silhouette_values = \ + sample_silhouette_values[cluster_labels == i] + + ith_cluster_silhouette_values.sort() + + size_cluster_i = ith_cluster_silhouette_values.shape[0] + y_upper = y_lower + size_cluster_i + + color = cm.spectral(float(i) / n_clusters) + ax1.fill_betweenx(np.arange(y_lower, y_upper), + 0, ith_cluster_silhouette_values, + facecolor=color, edgecolor=color, alpha=0.7) + + # Label the silhouette plots with their cluster numbers at the middle + ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i)) + + # Compute the new y_lower for next plot + y_lower = y_upper + 10 # 10 for the 0 samples + + ax1.set_title("The silhouette plot for the various clusters.") + ax1.set_xlabel("The silhouette coefficient values") + ax1.set_ylabel("Cluster label") + + # The vertical line for average silhouette score of all the values + ax1.axvline(x=silhouette_avg, color="red", linestyle="--") + + ax1.set_yticks([]) # Clear the yaxis labels / ticks + ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1]) + + # 2nd Plot showing the actual clusters formed + colors = cm.spectral(cluster_labels.astype(float) / n_clusters) + ax2.scatter(X[:, 0], X[:, 1], marker='.', s=30, lw=0, alpha=0.7, + c=colors) + + # Labeling the clusters + centers = clusterer.cluster_centers_ + # Draw white circles at cluster centers + ax2.scatter(centers[:, 0], centers[:, 1], + marker='o', c="white", alpha=1, s=200) + + for i, c in enumerate(centers): + ax2.scatter(c[0], c[1], marker='$%d$' % i, alpha=1, s=50) + + ax2.set_title("The visualization of the clustered data.") + ax2.set_xlabel("Feature space for the 1st feature") + ax2.set_ylabel("Feature space for the 2nd feature") + + plt.suptitle(("Silhouette analysis for KMeans clustering on sample data " + "with n_clusters = %d" % n_clusters), + fontsize=14, fontweight='bold') + + plt.show() diff --git a/VAETraining.py b/VAETraining.py new file mode 100644 index 0000000..43051c1 --- /dev/null +++ b/VAETraining.py @@ -0,0 +1,146 @@ +from keras.layers import (Input, TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, MaxPooling2D, + Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda) + +from keras.models import Model, Sequential +from keras import backend as K +from keras.metrics import binary_crossentropy + + +import numpy as np +import pickle +from math import sqrt + +from Trainer import Trainer + + +def get_batch(X, size): + a = np.random.choice(len(X), size, replace=False) + return X[a] + + +def 
load_preprocesseed_data(filename): + if not filename.endswith('.pik'): + raise TypeError('input File needs to be a Pickle object ".pik"!') + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + +# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15 +# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf +def slerp(val, low, high): + """Spherical interpolation. val has a range of 0 to 1.""" + if val <= 0: + return low + elif val >= 1: + return high + elif np.allclose(low, high): + return low + # noinspection PyTypeChecker + omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15)) + so = np.sin(omega) + re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high + return re * np.random.rand(low.shape[-1]) + + +if __name__ == '__main__': + K.set_image_dim_ordering('tf') + + # Data PreProcessing + trackCollection = load_preprocesseed_data('test_track.pik') # Tate_10000 + + '''HERE IS THE TRAINING!!!!!''' + T = Trainer('vae', trackCollection, 10, categorical_distribution=0, + batchSize=1, timesteps=5, filters=10) # BatchSize: 400 for Training / 1 for Testing + + # PreStage 1: Encoder Input + enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input') + + # Stage 1: Encoding + enc_seq = Sequential(name='Encoder') + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1), + name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1))) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1')) + + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1), + name='Conv2')) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2')) + + enc_seq.add(TimeDistributed(Flatten(), name='Flatten')) + enc_seq.add(LSTM(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode')) + + encoding = enc_seq(enc_input) + + # Stage 2: Bottleneck + out_z_mean = Dense(T.classes, name='Dense_Mean')(encoding) + out_z_log_var = Dense(T.classes, name='Std_Dev')(encoding) + + + def sampling(args, batch_size=500, classes=3, epsilon_std=0.01): + z_mean, z_log_var = args + epsilon = K.random_normal(shape=(batch_size, classes), mean=0., stddev=epsilon_std) + return z_mean + K.exp(z_log_var / 2) * epsilon + + + z = Lambda(sampling, name='Sampling', arguments={'batch_size': T.batchSize, + 'classes': T.classes, + 'epsilon_std': 0.01})([out_z_mean, out_z_log_var]) + + # Stage 3: Decoding + dec_seq = Sequential(name='Decoder') + + dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,))) + dec_seq.add(LSTM(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='LSTM_Decode')) + + reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters)) + dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape')) + + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1), + name='DeConv1')) + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1), + name='DeConv2')) + + dec_output = dec_seq(z) + + # Loss function minimized by autoencoder + def vae_objective(true, pred): + true = K.reshape(true, (-1, T.original_dim)) # ! 
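+        # Flatten targets (and, on the next line, predictions) from (timesteps, height, width, 1)
+        # to (batch, original_dim) so binary_crossentropy averages over every pixel of the sequence.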
+ pred = K.reshape(pred, (-1, T.original_dim)) # ! + loss = binary_crossentropy(true, pred) + kl_regu = -.5 * K.sum(1. + out_z_log_var - K.square( + out_z_mean) - K.exp(out_z_log_var), axis=-1) + return loss + kl_regu + + # Model + T.set_model(Model(inputs=enc_input, outputs=dec_output), vae_objective) + + # Separate encoder from input to latent space + encoder = K.function([enc_input], [out_z_mean]) + T.set_encoder(encoder) + + # Generatorfrom latent to input space + decoder_input = Input(shape=(T.classes,)) + decoder_output = dec_seq(decoder_input) + T.set_generator(Model(inputs=decoder_input, outputs=decoder_output)) + + if False: + T.train('VAEweights') + T.save_weights('VAEweights') + if True: + T.load_weights('VAEweights') + if False: + T.plot_model('vae.png', show_shapes=True, show_layer_names=True) + if False: + T.color_track(trackCollection[list(trackCollection.keys())[2200]], + nClusters=10, cMode='kmeans', aMode='None') # 2600 + # T.color_random_track(completeSequence=False, nClusters=10, cMode='kmeans', aMode='None') + if False: + T.show_prediction(200) + if False: + T.sample_latent(200) + if False: + T.multi_path_coloring(10, fileName='', state='') + if True: + T.show_silhouette_score([120]) diff --git a/VAE_Gumble_Softmax.pdf b/VAE_Gumble_Softmax.pdf new file mode 100644 index 0000000..01cc95f Binary files /dev/null and b/VAE_Gumble_Softmax.pdf differ diff --git a/VAEtate.py b/VAEtate.py new file mode 100644 index 0000000..5b60dcd --- /dev/null +++ b/VAEtate.py @@ -0,0 +1,159 @@ +from keras.layers import (Input, TimeDistributed, Dense, GRU, UpSampling2D, RepeatVector, MaxPooling2D, + Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda) + +from keras.models import Model, Sequential +from keras import backend as K +from keras.metrics import binary_crossentropy + + +import numpy as np +import pickle +from math import sqrt + +from Trainer import Trainer + + +def get_batch(X, size): + a = np.random.choice(len(X), size, replace=False) + return X[a] + + +def load_preprocesseed_data(filename): + """ + + :param filename: The Filename of the pikled Dataset to lod + :type filename: str + :return: Trackcollection + :rtype: tools.TrackCollection + """ + if not filename.endswith('.pik'): + raise TypeError('input File needs to be a Pickle object ".pik"!') + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + +# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15 +# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf +def slerp(val, low, high): + """Spherical interpolation. 
val has a range of 0 to 1.""" + if val <= 0: + return low + elif val >= 1: + return high + elif np.allclose(low, high): + return low + # noinspection PyTypeChecker + omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15)) + so = np.sin(omega) + re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high + return re * np.random.rand(low.shape[-1]) + + +if __name__ == '__main__': + K.set_image_dim_ordering('tf') + + # Data PreProcessing + tate = load_preprocesseed_data('Tate_1000.pik') + maze = load_preprocesseed_data('Maze_1000.pik') + oet = load_preprocesseed_data('Oet_1000.pik') + tum = load_preprocesseed_data('Tum_1000.pik') + doom = load_preprocesseed_data('Doom_1000.pik') + priz = load_preprocesseed_data('Priz_1000.pik') + cross = load_preprocesseed_data('crossing.pik') + + '''HERE IS THE TRAINING!!!!!''' + # T = Trainer('vae', [maze, tate, oet, tum, doom, priz], 10, categorical_distribution=0, + T = Trainer('vae', [tate, maze, oet, tum, doom, priz], 10, categorical_distribution=0, + batchSize=1, timesteps=9, filters=0, rotating=True) # BatchSize: 1600 for Training / 1 for Testing + + # PreStage 1: Encoder Input + enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input') + + # Stage 1: Encoding + enc_seq = Sequential(name='Encoder') + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1), + name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1))) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1')) + + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1), + name='Conv2')) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2')) + + enc_seq.add(TimeDistributed(Flatten(), name='Flatten')) + enc_seq.add(GRU(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='GRU_Encode')) + + encoding = enc_seq(enc_input) + + # Stage 2: Bottleneck + out_z_mean = Dense(T.classes, name='Dense_Mean')(encoding) + out_z_log_var = Dense(T.classes, name='Std_Dev')(encoding) + + + def sampling(args, batch_size=500, classes=3, epsilon_std=0.01): + z_mean, z_log_var = args + epsilon = K.random_normal(shape=(batch_size, classes), mean=0., stddev=epsilon_std) + return z_mean + K.exp(z_log_var / 2) * epsilon + + + z = Lambda(sampling, name='Sampling', arguments={'batch_size': T.batchSize, + 'classes': T.classes, + 'epsilon_std': 0.01})([out_z_mean, out_z_log_var]) + + # Stage 3: Decoding + dec_seq = Sequential(name='Decoder') + + dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,))) + dec_seq.add(GRU(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='GRU_Decode')) + + reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters)) + dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape')) + + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters, kernel_size=(4, 4), strides=1), + name='DeConv1')) + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1), + name='DeConv2')) + + dec_output = dec_seq(z) + + # Loss function minimized by autoencoder + def vae_objective(true, pred): + true = K.reshape(true, (-1, T.original_dim)) # ! + pred = K.reshape(pred, (-1, T.original_dim)) # ! 
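+        # The objective below is per-pixel binary cross-entropy on the flattened sequences
+        # plus the analytic KL term between q(z|x) = N(z_mean, exp(z_log_var)) and the
+        # standard-normal prior.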
+        loss = binary_crossentropy(true, pred)
+        kl_regu = -.5 * K.sum(1. + out_z_log_var - K.square(
+            out_z_mean) - K.exp(out_z_log_var), axis=-1)
+        return loss + kl_regu
+
+    # Model
+    T.set_model(Model(inputs=enc_input, outputs=dec_output), vae_objective)
+
+    # Separate encoder from input to latent space
+    encoder = K.function([enc_input], [out_z_mean])
+    T.set_encoder(encoder)
+
+    # Generator from latent to input space
+    decoder_input = Input(shape=(T.classes,))
+    decoder_output = dec_seq(decoder_input)
+    T.set_generator(Model(inputs=decoder_input, outputs=decoder_output))
+
+    if False:
+        # T.load_weights('VAEall_1k_9t_GRU')
+        T.train('VAEall_1k_9t_GRU')
+        T.save_weights('VAEall_1k_9t_GRU')
+    if True:
+        T.load_weights('VAEall_1k_9t_GRU')
+    if False:
+        T.plot_model('vae.png', show_shapes=True, show_layer_names=True)
+    if False:
+        T.color_track(cross[list(cross.keys())[0]], completeSequence=True,
+                      nClusters=10, cMode='kmeans', aMode='None')  # 2600 for the 10k dataset
+        # T.color_random_track(completeSequence=False, nClusters=10, cMode='kmeans', aMode='None')
+    if False:
+        T.show_prediction(200)
+    if False:
+        T.sample_latent(200)
+    if True:
+        T.multi_path_coloring(10, fileName='all', state='dump', primaryTC=-2, uncertainty=True)
diff --git a/VaeGumble.py b/VaeGumble.py
new file mode 100644
index 0000000..4f4c4a4
--- /dev/null
+++ b/VaeGumble.py
@@ -0,0 +1,135 @@
+from keras.layers import (Input, TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, MaxPooling2D,
+                          Convolution2D, Deconvolution2D, Flatten, Reshape, Lambda)
+
+from keras.models import Model, Sequential
+
+from keras import backend as K
+
+from keras.metrics import binary_crossentropy
+from keras.activations import softmax
+
+
+import numpy as np
+import pickle
+from math import sqrt
+
+from Trainer import Trainer
+
+
+def get_batch(X, size):
+    a = np.random.choice(len(X), size, replace=False)
+    return X[a]
+
+
+def load_preprocesseed_data(filename):
+    if not filename.endswith('.pik'):
+        raise TypeError('input File needs to be a Pickle object ".pik"!')
+    with open(filename, 'rb') as f:
+        data = pickle.load(f)
+    return data
+
+
+if __name__ == '__main__':
+    K.set_image_dim_ordering('tf')
+    '''HERE IS THE TRAINING!!!!!'''
+    # Paper from https://github.com/nzw0301/keras-examples/blob/master/gumbel_softmax_vae_MNIST.ipynb
+    # https://arxiv.org/pdf/1611.01144.pdf
+
+    # Data preprocessing; keep the batch size small (500 should do) because of limited memory, then re-run the fitting!
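+    # A minimal NumPy sketch (illustrative only, not used by the model) of the
+    # Gumbel-softmax draw that the Keras `sampling` Lambda below performs;
+    # `tau` is an assumed positive temperature, cf. arXiv:1611.01144.
+    def gumbel_softmax_np(logits, tau=1.0):
+        """Return one relaxed one-hot sample per row of `logits`."""
+        u = np.random.uniform(1e-20, 1.0, logits.shape)
+        y = (logits - np.log(-np.log(u))) / tau          # logits + Gumbel noise, annealed by tau
+        e = np.exp(y - y.max(axis=-1, keepdims=True))    # numerically stable softmax
+        return e / e.sum(axis=-1, keepdims=True)
+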
+ trackCollection = load_preprocesseed_data('test_track.pik') + T = Trainer('gumble', trackCollection, 10, 30, filters=10) + + # PreStage 1: Encoder Input + enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='main_input') + + # Stage 1: Encoding + enc_seq = Sequential(name='Encoder') + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1), + name='Conv1', input_shape=(T.timesteps, T.width, T.height, 1))) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool1')) + + enc_seq.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1), + name='Conv2')) + enc_seq.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2), name='MaxPool2')) + + enc_seq.add(TimeDistributed(Flatten(), name='Flatten')) + enc_seq.add(LSTM(int(enc_seq.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode')) + + encoding = enc_seq(enc_input) + + # Stage 2: Bottleneck + logits_y = Dense(T.classes * T.cD)(encoding) # activation='softmax' ICh denke nicht + + # Sampling Function + def sampling(logits): + U = K.random_uniform(K.shape(logits), 0, 1) + y = logits - K.log(-K.log(U + 1e-20) + 1e-20) # logits + gumbel noise + y = softmax(K.reshape(y, (-1, T.cD, T.classes)) / T.tau) + y = K.reshape(y, (-1, T.cD * T.classes)) + return y + + z = Lambda(sampling,)(logits_y) + + # Stage 3: Decoding + dec_seq = Sequential(name='Decoder') + + dec_seq.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes * T.cD,))) + dec_seq.add(LSTM(enc_seq.layers[-1].output_shape[-1], return_sequences=True, name='LSTM_Decode')) + + reValue = int(sqrt(dec_seq.layers[-1].output_shape[-1]//T.filters)) + dec_seq.add(TimeDistributed(Reshape((reValue, reValue, T.filters)), name='ReShape')) + + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up1')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1), + name='DeConv1')) + dec_seq.add(TimeDistributed(UpSampling2D(2), name='Up2')) + dec_seq.add(TimeDistributed(Deconvolution2D(activation='hard_sigmoid', filters=1, kernel_size=(5, 5), strides=1), + name='DeConv2')) + + dec_output = dec_seq(z) + + # Gumble Loss Function + def gumbel_loss(x, x_hat): + # N = T.cD; M = T.classes + q_y = K.reshape(logits_y, (-1, T.cD, T.classes)) + q_y = softmax(q_y) + log_q_y = K.log(q_y + 1e-20) + kl_tmp = q_y * (log_q_y - K.log(1.0 / T.classes)) + KL = K.sum(kl_tmp, axis=(1, 2)) + x = K.reshape(x, (-1, T.original_dim)) # ! + x_hat = K.reshape(x_hat, (-1, T.original_dim)) # ! 
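+        # KL above is taken against the uniform categorical prior over T.classes, summed
+        # over all T.cD latent dimensions; the next line combines it with the pixel-weighted
+        # reconstruction cross-entropy following the sign convention of the referenced notebook.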
+ elbo = T.original_dim * binary_crossentropy(x, x_hat) - KL + return elbo + + T.set_model(Model(inputs=enc_input, outputs=dec_output), gumbel_loss, optimizer='adagrad') + + # Generatorfrom latent to input space + decoder_input = Input(shape=(T.classes * T.cD,)) + decoder_output = dec_seq(decoder_input) + T.set_generator(Model(inputs=decoder_input, outputs=decoder_output)) + + # Separate encoder from input to latent space + argmax_y = K.max(K.reshape(logits_y, (-1, T.cD, T.classes)), axis=-1, keepdims=True) + argmax_y = K.equal(K.reshape(logits_y, (-1, T.cD, T.classes)), argmax_y) + encoder = K.function([enc_input], [argmax_y]) + T.set_encoder(encoder) + + if False: + T.train('GumbleLSTMWeights') + T.save_weights('GumbleLSTMWeights') + if True: + T.load_weights('GumbleWeights') + if False: + T.plot_model('GumbleLSTM.png', show_shapes=True, show_layer_names=True) + if False: + # T.color_track(trackCollection[list(trackCollection.keys())[2200]], nClusters=4) # 2600 + T.color_random_track(completeSequence=False, nClusters=4) + if False: + T.show_prediction(200, startI=200) + if False: + T.sample_latent(200) + if False: + T.multi_path_coloring(10) + if True: + T.show_silhouette_score([2,4,6,8,10,12,14]) + diff --git a/Variational_lmageSequence_LSTM_Autoencoder.pdf b/Variational_lmageSequence_LSTM_Autoencoder.pdf new file mode 100644 index 0000000..96d754a Binary files /dev/null and b/Variational_lmageSequence_LSTM_Autoencoder.pdf differ diff --git a/indoor_Navigation_master.py b/indoor_Navigation_master.py new file mode 100644 index 0000000..d25b7e4 --- /dev/null +++ b/indoor_Navigation_master.py @@ -0,0 +1,107 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +from __future__ import division, absolute_import +from PIL import Image +import numpy as np +from pylab import imshow, savefig # , show, get_cmap +from tools import Worker, Isovist, TrackCollection, IndoorToolset # , Track, IsovistCollection + +# import tensorflow as tf + + +if __name__ == '__main__': + walkableTiles = 255 + w = Worker() + # file = 'maps\Tate.bmp' + # file = 'maps\Map.bmp' + # file = 'maps\Maze.bmp' + # file = 'maps\oet.bmp' + # file = 'maps\doom.bmp' + # file = 'maps\priz.bmp' + # file = 'maps\\tum.bmp' + file = 'maps\crossing.bmp' + with Image.open(file) as f: + baseToolset = IndoorToolset(np.array(f), walkableTiles, worker=w, isoVistSize=30) + + baseToolset.refresh_random_clock() + + """HEATMAP, Random""" + # Heatmap with random starts/targets + if False: + pCol = TrackCollection(baseToolset, worker=w) + pCol.add_n_bunch_random(100, penalty=2, safe=True) + pCol.show(graphUpdate=True, saveIMG=True) + + """Homotroph Analysis""" + # Homotrphic Colored Pixels with same Starts/Targets, vertices iterating over all walkable pixels. + if False: + st = baseToolset.getRandomPos() + baseToolset.refresh_random_clock() + ta = baseToolset.getRandomPos() + pCol = TrackCollection(baseToolset) + + pCol.add_n_bunch_tracks('all', st, ta) + + pCol.homotopic_classification() + pCol.show(hClass=True, saveIMG=True) + + """Principal Convex Hull Analysis (PCHA) / Archetypal Analysis.""" + # refer to https://github.com/ulfaslak/py_pcha for further Information and original Editor + if False: + if True: + st = baseToolset.getRandomPos() + baseToolset.refresh_random_clock() + ta = baseToolset.getRandomPos() + if False: # NOTE: use this with Maze.bmp!!!!!! + st = (98, 6) + ta = (98, 195) + if False: # NOTE: use this with Map.bmp!!!!!! 
+ st = (15, 15) + ta = (190, 280) + pCol = TrackCollection(baseToolset) + # ToDo: Print the k value as chart + # noinspection PyUnboundLocalVariable + pCol.add_n_bunch_tracks(20, st, ta, penalty=2) + pCol.homotopic_classification() + if False: + ArchetypeList = pCol.return_archetypes(2) + pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True) + ArchetypeList = pCol.return_archetypes(4) + pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True) + ArchetypeList = pCol.return_archetypes(5) + pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True) + ArchetypeList = pCol.return_archetypes(3) + pCol.show(saveIMG=True, trackList=ArchetypeList, allTracks=True) + + """Isovists Testing""" + # Some Explanation text here + if False: + point = baseToolset.getRandomPos() + print(point) + i = Isovist(*point, array=baseToolset.imgArray, walkable=walkableTiles, rangeLimit=30) + i.saveImg() + baseToolset.imgArray[point] = 160 + imshow(baseToolset.imgArray) + savefig('startpos.tif', interpolation='none', cmap='gray') + + """Synthesyse n-bunch Tracks/ minimum length / storage for TensorFlow""" + # Some Explenation text here + if True: + pCol = TrackCollection(baseToolset) + pCol.add_single_track((67, 103), (67, 64), qhull=False) + pCol.add_single_track((47, 82), (88, 82), qhull=False) + #pCol.add_n_bunch_random( + # 1000, penalty=None, safe=False, + # minLen=50 + # # minLen=int(sqrt(baseToolset.width * baseToolset.height) / 4) + #) + + pCol.save_to_disc('crossing') + + # pCol3 = TrackCollection(baseToolset) + # pCol3.recover_from_disc('synthetic_tracks_sizeDIV4') + + print('Success!') + pass + pass diff --git a/maps/Map.bmp b/maps/Map.bmp new file mode 100644 index 0000000..c4fa782 Binary files /dev/null and b/maps/Map.bmp differ diff --git a/maps/Maze.bmp b/maps/Maze.bmp new file mode 100644 index 0000000..b6e4fe8 Binary files /dev/null and b/maps/Maze.bmp differ diff --git a/maps/Tate.bmp b/maps/Tate.bmp new file mode 100644 index 0000000..2224ef6 Binary files /dev/null and b/maps/Tate.bmp differ diff --git a/maps/crossing.bmp b/maps/crossing.bmp new file mode 100644 index 0000000..075befb Binary files /dev/null and b/maps/crossing.bmp differ diff --git a/maps/doom.bmp b/maps/doom.bmp new file mode 100644 index 0000000..bfbbb54 Binary files /dev/null and b/maps/doom.bmp differ diff --git a/maps/moep.bmp b/maps/moep.bmp new file mode 100644 index 0000000..f1fdd93 Binary files /dev/null and b/maps/moep.bmp differ diff --git a/maps/oet.bmp b/maps/oet.bmp new file mode 100644 index 0000000..c93c030 Binary files /dev/null and b/maps/oet.bmp differ diff --git a/maps/priz.bmp b/maps/priz.bmp new file mode 100644 index 0000000..99fb858 Binary files /dev/null and b/maps/priz.bmp differ diff --git a/maps/tum.bmp b/maps/tum.bmp new file mode 100644 index 0000000..ea049eb Binary files /dev/null and b/maps/tum.bmp differ diff --git a/refinedTraining.py b/refinedTraining.py new file mode 100644 index 0000000..e153262 --- /dev/null +++ b/refinedTraining.py @@ -0,0 +1,117 @@ +from keras.layers import TimeDistributed, Dense, LSTM, UpSampling2D, RepeatVector, \ + MaxPooling2D, Convolution2D, Deconvolution2D, Flatten, Reshape, Input +from keras.models import Sequential, Model + +from keras import backend as K + +import numpy as np +import pickle +from math import sqrt + +from Trainer import Trainer + + +def get_batch(X, size): + a = np.random.choice(len(X), size, replace=False) + return X[a] + + +def load_preprocesseed_data(filename): + if not filename.endswith('.pik'): + raise TypeError('input File 
needs to be a Pickle object ".pik"!') + with open(filename, 'rb') as f: + data = pickle.load(f) + return data + + +# https://github.com/dribnet/plat/blob/master/plat/interpolate.py#L15 +# https://pdfs.semanticscholar.org/f46c/307d4d73e86412e0c57161fb52f7591e124b.pdf +def slerp(val, low, high): + """Spherical interpolation. val has a range of 0 to 1.""" + if val <= 0: + return low + elif val >= 1: + return high + elif np.allclose(low, high): + return low + + # noinspection PyTypeChecker + omega = np.arccos(round(np.dot(low/np.linalg.norm(low), high/np.linalg.norm(high)), 15)) + so = np.sin(omega) + re = np.sin((1.0-val)*omega) / so * low + np.sin(val*omega)/so * high + return re * np.random.rand(low.shape[-1]) + + +if __name__ == '__main__': + '''HERE IS THE TRAINING!!!!!''' + trackCollection = load_preprocesseed_data('test_track.pik') + K.set_image_dim_ordering('tf') + T = Trainer('refined', trackCollection, 10, categorical_distribution=0, batchSize=400, filters=10) + + # PreStage 1: Encoder Input + enc_input = Input(shape=(T.timesteps, T.width, T.height, 1), name='Main_Input') + + enc = Sequential() + enc.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(3, 3), strides=1, + name='Conv1'), input_shape=(T.timesteps, T.width, T.height, 1))) + enc.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2, name='MaxPool1'))) + + enc.add(TimeDistributed(Convolution2D(activation='relu', filters=T.filters, kernel_size=(5, 5), strides=1, + name='Conv2'))) + enc.add(TimeDistributed(MaxPooling2D(pool_size=2, strides=2, name='MaxPool2'))) + + enc.add(TimeDistributed(Flatten(name='Flatten'))) + enc.add(LSTM(int(enc.layers[-1].output_shape[-1]), return_sequences=False, name='LSTM_Encode')) + + encoding = enc(enc_input) + + # Stage 2: Bottleneck + z = Dense(T.classes, activation='softmax', name='Clustering')(encoding) + + # + dec = Sequential() + dec.add(RepeatVector(T.timesteps, name='TimeRepeater', input_shape=(T.classes,))) + dec.add(LSTM(enc.layers[-2].output_shape[-1], return_sequences=True, name='LSTM_Decode')) + + reValue = int(sqrt(dec.layers[-1].output_shape[-1]//T.filters)) + dec.add(TimeDistributed(Reshape((reValue, reValue, T.filters)))) + + dec.add(TimeDistributed(UpSampling2D(2, name='Up1'))) + + dec.add(TimeDistributed(Deconvolution2D(activation='relu', filters=T.filters//2, kernel_size=(4, 4), strides=1, + name='DeConv1'))) + dec.add(TimeDistributed(UpSampling2D(2, name='Up2'))) + + dec.add(TimeDistributed(Deconvolution2D(activation='relu', filters=1, kernel_size=(5, 5), strides=1, + name='DeConv2'))) + + dec_output = dec(z) + + T.set_model(Model(inputs=enc_input, outputs=dec_output), optimizer='adagrad', loss='binary_crossentropy') + + decoder_input = Input(shape=(T.classes,)) + decoded = dec(decoder_input) + T.set_generator(Model(inputs=decoder_input, outputs=decoded)) + + encoder = K.Function([enc_input], [z]) + T.set_encoder(encoder) + + if False: + T.train('refinedWeights') + T.save_weights('refinedWeights') + if True: + T.load_weights('refinedWeights') + if False: + T.plot_model('refined.png', show_shapes=True, show_layer_names=True) + if False: + # T.color_track(trackCollection[list(trackCollection.keys())[2200]]) # 2600 + T.color_track(trackCollection[list(trackCollection.keys())[2200]], nClusters=4) # 2600 + # T.color_random_track(completeSequence=True) + if False: + T.show_prediction(200) + if False: + T.sample_latent(200) + if False: + T.multi_path_coloring(10) + if True: + T.show_silhouette_score([120]) diff --git a/tools.py b/tools.py new 
file mode 100644
index 0000000..f595736
--- /dev/null
+++ b/tools.py
@@ -0,0 +1,986 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+from __future__ import division, absolute_import
+import pickle
+from multiprocessing import Pool
+import networkx as nx
+from PIL import Image, ImageDraw
+from math import sqrt, hypot, degrees, atan2
+import random
+import numpy as np
+import time
+from pylab import imshow, show, get_cmap, savefig
+from collections import UserList, UserDict
+import scipy.spatial as sp
+from scipy import ndimage
+from PCHA import PCHA
+# from py_pcha.PCHA import PCHA
+from operator import itemgetter
+from dtw import dtw
+
+workercount = 4
+
+class Worker(object):
+    def __init__(self, n=workercount):
+        self.pool = Pool(processes=n)
+        self.timer = time.clock()
+
+    def do_work_onClass(self, itemList, taskName, *args):
+        results = []
+        for item in itemList:
+            task = getattr(item, taskName)
+            results.append(self.pool.apply_async(task, args=(*args,)))
+        self.pool.close()
+        self.pool.join()
+        return [r.get() for r in results]
+
+    def do_work(self, itemList, task):
+        results = []
+        for item in itemList:
+            results.append((self.pool.apply_async(task, args=(item, ))))
+        self.pool.close()
+        self.pool.join()
+        return [r.get() for r in results]
+
+    def do_2_work(self, itemList1, itemList2, task):
+        if len(itemList1) != len(itemList2):
+            raise ValueError('ItemLists need to be of same shape!')
+        results = []
+        for item1, item2 in zip(itemList1, itemList2):
+            results.append((self.pool.apply_async(task, args=(item1, item2, ))))
+        self.pool.close()
+        self.pool.join()
+        return [r.get() for r in results]
+
+    def init_many(self, classObject, argsList):
+        results = self.pool.starmap(classObject, argsList)
+        self.pool.close()
+        self.pool.join()
+        return results
+
+
+class IsovistCollection(UserDict):
+    def __init__(self, walkable, rangeLimit, tileArray, worker=None):
+        super(IsovistCollection, self).__init__()
+        if not isinstance(worker, Worker):
+            raise TypeError('worker must be a Worker instance')
+        self.data = dict()
+        self.walkable = walkable
+        self.tileArray = tileArray
+        self.rangeLimit = rangeLimit
+        self.lfr = None
+        if isinstance(self.tileArray, np.ndarray):
+            workerResult = worker.init_many(
+                Isovist, [(*npIdx, self.tileArray, self.walkable, self.rangeLimit)
+                          for npIdx, value in np.ndenumerate(self.tileArray) if value == self.walkable])
+            self.data = {isovist.vertex: isovist for isovist in workerResult}
+            # The following would be a non-multithreaded approach; maybe activate it for smaller blueprints later
+            # TODO: Activate this for smaller blueprints, when multithreading would lead to overhead
+            # for ndIndex, value in np.ndenumerate(self.tileArray):
+            #     if value == self.walkable:
+            #         self.addIsovist(*ndIndex)
+        else:
+            pass
+
+    # TODO: Add several isovists afterwards via multiprocessing
+    def add_isovist(self, x, y):
+        """
+        Generate and add an Isovist for a specific coordinate.
+
+        :param x: X-Coordinate
+        :type x: int
+        :param y: Y-Coordinate
+        :type y: int
+        :return: Just a boolean as control value.
+        :rtype: bool
+        """
+
+        self[(x, y)] = (Isovist(x, y, self.tileArray, self.walkable, self.rangeLimit))
+        return True
+
+    @staticmethod
+    def get_angle(point, target):
+        """
+        Calculate the angle between two points in degrees
+        https://stackoverflow.com/questions/9970281/java-calculating-the-angle-between-two-points-in-degrees
+
+        :param point: The point from where to measure.
+        :type point: (int, int)
+        :param target: The point to where to measure.
+        :type target: (int, int)
+        :return: Angle between the two points in degrees.
+
+    def rotateIsovist(self, isovist, prev, curr):
+        """
+        Rotate a numpy array or Isovist object according to the walking angle
+        between the two given points.
+
+        How to rotate a matrix by 45 degrees:
+        https://math.stackexchange.com/questions/732679/how-to-rotate-a-matrix-by-45-degrees
+        https://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.ndimage.interpolation.rotate.html
+
+        :param isovist: Numpy array or Isovist class object.
+        :type isovist: np.ndarray or Isovist
+        :param prev: Point at t-1 (coming from).
+        :type prev: (int, int)
+        :param curr: Point at t (currently standing on).
+        :type curr: (int, int)
+        :return: The rotated array or Isovist.
+        :rtype: np.ndarray or Isovist
+        """
+        if isinstance(isovist, Isovist):
+            array = isovist.visArray
+        elif isinstance(isovist, np.ndarray):
+            array = isovist
+        else:
+            raise TypeError('Must be either np.ndarray or Isovist class object, but was: ', type(isovist))
+
+        # Calculate how many times to rotate by 90 degrees.
+        cur_angle = self.get_angle(prev, curr)
+        if self.lfr is not None and abs(self.lfr - cur_angle) == 45:
+            # Diagonal step: keep the facing of the last non-diagonal move.
+            rotation = self.lfr // 90
+        else:
+            self.lfr = cur_angle
+            rotation = cur_angle // 90
+
+        if isinstance(isovist, Isovist):
+            isovist.visArray = np.rot90(array, rotation)
+            return isovist
+        else:
+            return np.rot90(array, rotation)
+
+    def get_items_for_track(self, track, dim='flat', in_walk_dir=False):
+        """
+        Collect the isovists along a track.
+
+        :param track: Track class object holding path coordinates.
+        :type track: Track
+        :param dim: Either 'flat' for a flattened numpy array or 'full' for all dimensions.
+        :type dim: str
+        :param in_walk_dir: Determine whether each isovist should be rotated into walking direction.
+        :type in_walk_dir: bool
+        :return: An array of isovist arrays, one for each coordinate in track.
+        :rtype: np.ndarray
+        """
+        if dim not in ['flat', 'full']:
+            raise ValueError('Dim can either be "flat" or "full".')
+        if not isinstance(track, Track):
+            raise TypeError('Please provide a Track object')
+        if not in_walk_dir:
+            if dim == 'flat':
+                return np.dstack([self[point].get_1D_array() for point in track])
+            else:
+                return np.dstack([self[point].visArray for point in track])
+        elif in_walk_dir and dim == 'full':
+            self.lfr = None
+            # Note: iterating over consecutive pairs skips the first coordinate,
+            # since a walking direction needs a predecessor.
+            return np.dstack([self.rotateIsovist(self[currP].visArray, prevP, currP)
+                              for prevP, currP in zip(track[:-1], track[1:])])
+        else:
+            raise ValueError('For a rotation in walking direction, please choose "full" output-mode.')
+
+    # [nb_samples, nb_frames, width, height, channels]
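+
+
+# Shape reference (illustrative): get_items_for_track stacks along a new third
+# axis, so dim='full' yields (rangeLimit, rangeLimit, n_points) and dim='flat'
+# yields (1, rangeLimit**2, n_points); consumers such as
+# TrackCollection.as_n_sample_4D() swap axes to obtain one frame per point.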
+
+
+class Isovist(object):
+    def __init__(self, x, y, array, walkable, rangeLimit):
+        """
+        Calculate lit squares from the given location and radius through 'shadow casting'.
+        Source:
+        http://www.roguebasin.com/index.php?title=FOV_using_recursive_shadowcasting
+        http://www.roguebasin.com/index.php?title=PythonShadowcastingImplementation
+
+        :param x: X-part of the center coordinate from where the 'light' travels
+        :type x: int
+        :param y: Y-part of the center coordinate from where the 'light' travels
+        :type y: int
+        :param array: Numpy array holding the background image
+        :type array: np.ndarray
+        :param walkable: The value which identifies positions in the array through which light can travel
+        :type walkable: int or (int, int, int)
+        :param rangeLimit: Radius in which pixels of the shadow need to be calculated
+        :type rangeLimit: int
+        """
+        mult = [[1, 0, 0, -1, -1, 0, 0, 1],
+                [0, 1, -1, 0, 0, -1, 1, 0],
+                [0, 1, 1, 0, 0, -1, -1, 0],
+                [1, 0, 0, 1, -1, 0, 0, -1]]
+        self.x = x
+        self.y = y
+        self.vertex = (self.x, self.y)
+        self.rangeLimit = rangeLimit if rangeLimit else int(sqrt(array.shape[0] * array.shape[1]))
+        self.visArray = np.zeros(array.shape, dtype=bool)
+
+        for octant in range(8):
+            self.__cast_light(self.x, self.y, 1, 1.0, 0.0, self.rangeLimit,
+                              mult[0][octant], mult[1][octant],
+                              mult[2][octant], mult[3][octant], 0, array, walkable)
+
+        # Pad and crop so the (rangeLimit x rangeLimit) window is centered on (x, y).
+        offset = self.rangeLimit // 2
+        # self.visArray = self.visArray[
+        #     max(int(x-self.rangeLimit), 0):min(self.visArray.shape[0], int(x+self.rangeLimit)),
+        #     max(int(y-self.rangeLimit), 0):min(self.visArray.shape[1], int(y+self.rangeLimit))]
+        self.visArray = np.pad(self.visArray, ((offset, offset), (offset, offset)),
+                               mode='constant')[self.x:self.x + self.rangeLimit, self.y:self.y + self.rangeLimit]
+
+        self.size = np.sum(self.visArray)
+
+        centroid = ndimage.center_of_mass(self.visArray.astype(int))
+        # centroid = np.average(self.visArray[:, :2], axis=0, weights=self.visArray[:, 2])  # TODO: Build a pure-numpy solution
+        # https://stackoverflow.com/questions/29356825/python-calculate-center-of-mass
+        self.Xcent = centroid[0]
+        self.Ycent = centroid[1]
+
+    @staticmethod
+    def __blocksLight(x, y, array, walkable):
+        if x < 0 or y < 0:
+            return True
+        try:
+            return False if array[x, y] == walkable else True
+        except IndexError:
+            return True
+
+    def __setVisible(self, x, y):
+        # Guard against negative indices, which would silently wrap around in numpy.
+        if x >= 0 and y >= 0:
+            try:
+                self.visArray[x, y] = True
+            except IndexError:
+                return
+
+    def __isVisible(self, x, y):
+        return self.visArray[x, y]
+
+    def __cast_light(self, cx, cy, row, start, end, radius, xx, xy, yx, yy, idx, array, walkable):
+        """Recursive lightcasting function."""
+        if start < end:
+            return
+        radius_squared = radius * radius
+        new_start = start
+        for j in range(row, radius + 1):
+            dx, dy = -j - 1, -j
+            blocked = False
+            while dx <= 0:
+                dx += 1
+                # Translate the dx, dy coordinates into map coordinates:
+                X, Y = cx + dx * xx + dy * xy, cy + dx * yx + dy * yy
+                # l_slope and r_slope store the slopes of the left and right
+                # extremities of the square we're considering:
+                l_slope, r_slope = (dx - 0.5) / (dy + 0.5), (dx + 0.5) / (dy - 0.5)
+                if start < r_slope:
+                    continue
+                elif end > l_slope:
+                    break
+                else:
+                    # Our light beam is touching this square; light it:
+                    if dx * dx + dy * dy < radius_squared:
+                        self.__setVisible(X, Y)
+                    if blocked:
+                        # We're scanning a row of blocked squares:
+                        if self.__blocksLight(X, Y, array, walkable):
+                            new_start = r_slope
+                            continue
+                        else:
+                            blocked = False
+                            start = new_start
+                    else:
+                        if self.__blocksLight(X, Y, array, walkable) and j < radius:
+                            # This is a blocking square, start a child scan:
+                            blocked = True
+                            self.__cast_light(cx, cy, j + 1, start, l_slope,
+                                              radius, xx, xy, yx, yy, idx + 1, array, walkable)
+                            new_start = r_slope
+            # Row is scanned; do next row unless last square was blocked:
+            if blocked:
+                break
+
+    def saveImg(self, filename='Isovist.tif'):
+        filename = filename if filename.endswith('.tif') else '%s.tif' % filename
+        imshow(self.visArray, interpolation='none', cmap='gray')
+        savefig(filename)
+
+    def get_1D_array(self):
+        return self.visArray.ravel()
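+
+
+# Minimal usage sketch (illustrative, not part of the original module); the
+# map content and the walkable value 255 are assumptions made for the demo.
+def _isovist_demo():
+    demo_map = np.full((9, 9), 255, dtype=int)  # 9x9 room, all walkable
+    demo_map[4, 2:7] = 0                        # a wall through the middle
+    iso = Isovist(2, 4, demo_map, 255, 8)       # stand above the wall
+    return iso.visArray                         # boolean mask of shape (8, 8)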
+
+
+class TrackCollection(UserDict):
+    def __init__(self, indoorToolset, worker=None):
+        """
+        :param indoorToolset: An IndoorToolset whose base map holds the cell values
+        :type indoorToolset: IndoorToolset
+        :rtype: TrackCollection
+        """
+        if not isinstance(indoorToolset, IndoorToolset):
+            raise TypeError
+        super().__init__()
+        self.data = dict()
+        self.map = indoorToolset
+        self.hDict = dict()
+        self.archeArray = None
+        self.maxLen = self.__update_maxLen()
+        self.worker = worker if worker else Worker(n=workercount)
+
+    def __update_maxLen(self):
+        if self:
+            return max([len(self[track]) for track in self.keys()])
+        else:
+            return 0
+
+    def homotopic_classification(self):
+        print('Homotopic Classification started!')
+        baseArray = self.map.imgArray.astype(bool).astype(int)
+
+        massNames = list(self.keys())
+        for track in massNames:
+            found = False
+            for key in reversed(list(self.hDict.keys())):
+                if self[key].isHomotopic(self[track], self.map, baseArray=baseArray):
+                    self.hDict[key].append(track)
+                    self[track].group = key
+                    found = True
+                    break
+            if not found:
+                self.hDict[track] = [track]
+                self[track].group = track
+
+        print('All done!\n%i classes could be observed.' % len(self.hDict))
+        return True
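+
+    # How the homotopy test works (illustrative summary): two tracks sharing
+    # start and target are joined into a closed polygon (A + reversed(B)); the
+    # polygon is rasterized and multiplied with the obstacle mask, and any
+    # non-zero overlap means an obstacle lies between the two tracks, i.e.
+    # they are not homotopic. See Track.isHomotopic() below.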
+
+    def add_single_track(self, start, target, penalty=None, qhull=True):
+        track = self.map.calculate_path(start, target, penalty=penalty, qhull=qhull)
+        key = self.__find_list_middle(track)
+        track.vertex = key
+        self[key] = track
+        return True
+
+    def add_n_bunch_tracks(self, n, start, target, nbunch=None, penalty=None):
+
+        def build_track(segment1, segment2):
+            combined = list()
+            combined.extend(segment1 + list(reversed(segment2)))
+            return Track(combined, self.map.walkableTile)
+
+        if isinstance(penalty, (int, float)):
+            for i in range(n):
+                track = self.map.calculate_path(start, target, penalty=penalty)
+                key = self.__find_list_middle(track)
+                track.vertex = key
+                self[key] = track
+
+        else:
+            if nbunch and not n:
+                if isinstance(nbunch, list):
+                    if all([isinstance(track, Track) for track in nbunch]):
+                        for i, track in enumerate(nbunch):
+                            key = self.__find_list_middle(track)
+                            track.vertex = key
+                            self[key] = track
+                        n = i + 1
+
+            else:
+                singleSourceDij_S = nx.single_source_dijkstra_path(self.map.graph, start, weight='weight')
+                print('Start-Tree created!')
+                singleSourceDij_T = nx.single_source_dijkstra_path(self.map.graph, target, weight='weight')
+                print('Target-Tree created!')
+
+                if isinstance(n, str):
+                    if n == 'all':
+                        n = len(self.map.graph)
+
+                if n > 10:
+                    allNodes = list(self.map.graph.nodes())
+                    for p in (start, target):
+                        allNodes.remove(p)
+                    if len(allNodes) > n:
+                        rand = random.Random()
+                        while len(allNodes) > n:
+                            rand.seed(time.perf_counter())
+                            # Drop a random node (pop by index, not remove by value).
+                            allNodes.pop(rand.randrange(len(allNodes)))
+                    # Route through every remaining via-node reachable from both
+                    # trees. (Done in-process: a locally defined build_track
+                    # cannot be pickled for the worker pool.)
+                    for point in allNodes:
+                        if singleSourceDij_S.get(point, None) and singleSourceDij_T.get(point, None):
+                            track = build_track(singleSourceDij_S[point], singleSourceDij_T[point])
+                            track.vertex = point
+                            self[point] = track
+
+                else:
+                    for i in range(n):
+                        point = self.map.getRandomPos()
+                        segmentS = singleSourceDij_S[point]
+                        segmentT = singleSourceDij_T[point]
+                        self[point] = build_track(segmentS, segmentT)
+                        self[point].vertex = point
+        self.maxLen = self.__update_maxLen()
+        print('%i tracks added!' % n)
+        return True
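+
+    # Route-generation idea (illustrative): with one Dijkstra tree rooted at
+    # `start` and one at `target`, the concatenation
+    #     path(start -> v) + reversed(path(target -> v))
+    # is a start-to-target route forced through the via-node v; sampling many
+    # different v yields a diverse track ensemble without re-running
+    # point-to-point searches.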
+
+    @staticmethod
+    def __find_list_middle(input_list):
+        # Return the middle element (the lower middle for even lengths).
+        return input_list[(len(input_list) - 1) // 2]
+
+    def add_n_bunch_random(self, n, penalty=None, safe=True, minLen=0):
+        """
+        :type n: int
+        :type penalty: float or int or None
+        :type safe: bool
+        :type minLen: int
+        :rtype: bool
+        """
+        if not isinstance(n, int) or not isinstance(minLen, int):
+            raise TypeError
+
+        if n >= 50:
+            # Generate the paths in parallel; each worker item carries the penalty.
+            results = self.worker.do_work([penalty] * n, self.map.return_random_path)
+            # TODO: Continue here --> Pool object in the self.map class; maybe change the multiprocessing call.
+            for track in results:
+                while minLen and len(track) <= minLen:
+                    print('Track was too small, regenerating...')
+                    track = self.map.return_random_path(penalty=penalty, safe=safe)
+                mid = self.__find_list_middle(track)
+                track.vertex = mid
+                self[mid] = track
+
+        else:
+            for i in range(n):
+                track = self.map.return_random_path(penalty=penalty, safe=safe)
+                while minLen and len(track) <= minLen:
+                    track = self.map.return_random_path(penalty=penalty, safe=safe)
+                mid = self.__find_list_middle(track)
+                track.vertex = mid
+                self[mid] = track
+
+        self.maxLen = self.__update_maxLen()
+        print('Returning %i tracks.' % n)
+        return True
+
+    def show(self, graphUpdate=False, saveIMG=False, hClass=False, trackList=None, allTracks=False):
+        """
+        :param graphUpdate: Determine whether node values in the connected graph are used.
+        :type graphUpdate: bool
+        :param saveIMG: Additionally save the result as ".tif" file.
+        :type saveIMG: bool
+        :param hClass: Draw the homotopy classes.
+        :type hClass: bool
+        :param trackList: None or list of tracks to draw.
+        :param allTracks: Show all tracks in grey in the background.
+        :type allTracks: bool
+        :return: Show or print the bitmap.
+        :rtype: None
+        """
+        if allTracks:
+            allTracks = self.values()
+        if hClass:
+            self.map.show(self.hDict, hClass=True, graphUpdate=graphUpdate, saveIMG=saveIMG,
+                          trackList=trackList, allTracks=allTracks)
+        else:
+            self.map.show(graphUpdate=graphUpdate, saveIMG=saveIMG, trackList=trackList, allTracks=allTracks)
+
+    def fill_archeArray(self):
+        # Sort the track keys by track length; the shortest track serves as the reference.
+        idxLenList = [[idx, trck.tracklen] for idx, trck in zip(self.keys(), self.values())]
+        idxLenList.sort(key=itemgetter(1))
+
+        # TODO: What is actually happening here? There is a Worker pool for
+        # exactly this purpose; no need to deploy a new one.
+        p = Pool()
+        poolResults = []
+        t = time.perf_counter()
+        print('All_Core_MultiProcessing_Pool started\n'
+              'Set up & loaded with %i tasks \ntime is %f' % (len(self), t))
+        for track in self.keys():
+            poolResults.append(p.apply_async(extract_arch_attributes,
+                                             args=(self[track], self[idxLenList[0][0]], list(self.hDict.keys()),)
+                                             ))
+        p.close()
+        p.join()
+        print('Pool closed, merging... time is %f\n%f seconds taken...'
+              % (time.perf_counter(), time.perf_counter() - t))
+
+        for result in poolResults:
+            attributes = result.get()
+            if self.archeArray is None:
+                self.archeArray = attributes
+            else:
+                self.archeArray = np.vstack([self.archeArray, attributes])
+
+        print(len(self.archeArray) if self.archeArray is not None else 0, ' attributes added!\n')
+        return True
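+
+    # Layout of archeArray (illustrative): one row per track, columns
+    #   [vertex/key, hull area, hull length, #vertices, centroid-midpoint
+    #    distance, orientation, track length, relative length, DTW distance,
+    #    homotopy class]
+    # PCHA expects (dimensions, examples), hence archeArray[:, 1:].T below.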
+
+    def return_archetypes(self, k):
+        if not self.hDict:
+            self.homotopic_classification()
+        if self.archeArray is None:
+            self.fill_archeArray()
+        if self.archeArray is not None:
+            data = self.archeArray.copy()[:, 1:].T.astype(np.float64)
+            # https://github.com/ulfaslak/py_pcha
+            # X = np.random.random((dimensions, examples)); transpose an array with array.T
+            # The data dtype needed to be changed for PCHA.
+            XC, S, C, SSE, varexpl = PCHA(data, noc=k, delta=0.1, verbose=False)
+
+            print('Archetypes:\n\n', XC, '\n\n Variance explained: ', varexpl)
+        else:
+            print('No attribute array could be built; aborting.')
+            return False
+
+        # Ref -- http://stackoverflow.com/questions/1401712/how-can-the-euclidean-distance-be-calculated-with-numpy
+        # Ref2 -- http://stackoverflow.com/questions/8049798/understanding-nested-list-comprehension
+        # For each archetype, find the track whose attributes are closest (Euclidean distance).
+        # noinspection PyTypeChecker
+        idxArray = np.argmin([[np.linalg.norm(candidate - arche)
+                               for candidate in self.archeArray[:, 1:]] for arche in XC.T], axis=1)
+        return [self[key] for key in [self.archeArray[:, 0][idx] for idx in idxArray]]
+
+    def return_walkableTileList(self):
+        return [npIndex for npIndex, value in np.ndenumerate(self.map.imgArray) if value == self.map.walkableTile]
+
+    def save_to_disc(self, filename):
+        filename = filename if filename.endswith('.pik') else '%s%s' % (filename, '.pik')
+        # The worker holds an unpicklable process pool; detach it while dumping.
+        self.worker = None
+        with open(filename, 'wb') as output:
+            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
+        self.worker = Worker(n=workercount)
+
+    def recover_from_disc(self, filename):
+        filename = filename if filename.endswith('.pik') else '%s%s' % (filename, '.pik')
+        with open(filename, 'rb') as file:
+            pick = pickle.load(file)
+            self.data = pick.data
+            self.map = pick.map
+            self.hDict = pick.hDict
+            self.archeArray = pick.archeArray
+        self.maxLen = self.__update_maxLen()
+
+    def as_n_sample_4D(self, timesteps, moving_window=False, stackSize=0, start=0,
+                       in_walk_dir=False, keys=False, for_track=None):
+        # Returns stacked isovist sequences of shape [nb_samples, timesteps, width, height, 1].
+        stackList, keyList = list(), list()
+        if moving_window:
+            for i, key in enumerate(self.keys()):
+
+                if for_track:
+                    track = self[for_track]
+                else:
+                    track = self[key]
+
+                if stackSize and len(stackList) > stackSize:
+                    stackList = stackList[:stackSize]
+                    if keys:
+                        keyList = keyList[:stackSize]
+                    break
+                if i >= start:
+                    isoArray = self.map.isovists.get_items_for_track(track, dim='full', in_walk_dir=in_walk_dir)
+                    isoArray = isoArray.swapaxes(0, 2)
+
+                    tempSequence = [isoArray[j:j + timesteps] for j in range(len(isoArray) - (timesteps - 1))]
+
+                    if keys:
+                        tempKeys = [track[(j + 1) + timesteps // 2] for j in range(len(tempSequence))]
+                        keyList.extend(tempKeys)
+
+                    stackList.extend(tempSequence)
+                if for_track:
+                    break
+
+        else:
+            for i, key in enumerate(self.keys()):
+                if for_track:
+                    track = self[for_track]
+                else:
+                    track = self[key]
+                if stackSize and i > stackSize * (start + 1):
+                    break
+                if i >= start * stackSize:
+                    diaStack = self.map.isovists.get_items_for_track(track, dim='full',
+                                                                     in_walk_dir=in_walk_dir).swapaxes(0, 2)
+                    x = diaStack.shape[0] // timesteps
+                    tempSequence = np.array_split(diaStack[:timesteps * x], x)
+
+                    if keys:
+                        tempKeys = [track[(j * timesteps) + 1 + (timesteps // 2)] for j in range(len(tempSequence))]
+                        keyList.extend(tempKeys)
+
+                    stackList.extend(tempSequence)
+                if for_track:
+                    break
+
+        if keys:
+            return keyList, np.stack(stackList).astype(int)[..., None]
+        else:
+            return np.stack(stackList).astype(int)[..., None]
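+
+    # Shape sketch (illustrative): with timesteps=5 and the default isovist
+    # range of 25, as_n_sample_4D returns an array of shape
+    # (nb_samples, 5, 25, 25, 1) -- i.e. [nb_samples, nb_frames, width,
+    # height, channels]; with keys=True it additionally returns the track
+    # coordinate at the centre of each window.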
+
+    def as_flatTfArray(self, maxLen=0):
+        if maxLen == -1:  # Ignore the maxLen parameter
+            return np.dstack([self.map.isovists.get_items_for_track(self[key]) for key in self.keys()])
+        if not maxLen:  # Default to the length of the longest track in the collection; not used when maxLen >= 1
+            maxLen = self.maxLen
+        resultArray = None
+        for track in self:
+            # Pad every track to maxLen by repeating the isovist of its last point.
+            sizedArray = np.dstack([self.map.isovists[self[track][-1]].get_1D_array() for _ in range(maxLen)])
+            isoArray = self.map.isovists.get_items_for_track(self[track])
+            sizedArray[:isoArray.shape[0], :isoArray.shape[1], :isoArray.shape[2]] = isoArray
+            if resultArray is None:
+                resultArray = sizedArray.ravel()
+            else:
+                resultArray = np.vstack([resultArray, sizedArray.ravel()])
+        return resultArray
+
+    def draw_track(self, key, saveIMG=''):
+        imArray = self.map.imgArray.copy()
+        for pixel in self[key]:
+            imArray[pixel] = 15
+        imshow(imArray)
+
+        if saveIMG:
+            if not isinstance(saveIMG, str):
+                raise TypeError('Needs to be a string to use as filename.')
+            saveIMG = saveIMG if saveIMG.endswith('.tif') else '%s.tif' % saveIMG
+            savefig(saveIMG)
+
+
+class Track(UserList):
+    def __init__(self, NodeList, walkableTile, qhull=True):
+        if not isinstance(NodeList, list):
+            raise TypeError
+        super().__init__()
+        self.walkableTile = walkableTile
+        self.data = NodeList.copy()
+        self.group = None
+        self.vertex = None
+        self.hull = sp.ConvexHull(np.array(self)) if qhull else None
+
+        self.tracklen = self.length()
+
+    def __setitem__(self, i, item):
+        self.data[i] = item
+        self.tracklen = self.length()
+
+    def __delitem__(self, i):
+        del self.data[i]
+        self.tracklen = self.length()
+
+    def length(self, *args):
+        """
+        :param args: Optionally pass a foreign list of points list(tuple(x, y));
+            the function then acts like a static method.
+        :type args: [(int, int)]
+        :return: Sum of the distances between every consecutive pair of points.
+        :rtype: float
+        Reference:
+        http://stackoverflow.com/questions/21216841/length-of-a-list-of-points/21217048
+        """
+        if len(args) == 0:
+            return sum([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in zip(self[:-1], self[1:])])
+        else:
+            return sum([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in zip(args[0][:-1], args[0][1:])])
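+
+    # Worked example (illustrative): for the points [(0, 0), (3, 4), (3, 5)]
+    # length() sums hypot(3, 4) + hypot(0, 1) = 5.0 + 1.0 = 6.0.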
+
+    def areaCHull(self):
+        # http://stackoverflow.com/questions/21727199/
+        # python-convex-hull-with-scipy-spatial-delaunay-how-to-eleminate-points-inside-t
+        if self.hull is None:
+            self.hull = sp.ConvexHull(np.array(self))
+        xList, yList = [self.hull.points[i][0] for i in self.hull.vertices], \
+                       [self.hull.points[i][1] for i in self.hull.vertices]
+        # Shoelace formula -- http://stackoverflow.com/questions/19873596/convex-hull-area-in-python
+        return 0.5 * np.abs(np.dot(xList, np.roll(yList, 1)) - np.dot(yList, np.roll(xList, 1)))
+
+    def getStart(self):
+        return self[0]
+
+    def getTarget(self):
+        return self[-1]
+
+    def isHomotopic(self, track, indoorToolset, baseArray=None):
+        if not isinstance(track, Track):
+            raise TypeError
+        # Close the loop: this track forward, the other track backwards.
+        polygon = self.data.copy()
+        polygon.extend(reversed(track.data.copy()))
+
+        img = Image.new('L', (indoorToolset.width, indoorToolset.height), 0)
+        ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
+        binPoly = np.array(img)
+
+        if baseArray is None:
+            baseArray = indoorToolset.imgArray
+
+        # Any overlap between the enclosed area and an obstacle pixel means
+        # the two tracks are not homotopic.
+        a = (binPoly * baseArray).sum()
+        return a < 1
+
+    def mergeWith(self, track):
+        """
+        :param track: A track to merge with
+        :type track: Track or list
+        :return: The two merged tracks
+        :rtype: Track
+        """
+        if isinstance(track, Track):
+            l2 = track.data
+        elif isinstance(track, list):
+            l2 = track
+        else:
+            raise TypeError('track has to be a list or a Track, but was: %s' % type(track))
+        # Copy first so that merging does not mutate this track in place.
+        combined = self.data.copy()
+        combined.extend(l2)
+        return Track(combined, self.walkableTile)
+
+    def return_isovists(self, trackCollection=None, indoorToolset=None):
+        if isinstance(trackCollection, TrackCollection):
+            return [trackCollection.map.isovists[point] for point in self]
+        elif isinstance(indoorToolset, IndoorToolset):
+            return [indoorToolset.isovists[point] for point in self]
+        else:
+            print('Please provide a TrackCollection or an IndoorToolset that holds the isovist reference.')
+            return False
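+
+
+# Shoelace sanity check (illustrative): for a unit square traversed as
+# (0,0), (1,0), (1,1), (0,1): x = [0, 1, 1, 0], y = [0, 0, 1, 1];
+# dot(x, roll(y, 1)) = 0 and dot(y, roll(x, 1)) = 2, so areaCHull's formula
+# yields 0.5 * |0 - 2| = 1.0, as expected.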
+
+
+class IndoorToolset(object):
+    def __init__(self, imageArray, walkableTile, graph=None, worker=None, isoVistSize=25):
+        """
+        :param graph: An optional Graph
+        :type graph: nx.Graph
+        """
+        if not isinstance(imageArray, np.ndarray) or not isinstance(worker, Worker):
+            raise TypeError
+
+        self.walkableTile = walkableTile
+        self.imgArray = imageArray
+        self.shape = self.imgArray.shape
+        self.height = self.shape[0]
+        self.width = self.shape[1]
+        if graph is not None and isinstance(graph, nx.Graph):
+            self.graph = graph.copy()
+        else:
+            self.graph = self.translate_to_graph()
+        self.__rand = random.Random()
+        self.worker = worker
+        self.isovists = IsovistCollection(self.walkableTile, isoVistSize, self.imgArray, worker=worker)
+
+    def refresh_random_clock(self):
+        self.__rand.seed(time.perf_counter())
+
+    def copy(self):
+        # Pass the worker along; __init__ would reject a copy without one.
+        return IndoorToolset(self.imgArray.copy(), self.walkableTile, graph=self.graph.copy(), worker=self.worker)
+
+    def __len__(self):
+        return self.width * self.height
+
+    def show(self, *args, graphUpdate=False, saveIMG=False, hClass=False, trackList=None, allTracks=None):
+        def print_n_show(img, name, hot=False):
+            # http://stackoverflow.com/questions/9406400/how-can-i-use-a-pre-made-color-map-for-my-heat-map-in-matplotlib
+            if hot:
+                imshow(img, cmap=get_cmap("hot"))  # , interpolation='nearest')  # Additional option
+            else:
+                imshow(img)
+            if saveIMG:
+                savefig(name)
+            show()
+
+        if graphUpdate:
+            maX = max(nx.get_node_attributes(self.graph, 'count').values())
+            maX = maX if maX >= 1 else 1
+            for node in self.graph.nodes():
+                color = int(self.graph.nodes[node]['count'] / maX * 100)
+                self.imgArray[node] = color if color != 0 else 255
+            print_n_show(self.imgArray, "heatMap.tif", hot=True)
+
+        if hClass:
+            if not args or not args[0]:
+                print('Not classified!')
+            else:
+                hDict = args[0].copy()
+
+                for i, hClassKey in enumerate(hDict.keys()):
+                    color = (i + 1) * 255 / len(hDict)
+                    for track in hDict[hClassKey]:
+                        self.imgArray[track] = int(color)
+                print_n_show(self.imgArray.copy(), 'hClass.tif')
+
+        if allTracks:
+            for track in allTracks:
+                for pixel in track:
+                    self.imgArray[pixel] = 7
+        if trackList:
+            if isinstance(trackList, list):
+                for i, track in enumerate(trackList):
+                    color = (i + 1) * 255 / len(trackList)
+                    for pixel in track:
+                        self.imgArray[pixel] = int(color)
+                print_n_show(self.imgArray, 'tracks.tif')
+
+    def getRandomPos(self, verbose=False):
+        """
+        :param verbose: Print more information
+        :type verbose: bool
+
+        :return: A random walkable position in the graph
+        :rtype: (int, int)
+        """
+        self.refresh_random_clock()
+        rs = list(self.graph.nodes())[self.__rand.randrange(len(self.graph))]
+        if verbose:
+            print(rs, ' is a random position -> checking accessibility')
+        while self.imgArray[rs] != self.walkableTile:
+            self.refresh_random_clock()
+            rs = list(self.graph.nodes())[self.__rand.randrange(len(self.graph))]
+            if verbose:
+                print(rs, ' needed to be recomputed; the position above was not walkable')
+        if verbose:
+            print('is valid!')
+        return rs
+
+    def translate_to_graph(self):
+        graph = nx.Graph()
+        # Only the four already-visited neighbours (left, and three in the
+        # previous row) need to be checked; the symmetric edges arise when
+        # the remaining neighbours are processed later in the scan.
+        for idx, value in np.ndenumerate(self.imgArray):
+            if value == self.walkableTile:
+                x, y = idx
+                graph.add_node((x, y), count=0)
+
+                # up
+                if graph.has_node((x, y - 1)):
+                    graph.add_edge((x, y), (x, y - 1), weight=1)
+
+                # upLeft
+                if graph.has_node((x - 1, y - 1)):
+                    graph.add_edge((x, y), (x - 1, y - 1), weight=sqrt(2))
+
+                # lowerLeft
+                if graph.has_node((x - 1, y + 1)):
+                    graph.add_edge((x, y), (x - 1, y + 1), weight=sqrt(2))
+
+                # left
+                if graph.has_node((x - 1, y)):
+                    graph.add_edge((x, y), (x - 1, y), weight=1)
+
+        return graph
+
+    def calculate_path(self, source, target, alg='dijkstra', path=None, penalty=None, qhull=True):
+        """
+        Calculate a path through the graph, based on the bitmap you imported.
+
+        :param source: (X, Y) position tuple to route from
+        :type source: (int, int)
+        :param target: (X, Y) position tuple to route to
+        :type target: (int, int)
+        :param alg: Define the routing algorithm through the graph
+        :type alg: str
+        :param path: Use this for updating edge weights for an already given path
+            in the form of a list of (X, Y) position tuples
+        :type path: list of (int, int)
+        :param penalty: Set a number for applying edge weights
+        :type penalty: None or float
+        :param qhull: Compute the convex hull of the resulting Track
+        :type qhull: bool
+        :return: A path calculated with the given algorithm, default: 'dijkstra'
+        :rtype: Track
+        """
+        if path is None:
+            if alg == 'dijkstra':
+                dij_s_p = nx.dijkstra_path(self.graph, source, target, weight='weight')
+            elif alg == 'bi_dijkstra':
+                # bidirectional_dijkstra returns (length, path); keep the path.
+                dij_s_p = nx.bidirectional_dijkstra(self.graph, source, target, weight='weight')[1]
+            else:
+                raise ValueError('Unknown routing algorithm: %s' % alg)
+        else:
+            dij_s_p = path
+
+        if penalty and isinstance(penalty, (int, float)):
+            for node in dij_s_p.copy():
+                oldCount = self.graph.nodes[node]['count']
+                self.graph.add_node(node, count=oldCount + penalty)
+
+            for currNode, nextNode in zip(dij_s_p[:-1], dij_s_p[1:]):
+                oldWeight = self.graph.edges[currNode, nextNode]['weight']
+                self.graph.add_edge(currNode, nextNode, weight=oldWeight + penalty)
+
+        track = Track(dij_s_p, self.walkableTile, qhull=qhull)
+        print(len(dij_s_p), ' len path generated -> nodes: ', len(self.graph), ' -> edges: ', len(self.graph.edges()))
+        return track
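+
+    # Usage sketch (illustrative; coordinates are made up): repeated routing
+    # with a penalty makes popular corridors increasingly expensive, so
+    # successive calls spread across alternative routes, e.g.
+    #     toolset.calculate_path((2, 3), (40, 17), penalty=0.5)
+    # raises the 'count' of every visited node and the weight of every used
+    # edge by 0.5 before the next query.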
+
+    def return_random_path(self, penalty=None, safe=False):
+        """
+        Calculate a single random shortest path.
+
+        :param penalty: Set a number for applying edge weights
+        :type penalty: None or float
+
+        :param safe: Apply a connectivity check before returning.
+            Affects the performance; only useful when dealing with multiple
+            disconnected components.
+        :type safe: bool
+
+        :return: A random shortest path.
+        :rtype: Track
+        """
+        p, p2 = (0, 0), (0, 0)
+        # Re-sample while the angle in degrees modulo 45 == 0 or the distance is <= sqrt(2).
+        while degrees(atan2(p[1] - p2[1], p[0] - p2[0])) % 45 == 0 or hypot(p[0] - p2[0], p[1] - p2[1]) <= sqrt(2):
+            p, p2 = self.getRandomPos(), self.getRandomPos()
+        print('source: ', p, 'target: ', p2, ' - generated')
+        if safe:
+            while not nx.has_path(self.graph, p, p2):
+                p, p2 = self.getRandomPos(), self.getRandomPos()
+                print('unconnected -> new try: S=', p, ' T=', p2)
+        return self.calculate_path(p, p2, penalty=penalty)
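+
+
+# Note on the sampling loop above (illustrative): rejecting start/target pairs
+# whose connecting angle is a multiple of 45 degrees or whose distance is at
+# most sqrt(2) filters out degenerate pairs (identical, adjacent, or perfectly
+# axis/diagonal-aligned) that would yield trivial straight-line tracks.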
+
+
+# Extraction function -- has to live at module level so multiprocessing can pickle it.
+def extract_arch_attributes(track, shortestT, hClassList):
+    attributes = list()
+    # ▪ TrackID - do not use as a real attribute for analysis input!
+    #   Remove it via dataArray[:, 1:] (subset without the first attribute).
+    attributes.append(track.vertex)
+    # ▪ Convex hull's area,
+    attributes.append(track.areaCHull())
+    # ▪ length,
+    #   (note: hull.points holds all track points in input order, not only the hull perimeter)
+    attributes.append(track.length(track.hull.points))
+    #
+    # Longest distance between two vertices of the convex hull; this is probably the Euclidean
+    # distance between start and target - ups
+    # cLD = max([hypot(p1[0] - p2[0], p1[1] - p2[1]) for p1, p2 in combinations(track.hull.points, 2)])
+    # attributes.append(cLD)
+    #
+    # ▪ vertices,
+    attributes.append(len(track))
+    # ▪ centroid - (distance to the midpoint between start and target)
+    # http://stackoverflow.com/questions/31562534/scipy-centroid-of-convex-hull
+    # hull.vertices holds indices into hull.points, so index first, then average.
+    centroid = list((np.mean(track.hull.points[track.hull.vertices, 0]),
+                     np.mean(track.hull.points[track.hull.vertices, 1])))
+    midpoint = list(((track[0][0] + track[-1][0]) / 2, (track[0][1] + track[-1][1]) / 2))
+    cMidDist = hypot(centroid[0] - midpoint[0], centroid[1] - midpoint[1])
+    attributes.append(cMidDist)
+    # ▪ and orientation -- http://stackoverflow.com/questions/31735499/calculate-angle-clockwise-between-two-points
+    # https://docs.python.org/dev/reference/expressions.html#calls
+    ang1 = np.arctan2(*centroid[::-1])
+    ang2 = np.arctan2(*midpoint[::-1])
+    attributes.append(np.rad2deg((ang1 - ang2) % (2 * np.pi)))
+    # ▪ Length
+    attributes.append(track.tracklen)
+    # ▪ Angular sum (cancelling / positive)
+    # Not yet implemented
+    # ▪ Relative length regarding the shortest route
+    attributes.append(track.tracklen / shortestT.tracklen)
+    # ▪ DTW distance
+    dist, cost, acc, path = \
+        dtw(np.array(track), np.array(shortestT), dist=lambda x, y: np.linalg.norm(x - y, ord=1))
+    attributes.append(dist)
+    # ▪ Pixel's average / min "heat"
+    # Not yet implemented
+    # ▪ Homotopic class - remap the (x, y) coordinate-tuple keys to an int representation
+    attributes.append(hClassList.index(track.group))
+    return np.array(attributes)
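+
+
+# End-to-end usage sketch (illustrative, not part of the original module).
+# The file name 'blueprint.tif', the walkable value 255, and the counts below
+# are assumptions; the real entry point lives elsewhere.
+if __name__ == '__main__':
+    img = np.array(Image.open('blueprint.tif'))  # assumed input bitmap
+    main_worker = Worker(n=workercount)          # consumed by the isovist pre-computation
+    toolset = IndoorToolset(img, 255, worker=main_worker)
+    tracks = TrackCollection(toolset, worker=Worker(n=workercount))
+    tracks.add_n_bunch_random(10, penalty=0.5, minLen=20)
+    tracks.homotopic_classification()
+    archetypes = tracks.return_archetypes(4)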