Refactor:

Step 4 - Aggregating Neural Networks
Step 5 - Training Neural Networks
This commit is contained in:
Si11ium 2019-06-08 21:28:38 +02:00
parent 50f7f84084
commit 203c5b45e3

View File

@ -1,6 +1,7 @@
import numpy as np
from abc import abstractmethod, ABC
from typing import List, Union
from types import FunctionType
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.callbacks import Callback
@ -63,6 +64,12 @@ class Weights:
def __getitem__(self, item):
    """Return the entry of ``self.layers`` selected by ``item``.

    :param item: Any index or slice accepted by ``self.layers``.
    :return: Whatever ``self.layers[item]`` yields.
    """
    layers = self.layers
    return layers[item]
def max(self):
    """Return the largest single weight found in any layer.

    The per-layer maxima are reduced separately so that layers of
    different shapes are handled as well.

    :return: The maximum weight value.
    :raises ValueError: If there are no layers or a layer is empty.
    """
    return np.max([np.max(layer) for layer in self.layers])
def avg(self):
    """Return the mean over every individual weight of every layer.

    All layers are flattened into one vector first, so layers of different
    shapes are supported and each weight counts exactly once.

    :return: The arithmetic mean of all weights.
    """
    flat = np.concatenate([np.ravel(layer) for layer in self.layers])
    return np.average(flat)
def __len__(self):
    """Return the total number of weights, i.e. the summed ``size`` of all layers."""
    total = 0
    for layer in self.layers:
        total += layer.size
    return total
@ -101,23 +108,24 @@ class Weights:
new_weights = self.__reshape_flat_array__(array, self.shapes())
return new_weights
def shuffle(self):
    """Randomly permute the weights across the whole network.

    The flat weight vector is fetched via ``to_flat_array``, shuffled in
    place with ``np.random.shuffle`` and handed back to ``from_flat_array``.

    :return: Always ``True``.
    :rtype: bool
    """
    flattened = self.to_flat_array()
    np.random.shuffle(flattened)
    self.from_flat_array(flattened)
    return True
def are_diverged(self):
    """Tell whether any layer contains a NaN or an infinite value.

    :return: ``True`` if at least one weight is NaN or +/-inf.
    :rtype: bool
    """
    if any(np.isnan(layer).any() for layer in self.layers):
        return True
    return any(np.isinf(layer).any() for layer in self.layers)
def are_within_bounds(self, lower_bound: float, upper_bound: float):
    """Check that every weight lies strictly between the two bounds.

    :param lower_bound: Exclusive lower limit.
    :param upper_bound: Exclusive upper limit.
    :return: ``True`` only if ``lower_bound < w < upper_bound`` holds for
        every weight of every layer.
    :rtype: bool
    """
    # The mask has to be reduced with .all(); its .size counts every element,
    # whether or not it satisfies the condition.
    return all(
        bool(((lower_bound < layer) & (layer < upper_bound)).all())
        for layer in self.layers
    )
def apply_new_weights(self, weights: np.ndarray):
    """Build new layers shaped like ``self.layers`` from a flat weight vector.

    Values are consumed from ``weights`` in order, layer by layer, and cast
    to each layer's dtype. ``self.layers`` itself is left unmodified. Layers
    of any dimensionality are supported.

    :param weights: Flat sequence holding at least as many values as there
        are weights in all layers together.
    :return: A list of new arrays, one per layer, with the original shapes.
    :raises IndexError: If ``weights`` holds too few values.
    """
    flat = np.ravel(weights)
    rebuilt = []
    offset = 0
    for layer in self.layers:
        layer = np.asarray(layer)
        stop = offset + layer.size
        if stop > flat.size:
            raise IndexError('not enough weights to fill all layers')
        # np.array copies, so the result never aliases the input vector.
        rebuilt.append(np.array(flat[offset:stop], dtype=layer.dtype).reshape(layer.shape))
        offset = stop
    return rebuilt
def aggregate_by(self, func: FunctionType, num_aggregates):
    """Split the flat weights into equally sized groups and aggregate each.

    The first ``(len(self) // num_aggregates) * num_aggregates`` weights are
    reshaped to ``(num_aggregates, -1)`` and passed to ``func`` together with
    ``num_aggregates``; the weights that do not fit are returned untouched.

    :param func: Callable invoked as ``func(grouped_weights, num_aggregates)``.
    :param num_aggregates: Number of groups to form.
    :return: Tuple ``(aggregated_weights, left_overs)``.
    """
    flat = self.to_flat_array()
    used = (len(self) // num_aggregates) * num_aggregates
    grouped = flat[:used].reshape((num_aggregates, -1))
    return func(grouped, num_aggregates), flat[used:]
class NeuralNetwork(ABC):
@ -154,7 +162,12 @@ class NeuralNetwork(ABC):
return self.get_weights().to_flat_array()
def set_weights(self, new_weights: "Weights"):
    """Load the given weights into the wrapped model.

    :param new_weights: Weights container; its ``layers`` attribute is what
        gets passed to ``self.model.set_weights``.
    :return: Whatever ``self.model.set_weights`` returns.
    """
    return self.model.set_weights(new_weights.layers)
@abstractmethod
def get_samples(self):
    """Return the training samples of this network.

    Must be overridden by subclasses. The training decorator unpacks the
    result as ``x, y`` and passes both to ``model.fit``.

    :raises NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
@abstractmethod
def apply_to_weights(self, old_weights) -> Weights:
@ -250,12 +263,12 @@ class ParticleDecorator:
def update_state(self, number, **kwargs):
    """Deliberately disabled; always raises.

    :param number: Index of the state that would be updated (unused).
    :param kwargs: Arguments for the state that would be created (unused).
    :raises NotImplementedError: Always.
    """
    # NOTE: the former implementation overwrote states[number] when it
    # existed, or padded the list with None and appended a new state. It was
    # switched off because its result was considered vague.
    raise NotImplementedError('Result is vague')
def get_states(self):
    """Return the ``states`` attribute as stored (no copy is made)."""
    recorded = self.states
    return recorded
@ -278,15 +291,20 @@ class WeightwiseNeuralNetwork(NeuralNetwork):
# TODO: Write about it... What does it do?
return self.model.predict(inputs)
def get_samples(self):
    """Build one training row per weight of the current network.

    Each row is ``[weight, layer, cell, position]``. The three index columns
    are divided by their column maximum; a column whose maximum is zero
    (e.g. a network with a single layer) is left as is instead of turning
    into NaN.

    :return: Tuple ``(sample, sample)``; the same array serves as input and
        target.
    """
    weights = self.get_weights()
    sample = np.asarray([
        [weight, layer_id, *position]
        for layer_id, layer in enumerate(weights.layers)
        for position, weight in np.ndenumerate(layer)
    ], dtype=float)
    # normalize [layer, cell, position]; guard against dividing by zero
    maxima = sample[:, 1:].max(axis=0)
    maxima[maxima == 0] = 1.0
    sample[:, 1:] /= maxima
    return sample, sample
def apply_to_weights(self, weights) -> "Weights":
    """Run the network on its weight samples and wrap the output as Weights.

    The network input is the first element of ``self.get_samples()``; the
    ``weights`` argument only supplies the target shapes.

    :param weights: Weights whose ``shapes()`` define the layout of the result.
    :return: New ``Weights`` built from the network output.
    """
    transformed_weights = self.get_samples()[0]
    new_weights = self.apply(transformed_weights)
    # use the original weight shape to transform the new tensor
    return Weights(new_weights, flat_array_shape=weights.shapes())
@ -295,34 +313,40 @@ class WeightwiseNeuralNetwork(NeuralNetwork):
class AggregatingNeuralNetwork(NeuralNetwork):
@staticmethod
def aggregate_average(weights):
total = 0
count = 0
for weight in weights:
total += float(weight)
count += 1
return total / float(count)
def aggregate_fft(array: np.ndarray, aggregates: int):
    """Reduce the weights to ``aggregates`` Fourier coefficients.

    The input is flattened and transformed with a 1-D FFT of length
    ``aggregates`` (the input is cropped or zero-padded to that length).

    :param array: Weights to aggregate, any shape.
    :param aggregates: Number of coefficients to return.
    :return: Complex array of shape ``(aggregates,)``.
    """
    flat = np.asarray(array).flatten()
    # fftn expects a sequence for its shape argument; for a flat vector the
    # plain 1-D transform with an explicit length is the equivalent call.
    return np.fft.fft(flat, n=aggregates)
@staticmethod
def aggregate_max(weights):
max_found = weights[0]
for weight in weights:
max_found = weight > max_found and weight or max_found
return max_found
def aggregate_average(array, _):
    """Average each row of ``array``.

    :param array: 2-D array with one row per aggregate.
    :param _: Ignored; present so all aggregators share one signature.
    :return: 1-D array of row means.
    """
    return np.mean(array, axis=1)
@staticmethod
def aggregate_max(array, _):
    """Take the maximum of each row of ``array``.

    :param array: 2-D array with one row per aggregate.
    :param _: Ignored; present so all aggregators share one signature.
    :return: 1-D array of row maxima.
    """
    return np.maximum.reduce(array, axis=1)
@staticmethod
def deaggregate_identically(aggregate, amount):
    """Expand every aggregate back into ``amount`` identical weights.

    The result is laid out group by group
    (``[a0, a0, ..., a1, a1, ...]``), which matches the contiguous groups
    formed by ``reshape((num_aggregates, -1))`` when aggregating.

    :param aggregate: Aggregated values; any shape, flattened before use
        (so a batched ``(1, n)`` network output works without indexing).
    :param amount: How many weights each aggregate stands for.
    :return: 1-D array of length ``aggregate.size * amount``.
    """
    return np.repeat(np.ravel(aggregate), amount)
@staticmethod
def shuffle_not(weights: "Weights"):
    """
    Identity shuffler: hands the weights back unchanged.

    :param weights: The weights to (not) shuffle.
    :type weights: Weights
    :return: The very same object that was passed in.
    :rtype: Weights
    """
    return weights
@staticmethod
def shuffle_random(weights: "Weights"):
    """
    Shuffle the given weights in place and return them.

    :param weights: The weights to shuffle; must provide ``shuffle()``.
    :type weights: Weights
    :return: The same object, after shuffling.
    :rtype: Weights
    :raises AssertionError: If ``weights.shuffle()`` reports failure.
    """
    # Keep the call outside of an assert statement: asserts are stripped
    # under ``python -O`` and the shuffle would silently not happen.
    if not weights.shuffle():
        raise AssertionError('shuffling the weights failed')
    return weights
def __init__(self, aggregates, width, depth, **kwargs):
super().__init__(**kwargs)
@ -345,76 +369,38 @@ class AggregatingNeuralNetwork(NeuralNetwork):
return self.params.get('shuffler', self.shuffle_not)
def get_amount_of_weights(self):
    """Return the number of weights of this network.

    :return: ``len()`` of the object returned by ``self.get_weights()``.
    :rtype: int
    """
    return len(self.get_weights())
def apply(self, *inputs):
stuff = np.transpose(np.array([[inputs[i]] for i in range(self.aggregates)]))
return self.model.predict(stuff)[0]
def apply_to_weights(self, old_weights):
# build aggregations from old_weights
collection_size = self.get_amount_of_weights() // self.aggregates
collections, leftovers = self.collect_weights(old_weights, collection_size)
# call network
old_aggregations = [self.get_aggregator()(collection) for collection in collections]
new_aggregations = self.apply(*old_aggregations)
# generate list of new weights
new_weights_list = []
for aggregation_id, aggregation in enumerate(new_aggregations):
if aggregation_id == self.aggregates - 1:
new_weights_list += self.get_deaggregator()(aggregation, collection_size + leftovers)
else:
new_weights_list += self.get_deaggregator()(aggregation, collection_size)
new_weights_list = self.get_shuffler()(new_weights_list)
# write back new weights
new_weights = self.fill_weights(old_weights, new_weights_list)
# return results
# if self.params.get("print_all_weight_updates", False) and not self.is_silent():
# print("updated old weight aggregations " + str(old_aggregations))
# print("to new weight aggregations " + str(new_aggregations))
# print("resulting in network weights ...")
# print(self.weights_to_string(new_weights))
return new_weights
@staticmethod
def collect_weights(all_weights, collection_size):
collections = []
next_collection = []
current_weight_id = 0
for layer_id, layer in enumerate(all_weights):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
next_collection += [weight]
if (current_weight_id + 1) % collection_size == 0:
collections += [next_collection]
next_collection = []
current_weight_id += 1
collections[-1] += next_collection
leftovers = len(next_collection)
return collections, leftovers
def get_collected_weights(self):
collection_size = self.get_amount_of_weights() // self.aggregates
return self.collect_weights(self.get_weights(), collection_size)
def apply(self, inputs):
    """Feed ``inputs`` to the model as a batch of one.

    :param inputs: Array holding a single sample.
    :return: The result of ``self.model.predict`` on the batched input.
    """
    # A leading axis of length one is added so the sample forms a batch.
    batched = inputs[np.newaxis]
    return self.model.predict(batched)
def get_aggregated_weights(self):
    """Aggregate the current weights into ``self.aggregates`` values.

    Delegates to ``aggregate_by`` of the current weights, using the
    configured aggregator.

    :return: Tuple ``(aggregations, leftovers)`` as returned by
        ``aggregate_by``.
    """
    return self.get_weights().aggregate_by(self.get_aggregator(), self.aggregates)
def compute_samples(self):
def apply_to_weights(self, old_weights) -> Weights:
    """Aggregate the current weights, run the network on them and expand
    the output back to a full set of weights.

    :param old_weights: Weights whose ``shapes()`` define the layout of the
        result.
    :return: The new weights, passed through the configured shuffler.
    """
    # aggregate the current weights and let the network transform them
    aggregated, remainder = self.get_aggregated_weights()
    predicted = self.apply(aggregated)

    # expand every aggregate back to the size of its group
    per_group = self.get_amount_of_weights() // self.aggregates
    expanded = self.deaggregate_identically(predicted, per_group)

    # only append leftovers if there are some, then convert everything to
    # Weights on the basis of the old shape
    if remainder.shape[0]:
        flat = np.hstack((expanded, remainder))
    else:
        flat = expanded
    result = Weights(flat, flat_array_shape=old_weights.shapes())

    # maybe shuffle
    return self.get_shuffler()(result)
def get_samples(self):
    """Return the aggregated weights as a single training sample.

    The aggregations get a leading batch axis, giving shape
    ``(1, number_of_aggregates)``, so they form a batch of one sample.

    :return: Tuple ``(sample, sample)``; the same array serves as input and
        target.
    """
    aggregations, _ = self.get_aggregated_weights()
    sample = np.asarray(aggregations)[np.newaxis, ...]
    return sample, sample
def is_fixpoint_after_aggregation(self, degree=1, epsilon=None):
assert degree >= 1, "degree must be >= 1"
@ -428,96 +414,15 @@ class AggregatingNeuralNetwork(NeuralNetwork):
if new_weights.are_diverged():
return False
# ToDo: Explain This, what the heck is happening?
collection_size = self.get_amount_of_weights() // self.aggregates
collections, leftovers = self.__class__.collect_weights(new_weights, collection_size)
new_aggregations = [self.get_aggregator()(collection) for collection in collections]
new_aggregations, leftovers = self.get_aggregated_weights()
# ToDo: Explain This, why are you additionally checking tolerances of aggregated weights?
biggerEpsilon = (np.abs(np.asarray(old_aggregations) - np.asarray(new_aggregations)) >= epsilon).any()
# Boolean value hast to be flipped to answer the question.
# Boolean value has to be flipped to answer the question.
return True, not biggerEpsilon
class FFTNeuralNetwork(NeuralNetwork):
@staticmethod
def aggregate_fft(weights, dims):
flat = np.hstack([weight.flatten() for weight in weights])
fft_reduction = np.fft.fftn(flat, dims)[None, ...]
return fft_reduction
@staticmethod
def deaggregate_identically(aggregate, dims):
fft_inverse = np.fft.ifftn(aggregate, dims)
return fft_inverse
@staticmethod
def shuffle_not(weights_list):
return weights_list
@staticmethod
def shuffle_random(weights_list):
import random
random.shuffle(weights_list)
return weights_list
def __init__(self, aggregates, width, depth, **kwargs):
super().__init__(**kwargs)
self.aggregates = aggregates
self.width = width
self.depth = depth
self.model = Sequential()
self.model.add(Dense(units=width, input_dim=self.aggregates, **self.keras_params))
for _ in range(depth-1):
self.model.add(Dense(units=width, **self.keras_params))
self.model.add(Dense(units=self.aggregates, **self.keras_params))
def get_shuffler(self):
return self.params.get('shuffler', self.shuffle_not)
def get_amount_of_weights(self):
total_weights = 0
for layer_id, layer in enumerate(self.get_weights()):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
total_weights += 1
return total_weights
def apply(self, inputs):
sample = np.asarray(inputs)
return self.model.predict(sample)[0]
def apply_to_weights(self, old_weights):
# build aggregations from old_weights
weights = self.get_weights_flat()
# call network
old_aggregation = self.aggregate_fft(weights, self.aggregates)
new_aggregation = self.apply(old_aggregation)
# generate list of new weights
new_weights_list = self.deaggregate_identically(new_aggregation, self.get_amount_of_weights())
new_weights_list = self.get_shuffler()(new_weights_list)
# write back new weights
new_weights = self.fill_weights(old_weights, new_weights_list)
# return results
# if self.params.get("print_all_weight_updates", False) and not self.is_silent():
# print("updated old weight aggregations " + str(old_aggregation))
# print("to new weight aggregations " + str(new_aggregation))
# print("resulting in network weights ...")
# print(self.weights_to_string(new_weights))
return new_weights
def compute_samples(self):
weights = self.get_weights()
sample = np.asarray(weights)[None, ...]
return [sample], [sample]
class RecurrentNeuralNetwork(NeuralNetwork):
def __init__(self, width, depth, **kwargs):
@ -568,22 +473,22 @@ class RecurrentNeuralNetwork(NeuralNetwork):
return sample, sample
class TrainingNeuralNetworkDecorator():
class TrainingNeuralNetworkDecorator:
def __init__(self, network):
    """Wrap ``network`` so it can be trained.

    :param network: The network to decorate; stored as ``self.network``.
    """
    self.network = network
    # defaults handed to model.compile; copied and extended in compile_model
    self.compile_params = dict(loss='mse', optimizer='sgd')
    self.model_compiled = False
def __getattr__(self, name):
    """Forward lookups of unknown attributes to the wrapped network.

    :param name: Name of the attribute that was not found on the decorator.
    :return: ``getattr(self.network, name)``.
    :raises AttributeError: If ``network`` itself is not set yet.
    """
    # __getattr__ only runs when normal lookup fails. If 'network' is the
    # missing attribute, touching self.network would re-enter this method
    # and recurse without end.
    if name == 'network':
        raise AttributeError(name)
    return getattr(self.network, name)
def with_params(self, **kwargs):
    """Forward ``kwargs`` to ``with_params`` of the wrapped network.

    :return: This decorator, so calls can be chained.
    """
    self.network.with_params(**kwargs)
    return self
def with_keras_params(self, **kwargs):
    """Forward ``kwargs`` to ``with_keras_params`` of the wrapped network.

    :return: This decorator, so calls can be chained.
    """
    self.network.with_keras_params(**kwargs)
    return self
def get_compile_params(self):
@ -596,7 +501,7 @@ class TrainingNeuralNetworkDecorator():
def compile_model(self, **kwargs):
    """Compile the wrapped network's model.

    The stored ``compile_params`` are copied, updated with ``kwargs`` and
    passed to ``model.compile``; the stored defaults stay untouched.

    :param kwargs: Overrides or additions for the compile parameters.
    :return: Whatever ``model.compile`` returns.
    """
    compile_params = copy.deepcopy(self.compile_params)
    compile_params.update(kwargs)
    return self.network.model.compile(**compile_params)
def compiled(self, **kwargs):
if not self.model_compiled:
@ -606,35 +511,37 @@ class TrainingNeuralNetworkDecorator():
def train(self, batchsize=1, store_states=True, epoch=0):
    """Fit the wrapped model for one epoch on the network's own samples.

    :param batchsize: Passed to ``fit`` as ``batch_size``.
    :param store_states: If truthy, a ``SaveStateCallback`` is attached to
        the fit call.
    :param epoch: Epoch to run; ``fit`` is called with
        ``initial_epoch=epoch`` and ``epochs=epoch + 1``.
    :return: The last loss value recorded in the fit history.
    """
    self.compiled()
    x, y = self.network.get_samples()
    savestatecallback = [SaveStateCallback(network=self, epoch=epoch)] if store_states else None
    history = self.network.model.fit(x=x, y=y, epochs=epoch+1, verbose=0,
                                     batch_size=batchsize, callbacks=savestatecallback,
                                     initial_epoch=epoch)
    return history.history['loss'][-1]
def learn_from(self, other_network, batchsize=1):
    """Fit this network's model on the samples of another decorated network.

    :param other_network: Another training decorator; its wrapped network
        supplies the samples via ``get_samples``.
    :param batchsize: Passed to ``fit`` as ``batch_size``.
    :return: The last loss value recorded in the fit history.
    """
    self.compiled()
    other_network.compiled()
    x, y = other_network.network.get_samples()
    history = self.network.model.fit(x=x, y=y, verbose=0, batch_size=batchsize)
    return history.history['loss'][-1]
if __name__ == '__main__':
def run_exp(net, prints=False):
def run_exp(network, prints=False):
# INFO Run_ID needs to be more than 0, so that exp stores the trajectories!
exp.run_net(net, 100, run_id=run_id + 1)
exp.historical_particles[run_id] = net
exp.run_net(network, 100, run_id=run_id + 1)
exp.historical_particles[run_id] = network
if prints:
print("Fixpoint? " + str(net.is_fixpoint()))
print("Fixpoint? " + str(network.is_fixpoint()))
print("Loss " + str(loss))
if True:
if False:
# WeightWise Neural Network
with FixpointExperiment() as exp:
for run_id in tqdm(range(100)):
net = ParticleDecorator(WeightwiseNeuralNetwork(width=2, depth=2) \
.with_keras_params(activation='linear'))
for run_id in tqdm(range(10)):
net = ParticleDecorator(
WeightwiseNeuralNetwork(width=2, depth=2).with_keras_params(activation='linear'))
run_exp(net)
K.clear_session()
exp.log(exp.counters)
@ -642,36 +549,37 @@ if __name__ == '__main__':
if False:
# Aggregating Neural Network
with FixpointExperiment() as exp:
for run_id in tqdm(range(100)):
net = ParticleDecorator(AggregatingNeuralNetwork(aggregates=4, width=2, depth=2) \
.with_keras_params())
for run_id in tqdm(range(10)):
net = ParticleDecorator(
AggregatingNeuralNetwork(aggregates=4, width=2, depth=2).with_keras_params())
run_exp(net)
K.clear_session()
exp.log(exp.counters)
if False:
#FFT Neural Network
# FFT Aggregation
with FixpointExperiment() as exp:
for run_id in tqdm(range(100)):
net = ParticleDecorator(FFTNeuralNetwork(aggregates=4, width=2, depth=2) \
.with_keras_params(activation='linear'))
for run_id in tqdm(range(10)):
net = ParticleDecorator(
AggregatingNeuralNetwork(
aggregates=4, width=2, depth=2, aggregator=AggregatingNeuralNetwork.aggregate_fft
).with_keras_params(activation='linear'))
run_exp(net)
K.clear_session()
exp.log(exp.counters)
if False:
if True:
# ok so this works quite reliably
with FixpointExperiment() as exp:
for i in range(1):
run_count = 1000
net = TrainingNeuralNetworkDecorator(ParticleDecorator(WeightwiseNeuralNetwork(width=2, depth=2)))
net.with_params(epsilon=0.0001).with_keras_params(optimizer='sgd')
for run_id in tqdm(range(run_count+1)):
net.compiled()
loss = net.train(epoch=run_id)
if run_id % 100 == 0:
run_exp(net)
K.clear_session()
run_count = 1000
net = TrainingNeuralNetworkDecorator(ParticleDecorator(WeightwiseNeuralNetwork(width=2, depth=2)))
net.with_params(epsilon=0.0001).with_keras_params(optimizer='sgd')
for run_id in tqdm(range(run_count+1)):
net.compiled()
loss = net.train(epoch=run_id)
if run_id % 100 == 0:
run_exp(net)
K.clear_session()
if False:
with FixpointExperiment() as exp: