From 19e4ed65f91b60acb2f3855fb9b851bc3bece741 Mon Sep 17 00:00:00 2001
From: Thomas Gabor <>
Date: Tue, 5 Mar 2019 04:42:50 +0100
Subject: [PATCH] built class for training all networks, including working fixpoint check and some experiments on that

---
 code/experiment.py |  17 ++-
 code/network.py    | 285 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 271 insertions(+), 31 deletions(-)

diff --git a/code/experiment.py b/code/experiment.py
index b77410f..88df2b4 100644
--- a/code/experiment.py
+++ b/code/experiment.py
@@ -2,6 +2,7 @@ import sys
 import os
 import time
 import dill
+from tqdm import tqdm
 
 
 class Experiment:
@@ -69,15 +70,23 @@ class FixpointExperiment(Experiment):
                 self.counters['fix_zero'] += 1
             else:
                 self.counters['fix_other'] += 1
-                self.interesting_fixpoints.append(net)
-                self.log(net.repr_weights())
-                net.self_attack()
-                self.log(net.repr_weights())
+                self.interesting_fixpoints.append(net.get_weights())
         elif net.is_fixpoint(2):
             self.counters['fix_sec'] += 1
         else:
             self.counters['other'] += 1
 
+
+class MixedFixpointExperiment(FixpointExperiment):
+
+    def run_net(self, net, trains_per_application=100, step_limit=100):
+        i = 0
+        while i < step_limit and not net.is_diverged() and not net.is_fixpoint():
+            net.self_attack()
+            for _ in tqdm(range(trains_per_application)):
+                loss = net.compiled().train()
+            i += 1
+        self.count(net)
+
 
 class SoupExperiment(Experiment):
     pass
diff --git a/code/network.py b/code/network.py
index 982ba4b..ae11bc9 100644
--- a/code/network.py
+++ b/code/network.py
@@ -1,14 +1,13 @@
 import math
 import copy
-
 import os
 import numpy as np
+from tqdm import tqdm
 
 from keras.models import Sequential
 from keras.layers import SimpleRNN, Dense
-from tqdm import tqdm
 
-from experiment import FixpointExperiment, IdentLearningExperiment
+from experiment import *
 
 # Supress warnings and info messages
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -40,8 +39,48 @@ def are_weights_within(network_weights, lower_bound, upper_bound):
                     return False
     return True
 
+class PrintingObject():
 
-class NeuralNetwork:
+    class SilenceSignal():
+        def __init__(self, obj, value):
+            self.obj = obj
+            self.new_silent = value
+        def __enter__(self):
+            self.old_silent = self.obj.get_silence()
+            self.obj.set_silence(self.new_silent)
+        def __exit__(self, exception_type, exception_value, traceback):
+            self.obj.set_silence(self.old_silent)
+
+    def __init__(self):
+        self.silent = True
+
+    def is_silent(self):
+        return self.silent
+
+    def get_silence(self):
+        return self.is_silent()
+
+    def set_silence(self, value=True):
+        self.silent = value
+        return self
+
+    def unset_silence(self):
+        self.silent = False
+        return self
+
+    def with_silence(self, value=True):
+        self.set_silence(value)
+        return self
+
+    def silence(self, value=True):
+        return self.__class__.SilenceSignal(self, value)
+
+    def _print(self, *args, **kwargs):
+        if not self.silent:
+            print(*args, **kwargs)
+
+
+class NeuralNetwork(PrintingObject):
 
     @staticmethod
     def weights_to_string(weights):
@@ -56,19 +95,17 @@ class NeuralNetwork:
         return s
 
     def __init__(self, **params):
+        super().__init__()
         self.model = Sequential()
         self.params = dict(epsilon=0.00000000000001)
         self.params.update(params)
        self.keras_params = dict(activation='linear', use_bias=False)
-        self.silent = True
+
+    def get_params(self):
+        return self.params
 
-    def silence(self):
-        self.silent = True
-        return self
-
-    def unsilence(self):
-        self.silent = False
-        return self
+    def get_keras_params(self):
+        return self.keras_params
 
     def with_params(self, **kwargs):
         self.params.update(kwargs)
@@ -77,15 +114,18 @@ class NeuralNetwork:
     def with_keras_params(self, **kwargs):
         self.keras_params.update(kwargs)
         return self
+
+    def get_model(self):
+        return self.model
 
     def get_weights(self):
-        return self.model.get_weights()
+        return self.get_model().get_weights()
 
     def set_weights(self, new_weights):
-        return self.model.set_weights(new_weights)
+        return self.get_model().set_weights(new_weights)
 
     def apply_to_weights(self, old_weights):
-        # Placeholder
+        # placeholder, overwrite in subclass
        return old_weights
 
     def apply_to_network(self, other_network):
@@ -117,14 +157,14 @@ class NeuralNetwork:
         return are_weights_within(self.get_weights(), -epsilon, epsilon)
 
     def is_fixpoint(self, degree=1, epsilon=None):
-        epsilon = epsilon or self.params.get('epsilon')
+        assert degree >= 1, "degree must be >= 1"
+        epsilon = epsilon or self.get_params().get('epsilon')
         old_weights = self.get_weights()
-        assert degree, "Degree cannot be 0, Null"
-        self.silence()
+        new_weights = copy.deepcopy(old_weights)
+
         for _ in range(degree):
-            new_weights = self.apply_to_network(self)
-
-        self.unsilence()
+            new_weights = self.apply_to_weights(new_weights)
+
         if are_weights_diverged(new_weights):
             return False
         for layer_id, layer in enumerate(old_weights):
@@ -175,12 +215,30 @@ class WeightwiseNeuralNetwork(NeuralNetwork):
                     new_weight = self.apply(weight, normal_layer_id, normal_cell_id, normal_weight_id)
                     new_weights[layer_id][cell_id][weight_id] = new_weight
-                    if self.params.get("print_all_weight_updates", False) and not self.silent:
+                    if self.params.get("print_all_weight_updates", False) and not self.is_silent():
                         print("updated old weight {weight}\t @ ({layer},{cell},{weight_id}) "
-                              "to new value {new_weight}\t calling @ ({n_layer},{n_cell},{n_weight_id})").format(
+                              "to new value {new_weight}\t calling @ ({normal_layer},{normal_cell},{normal_weight_id})".format(
                               weight=weight, layer=layer_id, cell=cell_id, weight_id=weight_id, new_weight=new_weight,
-                              n_layer=normal_layer_id, n_cell=normal_cell_id, n_weight_id=normal_weight_id)
+                              normal_layer=normal_layer_id, normal_cell=normal_cell_id, normal_weight_id=normal_weight_id))
         return new_weights
+
+    def compute_samples(self):
+        samples = []
+        new_weights = copy.deepcopy(self.get_weights())
+        max_layer_id = len(self.get_weights()) - 1
+        for layer_id, layer in enumerate(self.get_weights()):
+            max_cell_id = len(layer) - 1
+            for cell_id, cell in enumerate(layer):
+                max_weight_id = len(cell) - 1
+                for weight_id, weight in enumerate(cell):
+                    normal_layer_id = normalize_id(layer_id, max_layer_id)
+                    normal_cell_id = normalize_id(cell_id, max_cell_id)
+                    normal_weight_id = normalize_id(weight_id, max_weight_id)
+                    sample = np.transpose(np.array([[weight], [normal_layer_id], [normal_cell_id], [normal_weight_id]]))
+                    samples += [sample[0]]
+        samples_array = np.asarray(samples)
+        return samples_array, samples_array[:, 0]
+
 
 
 class AggregatingNeuralNetwork(NeuralNetwork):
 
@@ -262,9 +320,11 @@ class AggregatingNeuralNetwork(NeuralNetwork):
                     current_weight_id += 1
         collections[-1] += next_collection
         leftovers = len(next_collection)
 
+        # call network
         old_aggregations = [self.get_aggregator()(collection) for collection in collections]
         new_aggregations = self.apply(*old_aggregations)
 
+        # generate list of new weights
         new_weights_list = []
         for aggregation_id, aggregation in enumerate(new_aggregations):
@@ -273,6 +333,7 @@ class AggregatingNeuralNetwork(NeuralNetwork):
             else:
                 new_weights_list += self.get_deaggregator()(aggregation, collection_size)
         new_weights_list = self.get_shuffler()(new_weights_list)
 
+        # write back new weights
         new_weights = copy.deepcopy(old_weights)
         current_weight_id = 0
@@ -282,13 +343,64 @@ class AggregatingNeuralNetwork(NeuralNetwork):
                     new_weight = new_weights_list[current_weight_id]
                     new_weights[layer_id][cell_id][weight_id] = new_weight
                     current_weight_id += 1
+
         # return results
-        if self.params.get("print_all_weight_updates", False) and not self.silent:
+        if self.params.get("print_all_weight_updates", False) and not self.is_silent():
             print("updated old weight aggregations " + str(old_aggregations))
             print("to new weight aggregations " + str(new_aggregations))
             print("resulting in network weights ...")
             print(self.__class__.weights_to_string(new_weights))
         return new_weights
+
+    @staticmethod
+    def collect_weights(all_weights, collection_size):
+        collections = []
+        next_collection = []
+        current_weight_id = 0
+        for layer_id, layer in enumerate(all_weights):
+            for cell_id, cell in enumerate(layer):
+                for weight_id, weight in enumerate(cell):
+                    next_collection += [weight]
+                    if (current_weight_id + 1) % collection_size == 0:
+                        collections += [next_collection]
+                        next_collection = []
+                    current_weight_id += 1
+        collections[-1] += next_collection
+        leftovers = len(next_collection)
+        return collections, leftovers
+
+    def get_collected_weights(self):
+        collection_size = self.get_amount_of_weights() // self.aggregates
+        return self.__class__.collect_weights(self.get_weights(), collection_size)
+
+    def get_aggregated_weights(self):
+        collections, leftovers = self.get_collected_weights()
+        aggregations = [self.get_aggregator()(collection) for collection in collections]
+        return aggregations, leftovers
+
+    def compute_samples(self):
+        aggregations, _ = self.get_aggregated_weights()
+        sample = np.transpose(np.array([[aggregations[i]] for i in range(self.aggregates)]))
+        return [sample], [sample]
+
+    def is_fixpoint(self, degree=1, epsilon=None):
+        assert degree >= 1, "degree must be >= 1"
+        epsilon = epsilon or self.get_params().get('epsilon')
+        old_weights = self.get_weights()
+        new_weights = copy.deepcopy(old_weights)
+
+        for _ in range(degree):
+            new_weights = self.apply_to_weights(new_weights)
+
+        if are_weights_diverged(new_weights):
+            return False
+        for layer_id, layer in enumerate(old_weights):
+            for cell_id, cell in enumerate(layer):
+                for weight_id, weight in enumerate(cell):
+                    new_weight = new_weights[layer_id][cell_id][weight_id]
+                    if abs(new_weight - weight) >= epsilon:
+                        return False
+        return True
 
 
 class RecurrentNeuralNetwork(NeuralNetwork):
@@ -315,8 +427,10 @@ class RecurrentNeuralNetwork(NeuralNetwork):
             for cell_id, cell in enumerate(layer):
                 for weight_id, weight in enumerate(cell):
                     old_weights_list += [weight]
 
+        # call network
         new_weights_list = self.apply(*old_weights_list)
 
+        # write back new weights from list of rnn returns
         current_weight_id = 0
         for layer_id, layer in enumerate(new_weights):
@@ -326,6 +440,17 @@ class RecurrentNeuralNetwork(NeuralNetwork):
                     new_weights[layer_id][cell_id][weight_id] = new_weight
                     current_weight_id += 1
         return new_weights
+
+    def compute_samples(self):
+        # build list from old weights
+        old_weights_list = []
+        for layer_id, layer in enumerate(self.get_weights()):
+            for cell_id, cell in enumerate(layer):
+                for weight_id, weight in enumerate(cell):
+                    old_weights_list += [weight]
+        sample = np.transpose(np.array([[[old_weights_list[i]] for i in range(len(old_weights_list))]]))
+        return sample, sample
+
 
 class LearningNeuralNetwork(NeuralNetwork):
@@ -360,6 +485,9 @@
         self.model.add(Dense(units=self.features, **self.keras_params))
         self.model.compile(**self.compile_params)
 
+    def apply_to_weights(self, old_weights):
+        raise NotImplementedError
+
     def with_compile_params(self, **kwargs):
         self.compile_params.update(kwargs)
         return self
@@ -375,6 +503,60 @@ class LearningNeuralNetwork(NeuralNetwork):
             bar.postfix[1]["value"] = history.history['loss'][-1]
             bar.update()
 
 
+class TrainingNeuralNetworkDecorator(NeuralNetwork):
+
+    def __init__(self, net, **kwargs):
+        super().__init__(**kwargs)
+        self.net = net
+        self.model = None
+        self.compile_params = dict(loss='mse', optimizer='sgd')
+        self.model_compiled = False
+
+    def get_params(self):
+        return self.net.get_params()
+
+    def get_keras_params(self):
+        return self.net.get_keras_params()
+
+    def get_compile_params(self):
+        return self.net.get_compile_params()
+
+    def with_params(self, **kwargs):
+        self.net.with_params(**kwargs)
+        return self
+
+    def with_keras_params(self, **kwargs):
+        self.net.with_keras_params(**kwargs)
+        return self
+
+    def with_compile_params(self, **kwargs):
+        self.compile_params.update(kwargs)
+        return self
+
+    def get_model(self):
+        return self.net.get_model()
+
+    def apply_to_weights(self, old_weights):
+        return self.net.apply_to_weights(old_weights)
+
+    def compile_model(self, **kwargs):
+        compile_params = copy.deepcopy(self.compile_params)
+        compile_params.update(kwargs)
+        return self.get_model().compile(**compile_params)
+
+    def compiled(self, **kwargs):
+        if not self.model_compiled:
+            self.compile_model(**kwargs)
+            self.model_compiled = True
+        return self
+
+    def train(self, batchsize=1):
+        self.compiled()
+        x, y = self.net.compute_samples()
+        history = self.net.model.fit(x=x, y=y, verbose=0, batch_size=batchsize)
+        return history.history['loss'][-1]
+
+
 if __name__ == '__main__':
     if False:
@@ -391,14 +573,63 @@ if __name__ == '__main__':
         exp.run_net(net, 100)
         exp.log(exp.counters)
 
-    if True:
+    if False:  # is_fixpoint was wrong because it trivially returned the old weights
         with IdentLearningExperiment() as exp:
             net = LearningNeuralNetwork(width=2, depth=2, features=2, )\
                 .with_keras_params(activation='sigmoid', use_bias=False, ) \
                .with_params(print_all_weight_updates=False)
+            net.print_weights()
+            time.sleep(1)
+            print(net.is_fixpoint(epsilon=0.1e-6))
+            print()
             net.learn(1, reduction=LearningNeuralNetwork.fft_reduction)
             import time
             time.sleep(1)
             net.print_weights()
             time.sleep(1)
-            print(net.is_fixpoint(1, epsilon=0.9e-6))
+            print(net.is_fixpoint(epsilon=0.1e-6))
+    if False:  # ok so this works quite reliably
+        with FixpointExperiment() as exp:
+            run_count = 1000
+            net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.1e-6)
+            for run_id in tqdm(range(run_count+1)):
+                loss = net.compiled().train()
+                if run_id % 100 == 0:
+                    net.print_weights()
+                    # print(net.apply_to_network(net))
+                    print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
+                    print("Loss " + str(loss))
+                    print()
+    if False:  # this does not work as the aggregation function screws over the fixpoint computation... TODO: check for fixpoint in aggregated space
+        with FixpointExperiment() as exp:
+            run_count = 1000
+            net = TrainingNeuralNetworkDecorator(AggregatingNeuralNetwork(4, width=2, depth=2)).with_params(epsilon=0.1e-6)
+            for run_id in tqdm(range(run_count+1)):
+                loss = net.compiled().train()
+                if run_id % 100 == 0:
+                    net.print_weights()
+                    # print(net.apply_to_network(net))
+                    print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
+                    print("Loss " + str(loss))
+                    print()
+    if False:  # this explodes in our faces completely... NaN everywhere. TODO: what is happening here?
+        with FixpointExperiment() as exp:
+            run_count = 10
+            net = TrainingNeuralNetworkDecorator(RecurrentNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.1e-6)
+            for run_id in tqdm(range(run_count+1)):
+                loss = net.compiled().train()
+                if run_id % 1 == 0:
+                    net.print_weights()
+                    # print(net.apply_to_network(net))
+                    print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
+                    print("Loss " + str(loss))
+                    print()
+    if True:  # and this gets somewhat interesting... we can still achieve non-trivial fixpoints over multiple applications when training enough in-between
+        with MixedFixpointExperiment() as exp:
+            for run_id in range(1):
+                net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.0001)
+                exp.run_net(net, 500, 10)
+                net.print_weights()
+                print("Fixpoint? " + str(net.is_fixpoint()))
+                print()
        exp.log(exp.counters)