built a class for training all networks, including a working fixpoint check and some experiments on it

Thomas Gabor 2019-03-05 04:42:50 +01:00
parent 7ea8287b0e
commit 19e4ed65f9
2 changed files with 271 additions and 31 deletions
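For orientation, a minimal usage sketch of the training/fixpoint workflow this commit adds (illustrative only: class and method names are taken from the diffs below, the module name of the second file is assumed):

from network import WeightwiseNeuralNetwork, TrainingNeuralNetworkDecorator  # module name assumed; the page does not show file names

net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.0001)
for _ in range(100):
    loss = net.compiled().train()  # compile once, then fit the net on samples built from its own weights
print(net.is_fixpoint())           # True once one self-application changes every weight by less than epsilon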


@@ -2,6 +2,7 @@ import sys
import os
import time
import dill
from tqdm import tqdm
class Experiment:
@@ -69,15 +70,23 @@ class FixpointExperiment(Experiment):
self.counters['fix_zero'] += 1
else:
self.counters['fix_other'] += 1
self.interesting_fixpoints.append(net)
self.log(net.repr_weights())
net.self_attack()
self.log(net.repr_weights())
self.interesting_fixpoints.append(net.get_weights())
elif net.is_fixpoint(2):
self.counters['fix_sec'] += 1
else:
self.counters['other'] += 1
class MixedFixpointExperiment(FixpointExperiment):
def run_net(self, net, trains_per_application=100, step_limit=100):
i = 0
while i < step_limit and not net.is_diverged() and not net.is_fixpoint():
net.self_attack()
for _ in tqdm(range(trains_per_application)):
loss = net.compiled().train()
i += 1
self.count(net)
class SoupExperiment(Experiment):
pass
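A sketch of driving the new MixedFixpointExperiment with a trainable net (mirrors the __main__ block at the end of the second file; the network module name is assumed):

from experiment import MixedFixpointExperiment
from network import WeightwiseNeuralNetwork, TrainingNeuralNetworkDecorator  # module name assumed

with MixedFixpointExperiment() as exp:
    net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.0001)
    exp.run_net(net, trains_per_application=500, step_limit=10)  # self-attack, then train, until fixpoint/divergence/limit
    exp.log(exp.counters)  # counts of fix_zero / fix_other / fix_sec / other outcomes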


@@ -1,14 +1,13 @@
import math
import copy
import os
import numpy as np
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from tqdm import tqdm
from experiment import FixpointExperiment, IdentLearningExperiment
from experiment import *
# Suppress warnings and info messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
@@ -40,8 +39,48 @@ def are_weights_within(network_weights, lower_bound, upper_bound):
return False
return True
class PrintingObject():
class SilenceSignal():
def __init__(self, obj, value):
self.obj = obj
self.new_silent = value
def __enter__(self):
self.old_silent = self.obj.get_silence()
self.obj.set_silence(self.new_silent)
def __exit__(self, exception_type, exception_value, traceback):
self.obj.set_silence(self.old_silent)
def __init__(self):
self.silent = True
def is_silent(self):
return self.silent
def get_silence(self):
return self.is_silent()
def set_silence(self, value=True):
self.silent = value
return self
def unset_silence(self):
self.silent = False
return self
def with_silence(self, value=True):
self.set_silence(value)
return self
def silence(self, value=True):
return self.__class__.SilenceSignal(self, value)
def _print(self, *args, **kwargs):
if not self.silent:
print(*args, **kwargs)
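# usage sketch for silence() above (illustrative; some_net / other_net are placeholder names):
#     with some_net.silence(True):
#         some_net.apply_to_network(other_net)  # any _print() output inside the block is suppressed
# the previous silence flag is restored on exit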
class NeuralNetwork:
class NeuralNetwork(PrintingObject):
@staticmethod
def weights_to_string(weights):
@@ -56,19 +95,17 @@ class NeuralNetwork:
return s
def __init__(self, **params):
super().__init__()
self.model = Sequential()
self.params = dict(epsilon=0.00000000000001)
self.params.update(params)
self.keras_params = dict(activation='linear', use_bias=False)
self.silent = True
def get_params(self):
return self.params
def silence(self):
self.silent = True
return self
def unsilence(self):
self.silent = False
return self
def get_keras_params(self):
return self.keras_params
def with_params(self, **kwargs):
self.params.update(kwargs)
@@ -77,15 +114,18 @@ class NeuralNetwork:
def with_keras_params(self, **kwargs):
self.keras_params.update(kwargs)
return self
def get_model(self):
return self.model
def get_weights(self):
return self.model.get_weights()
return self.get_model().get_weights()
def set_weights(self, new_weights):
return self.model.set_weights(new_weights)
return self.get_model().set_weights(new_weights)
def apply_to_weights(self, old_weights):
# Placeholder
# placeholder, overwrite in subclass
return old_weights
def apply_to_network(self, other_network):
@@ -117,14 +157,14 @@ class NeuralNetwork:
return are_weights_within(self.get_weights(), -epsilon, epsilon)
def is_fixpoint(self, degree=1, epsilon=None):
epsilon = epsilon or self.params.get('epsilon')
assert degree >= 1, "degree must be >= 1"
epsilon = epsilon or self.get_params().get('epsilon')
old_weights = self.get_weights()
assert degree, "Degree cannot be 0, Null"
self.silence()
new_weights = copy.deepcopy(old_weights)
for _ in range(degree):
new_weights = self.apply_to_network(self)
self.unsilence()
new_weights = self.apply_to_weights(new_weights)
if are_weights_diverged(new_weights):
return False
for layer_id, layer in enumerate(old_weights):
@@ -175,12 +215,30 @@ class WeightwiseNeuralNetwork(NeuralNetwork):
new_weight = self.apply(weight, normal_layer_id, normal_cell_id, normal_weight_id)
new_weights[layer_id][cell_id][weight_id] = new_weight
if self.params.get("print_all_weight_updates", False) and not self.silent:
if self.params.get("print_all_weight_updates", False) and not self.is_silent():
print("updated old weight {weight}\t @ ({layer},{cell},{weight_id}) "
"to new value {new_weight}\t calling @ ({n_layer},{n_cell},{n_weight_id})").format(
"to new value {new_weight}\t calling @ ({normal_layer},{normal_cell},{normal_weight_id})").format(
weight=weight, layer=layer_id, cell=cell_id, weight_id=weight_id, new_weight=new_weight,
n_layer=normal_layer_id, n_cell=normal_cell_id, n_weight_id=normal_weight_id)
normal_layer=normal_layer_id, normal_cell=normal_cell_id, normal_weight_id=normal_weight_id)
return new_weights
def compute_samples(self):
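# each sample row is [weight, normalized layer id, normalized cell id, normalized weight id];
# the target returned alongside it (samples_array[:, 0]) is the weight itself, so fitting these
# samples pushes the net towards reproducing its own weights, i.e. towards a fixpoint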
samples = []
new_weights = copy.deepcopy(self.get_weights())
max_layer_id = len(self.get_weights()) - 1
for layer_id, layer in enumerate(self.get_weights()):
max_cell_id = len(layer) - 1
for cell_id, cell in enumerate(layer):
max_weight_id = len(cell) - 1
for weight_id, weight in enumerate(cell):
normal_layer_id = normalize_id(layer_id, max_layer_id)
normal_cell_id = normalize_id(cell_id, max_cell_id)
normal_weight_id = normalize_id(weight_id, max_weight_id)
sample = np.transpose(np.array([[weight], [normal_layer_id], [normal_cell_id], [normal_weight_id]]))
samples += [sample[0]]
samples_array = np.asarray(samples)
return samples_array, samples_array[:, 0]
class AggregatingNeuralNetwork(NeuralNetwork):
@@ -262,9 +320,11 @@ class AggregatingNeuralNetwork(NeuralNetwork):
current_weight_id += 1
collections[-1] += next_collection
leftovers = len(next_collection)
# call network
old_aggregations = [self.get_aggregator()(collection) for collection in collections]
new_aggregations = self.apply(*old_aggregations)
# generate list of new weights
new_weights_list = []
for aggregation_id, aggregation in enumerate(new_aggregations):
@@ -273,6 +333,7 @@ class AggregatingNeuralNetwork(NeuralNetwork):
else:
new_weights_list += self.get_deaggregator()(aggregation, collection_size)
new_weights_list = self.get_shuffler()(new_weights_list)
# write back new weights
new_weights = copy.deepcopy(old_weights)
current_weight_id = 0
@@ -282,13 +343,64 @@ class AggregatingNeuralNetwork(NeuralNetwork):
new_weight = new_weights_list[current_weight_id]
new_weights[layer_id][cell_id][weight_id] = new_weight
current_weight_id += 1
# return results
if self.params.get("print_all_weight_updates", False) and not self.silent:
if self.params.get("print_all_weight_updates", False) and not self.is_silent():
print("updated old weight aggregations " + str(old_aggregations))
print("to new weight aggregations " + str(new_aggregations))
print("resulting in network weights ...")
print(self.__class__.weights_to_string(new_weights))
return new_weights
@staticmethod
def collect_weights(all_weights, collection_size):
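# flattens all weights in layer/cell/weight order and groups them into chunks of collection_size;
# any remainder is merged into the last chunk, and its length is returned as leftovers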
collections = []
next_collection = []
current_weight_id = 0
for layer_id, layer in enumerate(all_weights):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
next_collection += [weight]
if (current_weight_id + 1) % collection_size == 0:
collections += [next_collection]
next_collection = []
current_weight_id += 1
collections[-1] += next_collection
leftovers = len(next_collection)
return collections, leftovers
def get_collected_weights(self):
collection_size = self.get_amount_of_weights() // self.aggregates
return self.__class__.collect_weights(self.get_weights(), collection_size)
def get_aggregated_weights(self):
collections, leftovers = self.get_collected_weights()
aggregations = [self.get_aggregator()(collection) for collection in collections]
return aggregations, leftovers
def compute_samples(self):
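# both input and target are the current aggregated weights, so training happens in the
# (much smaller) aggregate space rather than on the raw weights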
aggregations, _ = self.get_aggregated_weights()
sample = np.transpose(np.array([[aggregations[i]] for i in range(self.aggregates)]))
return [sample], [sample]
def is_fixpoint(self, degree=1, epsilon=None):
assert degree >= 1, "degree must be >= 1"
epsilon = epsilon or self.get_params().get('epsilon')
old_weights = self.get_weights()
new_weights = copy.deepcopy(old_weights)
for _ in range(degree):
new_weights = self.apply_to_weights(new_weights)
if are_weights_diverged(new_weights):
return False
for layer_id, layer in enumerate(old_weights):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
new_weight = new_weights[layer_id][cell_id][weight_id]
if abs(new_weight - weight) >= epsilon:
return False
return True
class RecurrentNeuralNetwork(NeuralNetwork):
@@ -315,8 +427,10 @@ class RecurrentNeuralNetwork(NeuralNetwork):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
old_weights_list += [weight]
# call network
new_weights_list = self.apply(*old_weights_list)
# write back new weights from list of rnn returns
current_weight_id = 0
for layer_id, layer in enumerate(new_weights):
@@ -326,6 +440,17 @@ class RecurrentNeuralNetwork(NeuralNetwork):
new_weights[layer_id][cell_id][weight_id] = new_weight
current_weight_id += 1
return new_weights
def compute_samples(self):
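# the RNN receives the flat list of its own weights as a sequence and is trained to
# reproduce that same sequence (input and target are identical)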
# build list from old weights
old_weights_list = []
for layer_id, layer in enumerate(self.get_weights()):
for cell_id, cell in enumerate(layer):
for weight_id, weight in enumerate(cell):
old_weights_list += [weight]
sample = np.transpose(np.array([[[old_weights_list[i]] for i in range(len(old_weights_list))]]))
return sample, sample
class LearningNeuralNetwork(NeuralNetwork):
@@ -360,6 +485,9 @@ class LearningNeuralNetwork(NeuralNetwork):
self.model.add(Dense(units=self.features, **self.keras_params))
self.model.compile(**self.compile_params)
def apply_to_weights(self, old_weights):
raise NotImplementedError
def with_compile_params(self, **kwargs):
self.compile_params.update(kwargs)
return self
@@ -375,6 +503,60 @@ class LearningNeuralNetwork(NeuralNetwork):
bar.postfix[1]["value"] = history.history['loss'][-1]
bar.update()
class TrainingNeuralNetworkDecorator(NeuralNetwork):
def __init__(self, net, **kwargs):
super().__init__(**kwargs)
self.net = net
self.model = None
self.compile_params = dict(loss='mse', optimizer='sgd')
self.model_compiled = False
def get_params(self):
return self.net.get_params()
def get_keras_params(self):
return self.net.get_keras_params()
def get_compile_params(self):
return self.compile_params
def with_params(self, **kwargs):
self.net.with_params(**kwargs)
return self
def with_keras_params(self, **kwargs):
self.net.with_keras_params(**kwargs)
return self
def with_compile_params(self, **kwargs):
self.compile_params.update(kwargs)
return self
def get_model(self):
return self.net.get_model()
def apply_to_weights(self, old_weights):
return self.net.apply_to_weights(old_weights)
def compile_model(self, **kwargs):
compile_params = copy.deepcopy(self.compile_params)
compile_params.update(kwargs)
return self.get_model().compile(**compile_params)
def compiled(self, **kwargs):
if not self.model_compiled:
self.compile_model(**kwargs)
self.model_compiled = True
return self
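# train() lazily compiles the wrapped model, pulls (x, y) from the wrapped net's compute_samples(),
# runs a single fit() pass and returns the last recorded loss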
def train(self, batchsize=1):
self.compiled()
x, y = self.net.compute_samples()
history = self.net.model.fit(x=x, y=y, verbose=0, batch_size=batchsize)
return history.history['loss'][-1]
if __name__ == '__main__':
if False:
@@ -391,14 +573,63 @@ if __name__ == '__main__':
exp.run_net(net, 100)
exp.log(exp.counters)
if True:
if False: # is_fixpoint was wrong because it trivially returned the old weights
with IdentLearningExperiment() as exp:
net = LearningNeuralNetwork(width=2, depth=2, features=2, )\
.with_keras_params(activation='sigmoid', use_bias=False, ) \
.with_params(print_all_weight_updates=False)
net.print_weights()
time.sleep(1)
print(net.is_fixpoint(epsilon=0.1e-6))
print()
net.learn(1, reduction=LearningNeuralNetwork.fft_reduction)
import time
time.sleep(1)
net.print_weights()
time.sleep(1)
print(net.is_fixpoint(1, epsilon=0.9e-6))
print(net.is_fixpoint(epsilon=0.1e-6))
if False: # ok, so this works quite reliably
with FixpointExperiment() as exp:
run_count = 1000
net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.1e-6)
for run_id in tqdm(range(run_count+1)):
loss = net.compiled().train()
if run_id % 100 == 0:
net.print_weights()
# print(net.apply_to_network(net))
print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
print("Loss " + str(loss))
print()
if False: # this does not work as the aggregation function screws over the fixpoint computation.... TODO: check for fixpoint in aggregated space...
with FixpointExperiment() as exp:
run_count = 1000
net = TrainingNeuralNetworkDecorator(AggregatingNeuralNetwork(4, width=2, depth=2)).with_params(epsilon=0.1e-6)
for run_id in tqdm(range(run_count+1)):
loss = net.compiled().train()
if run_id % 100 == 0:
net.print_weights()
# print(net.apply_to_network(net))
print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
print("Loss " + str(loss))
print()
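# a fixpoint check in aggregate space (as the TODO above suggests) could compare get_aggregated_weights()
# before and after one apply_to_weights() pass instead of the raw weights -- sketch only, not implemented here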
if False: # this explodes in our faces completely... NaN everywhere TODO: wtf is happening here?
with FixpointExperiment() as exp:
run_count = 10
net = TrainingNeuralNetworkDecorator(RecurrentNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.1e-6)
for run_id in tqdm(range(run_count+1)):
loss = net.compiled().train()
if run_id % 1 == 0:
net.print_weights()
# print(net.apply_to_network(net))
print("Fixpoint? " + str(net.is_fixpoint(epsilon=0.0001)))
print("Loss " + str(loss))
print()
if True: # and this gets somewhat interesting... we can still achieve non-trivial fixpoints over multiple applications when training enough in-between
with MixedFixpointExperiment() as exp:
for run_id in range(1):
net = TrainingNeuralNetworkDecorator(WeightwiseNeuralNetwork(width=2, depth=2)).with_params(epsilon=0.0001)
exp.run_net(net, 500, 10)
net.print_weights()
print("Fixpoint? " + str(net.is_fixpoint()))
print()
exp.log(exp.counters)