diff --git a/README.md b/README.md
index deae216..e5cb27c 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,19 @@
 # self-rep NN paper - ALIFE journal edition

-- [x] Plateau / Pillar sizeWhat does happen to the fixpoints after noise introduction and retraining?Options beeing: Same Fixpoint, Similar Fixpoint (Basin), Different Fixpoint?Do they do the clustering thingy?
+- [x] Plateau / Pillar size: What happens to the fixpoints after noise introduction and retraining? Options being: Same Fixpoint, Similar Fixpoint (Basin), Different Fixpoint? Do they do the clustering thingy?

-  - see journal_basins.py for the "train -> spawn with noise -> train again and see where they end up" functionality. Apply noise follows the `vary` function that was used in the paper robustness test with `+- prng() * eps`. Change if desired.
+  - see `journal_basins.py` for the "train -> spawn with noise -> train again and see where they end up" functionality. Applying noise follows the `vary` function that was used in the paper's robustness test with `+- prng() * eps`. Change if desired.

   - there is also a distance matrix for all-to-all particle comparisons (with distance parameter one of: `MSE`, `MAE` (mean absolute error = mean manhattan) and `MIM` (mean position invariant manhattan))

 - [ ] Same Thing with Soup interactionWe would expect the same behaviour...Influence of interaction with near and far away particles.

-- [ ] Robustness test with a trained NetworkTraining for high quality fixpoints, compare with the "perfect" fixpoint.Average Loss per application step
+- [x] Robustness test with a trained Network: Training for high-quality fixpoints, compare with the "perfect" fixpoint. Average loss per application step.
+
+  - see `journal_robustness.py` for the robustness test modeled after Cristian's robustness experiment (with the exception that we put noise on the weights). Has a `synthetic` bool to switch to a hand-modeled perfect fixpoint instead of naturally trained ones.
+
+  - We might need to consult about the "average loss per application step": the application loss gets gradually higher the worse the weights get, so the average might not tell us much here.

 - [ ] Adjust Self Training so that it favors second order fixpoints-> Second order test implementation (?)

@@ -21,6 +25,6 @@

 - I have also added a `start_time` property for the nets (default: `1`). This is intended to be set flexibly for e.g., clones (when they are spawned midway through the experiment), such that the PCA can start the plotting trace from this timestep. When we spawn clones we deepcopy their parent's saved weight_history too, so that the PCA transforms same lenght trajectories. With `plot_pca_together` that means that clones and their parents will literally be plotted perfectly overlayed on top, up until the spawn-time, where you can see the offset / noise we apply. By setting the start_time, you can avoid this overlap and avoid hiding the parent's trace color which gets plotted first (because the parent is always added to self.nets first). **But more importantly, you can effectively zoom into the plot, by setting the parents start-time to just shy of the end of first epoch (where they get checked on fixpoint-property and spawn clones) and the start-times of clones to the second epoch. This will make the plot begin at spawn time, cutting off the parents initial trajectory and zoom-in to the action (see. `journal_basins.py/spawn_and_continue()`).**

-- I saved the whole experiment class as pickle dump (`experiment_pickle.p`, just like cristian), hope thats fine.
+- Now saving the whole experiment class as a pickle dump (`experiment_pickle.p`, just like Cristian), hope that's fine.

-- I have also added a `requirement.txt` for quick venv / pip -r installs. Append as necessary.
\ No newline at end of file
+- Added a `requirement.txt` for quick venv / `pip install -r` installs. Append as necessary.
\ No newline at end of file
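As a quick reference for the `MIM` option mentioned in the README hunk above, here is a minimal sketch of *one plausible reading* of a "mean position invariant manhattan" distance: sort both flattened weight vectors before comparing, so the metric ignores where in the network a given weight sits. This is an illustration only; the authoritative implementation is `mean_invariate_manhattan_distance()` in `journal_basins.py`, which is not part of this diff, and the helper name below is hypothetical.

```python
# Sketch only -- assumes flattened weight vectors; the real implementation lives in journal_basins.py.
import numpy as np

def mim_distance_sketch(weights_a, weights_b) -> float:
    # Sorting first makes the comparison invariant to the position of a weight in the network.
    a = np.sort(np.asarray(weights_a).flatten())
    b = np.sort(np.asarray(weights_b).flatten())
    return float(np.mean(np.abs(a - b)))
```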
diff --git a/functionalities_test.py b/functionalities_test.py
index 449a89b..13aa30c 100644
--- a/functionalities_test.py
+++ b/functionalities_test.py
@@ -24,9 +24,9 @@ def is_identity_function(network: Net, epsilon=pow(10, -5)) -> bool:
                        rtol=0, atol=epsilon)


-def is_zero_fixpoint(network: Net) -> bool:
+def is_zero_fixpoint(network: Net) -> bool:
     result = bool(len(np.nonzero(network.create_target_weights(network.input_weight_matrix()))))
-    return result
+    return not result


 def is_secondary_fixpoint(network: Net, epsilon: float = pow(10, -5)) -> bool:
@@ -56,24 +56,23 @@ def is_secondary_fixpoint(network: Net, epsilon: float = pow(10, -5)) -> bool:

 def test_for_fixpoints(fixpoint_counter: Dict, nets: List, id_functions=None):
     id_functions = id_functions or list()

-    for i in range(len(nets)):
-        net = nets[i]
-        if is_divergent(nets[i]):
+    for net in nets:
+        if is_divergent(net):
             fixpoint_counter["divergent"] += 1
-            nets[i].is_fixpoint = "divergent"
-        elif is_identity_function(nets[i]):  # is default value
+            net.is_fixpoint = "divergent"
+        elif is_identity_function(net):  # is default value
             fixpoint_counter["identity_func"] += 1
-            nets[i].is_fixpoint = "identity_func"
-            id_functions.append(nets[i])
-        elif is_zero_fixpoint(nets[i]):
+            net.is_fixpoint = "identity_func"
+            id_functions.append(net)
+        elif is_zero_fixpoint(net):
             fixpoint_counter["fix_zero"] += 1
-            nets[i].is_fixpoint = "fix_zero"
-        elif is_secondary_fixpoint(nets[i]):
+            net.is_fixpoint = "fix_zero"
+        elif is_secondary_fixpoint(net):
             fixpoint_counter["fix_sec"] += 1
-            nets[i].is_fixpoint = "fix_sec"
+            net.is_fixpoint = "fix_sec"
         else:
             fixpoint_counter["other_func"] += 1
-            nets[i].is_fixpoint = "other_func"
+            net.is_fixpoint = "other_func"

     return id_functions
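To make the `is_zero_fixpoint` change above concrete: assuming `create_target_weights(...)` returns a torch tensor (as the rest of the codebase suggests), `np.nonzero` delegates to `Tensor.nonzero()`, which yields one index row per non-zero weight. The old version therefore answered "does the network contain non-zero weights?"; the added `not` turns that into the intended "are all weights zero?". A self-contained illustration under that assumption:

```python
# Illustration of the corrected zero-fixpoint check; torch tensors stand in for the
# network's target weights here (the Net class itself is not needed for the logic).
import numpy as np
import torch

all_zero = torch.zeros(14, 1)
mostly_zero = torch.tensor([[1.0], [0.0], [0.5]])

print(not bool(len(np.nonzero(all_zero))))     # True  -> zero fixpoint
print(not bool(len(np.nonzero(mostly_zero))))  # False -> not a zero fixpoint
```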
diff --git a/journal_robustness.py b/journal_robustness.py
new file mode 100644
index 0000000..b456d28
--- /dev/null
+++ b/journal_robustness.py
@@ -0,0 +1,177 @@
+import pickle
+import torch
+import random
+import copy
+
+from pathlib import Path
+from tqdm import tqdm
+from tabulate import tabulate
+from sklearn.metrics import mean_absolute_error as MAE
+from sklearn.metrics import mean_squared_error as MSE
+
+from journal_basins import mean_invariate_manhattan_distance as MIM
+from functionalities_test import is_identity_function, is_zero_fixpoint, test_for_fixpoints, is_divergent
+from network import Net
+from visualization import plot_loss, bar_chart_fixpoints
+
+
+def prng():
+    return random.random()
+
+
+def generate_fixpoint_weights():
+    return torch.tensor([[1.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
+                         [1.0], [0.0], [0.0], [0.0],
+                         [1.0], [0.0]
+                         ], dtype=torch.float32)
+
+
+class RobustnessComparisonExperiment:
+
+    @staticmethod
+    def apply_noise(network, noise: float):
+        """ Changes the weights of a network to values +- noise. """
+
+        for layer_id, layer_name in enumerate(network.state_dict()):
+            for line_id, line_values in enumerate(network.state_dict()[layer_name]):
+                for weight_id, weight_value in enumerate(network.state_dict()[layer_name][line_id]):
+                    # network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
+                    if prng() < 0.5:
+                        network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
+                    else:
+                        network.state_dict()[layer_name][line_id][weight_id] = weight_value - noise
+
+        return network
+
+    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
+                 epochs, st_steps, synthetic, directory) -> None:
+        self.population_size = population_size
+        self.log_step_size = log_step_size
+        self.net_input_size = net_input_size
+        self.net_hidden_size = net_hidden_size
+        self.net_out_size = net_out_size
+        self.net_learning_rate = net_learning_rate
+        self.epochs = epochs
+        self.ST_steps = st_steps
+        self.loss_history = []
+        self.nets = []
+        self.synthetic = synthetic
+        self.fixpoint_counters = {
+            "identity_func": 0,
+            "divergent": 0,
+            "fix_zero": 0,
+            "fix_weak": 0,
+            "fix_sec": 0,
+            "other_func": 0
+        }
+
+        self.directory = Path(directory)
+        self.directory.mkdir(parents=True, exist_ok=True)
+
+        self.id_functions = []
+        self.populate_environment()
+        self.count_fixpoints()
+        self.data = self.test_robustness()
+
+        self.save()
+
+    def populate_environment(self):
+        loop_population_size = tqdm(range(self.population_size))
+
+        for i in loop_population_size:
+            loop_population_size.set_description("Populating experiment %s" % i)
+
+            if self.synthetic:
+                ''' Either use a perfect / hand-constructed fixpoint ... '''
+                net_name = f"ST_net_{str(i)}_synthetic"
+                net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
+                net.apply_weights(generate_fixpoint_weights())
+
+            else:
+                ''' ... or use the natural approach and train fixpoints from random initialisation. '''
+                net_name = f"ST_net_{str(i)}"
+                net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
+                for _ in range(self.epochs):
+                    for _ in range(self.ST_steps):
+                        net.self_train(1, self.log_step_size, self.net_learning_rate)
+
+            self.nets.append(net)
+
+    def test_robustness(self, print_it=True):
+        data = [[0 for _ in range(10)] for _ in range(len(self.id_functions))]
+        # Placeholder for the "average loss per application step" measurements discussed in the README.
+        avg_loss_per_application = [[0 for _ in range(10)] for _ in range(len(self.id_functions))]
+        noise_range = range(10)
+        row_headers = []
+
+        for i, fixpoint in enumerate(self.id_functions):
+            row_headers.append(fixpoint.name)
+            for noise_level in noise_range:
+                application_losses = []
+
+                clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
+                            f"{fixpoint.name}_clone_noise10e-{noise_level}")
+                clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))
+                rand_noise = prng() * pow(10, -noise_level)
+                clone = self.apply_noise(clone, rand_noise)
+
+                while not is_zero_fixpoint(clone) and not is_divergent(clone):
+                    # Todo: what kind of comparison between applications? -> before
+                    clone.self_application(1, self.log_step_size)
+                    data[i][noise_level] += 1
+                    # -> after
+
+        if print_it:
+            print("Number of application steps:")
+            col_headers = [str(f"10e-{d}") for d in noise_range]
+            print(tabulate(data, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
+
+            # other tables here
+
+        return data
+
+    def count_fixpoints(self):
+        exp_details = f"ST steps: {self.ST_steps}"
+        self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.nets)
+        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
+                            exp_details)
+
+    def visualize_loss(self):
+        for i in range(len(self.nets)):
+            net_loss_history = self.nets[i].loss_history
+            self.loss_history.append(net_loss_history)
+        plot_loss(self.loss_history, self.directory)
+
+    def save(self):
+        pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb"))
+        print(f"\nSaved experiment to {self.directory}.")
+
+
+if __name__ == "__main__":
+    NET_INPUT_SIZE = 4
+    NET_OUT_SIZE = 1
+
+    ST_steps = 1000
+    ST_epochs = 5
+    ST_log_step_size = 10
+    ST_population_size = 3
+    ST_net_hidden_size = 2
+    ST_net_learning_rate = 0.04
+    ST_name_hash = random.getrandbits(32)
+    ST_synthetic = True
+
+    print(f"Running the robustness comparison experiment:")
+    RobustnessComparisonExperiment(
+        population_size=ST_population_size,
+        log_step_size=ST_log_step_size,
+        net_input_size=NET_INPUT_SIZE,
+        net_hidden_size=ST_net_hidden_size,
+        net_out_size=NET_OUT_SIZE,
+        net_learning_rate=ST_net_learning_rate,
+        epochs=ST_epochs,
+        st_steps=ST_steps,
+        synthetic=ST_synthetic,
+        directory=Path('output') / 'robustness' / f'{ST_name_hash}'
+    )
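Not part of the diff, just a sketch: the triple loop in `apply_noise()` above can be expressed in a vectorized way with the same +- semantics. `apply_noise_vectorized` is a hypothetical name and an alternative sketch, not the method the experiment actually uses.

```python
# Vectorized variant of the +- noise idea from RobustnessComparisonExperiment.apply_noise():
# each weight gets +noise or -noise with equal probability; in-place ops keep the
# parameter tensors registered with the module.
import torch

def apply_noise_vectorized(network: torch.nn.Module, noise: float) -> torch.nn.Module:
    with torch.no_grad():
        for param in network.parameters():
            signs = torch.where(torch.rand_like(param) < 0.5,
                                torch.ones_like(param), -torch.ones_like(param))
            param.add_(signs * noise)
    return network
```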
diff --git a/main.py b/main.py
index 2e711ea..3c906da 100644
--- a/main.py
+++ b/main.py
@@ -131,13 +131,13 @@ if __name__ == '__main__':
     """ ----------------------------------------- Robustness experiment ----------------------------------------- """
     # Define number of runs & name:
-    rob_runs = 3
+    rob_runs = 1
     rob_runs_name = "test-07"
-    rob_ST_steps = 500
+    rob_ST_steps = 1500
     rob_log_step_size = 10

     # Define number of networks & their architecture
-    rob_population_size = 6
+    rob_population_size = 1
     rob_net_hidden_size = 2
     rob_net_learning_rate = 0.04
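Regarding the open "average loss per application step" question from the README: since the per-step application loss tends to grow as the weights degrade, a plain mean is dominated by the late, already-broken steps. Below is a small, self-contained sketch of a summary that keeps more information; the function name, the threshold, and the example loss list are illustrative only, and the real per-step losses would come from comparing weights before and after each `self_application` call in `test_robustness()`.

```python
# Illustrative only: `losses` stands in for the per-step self-application losses.
from statistics import mean

def summarize_application_losses(losses, threshold=1e-3):
    """Return (mean loss, number of steps, first step whose loss exceeds the threshold)."""
    first_bad = next((i for i, loss in enumerate(losses) if loss > threshold), None)
    return mean(losses), len(losses), first_bad

print(summarize_application_losses([1e-6, 1e-6, 1e-5, 1e-2, 1.5]))
```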