implements basin experiments for soup

2021-06-02 02:49:18 +03:00
parent 32ebb729e8
commit 0320957b85
2 changed files with 570 additions and 0 deletions
--- a/journal_soup_basins.py
+++ b/journal_soup_basins.py
@@ -0,0 +1,304 @@
 import os
 from pathlib import Path
 import pickle
 from torch import mean
 from tqdm import tqdm
 import random
 import copy
 from functionalities_test import is_identity_function, test_status, test_for_fixpoints
 from network import Net
 from visualization import plot_3d_self_train, plot_loss, plot_3d_soup
 import numpy as np
 from tabulate import tabulate
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSE
 import pandas as pd
 import seaborn as sns
 from matplotlib import pyplot as plt
 def prng():
    """Return one pseudo-random float produced by ``random.random()``."""
    sample = random.random()
    return sample
 def l1(tup):
    """Return the absolute difference between the two entries of the pair ``tup``."""
    first, second = tup
    difference = first - second
    return abs(difference)
 def mean_invariate_manhattan_distance(x, y):
    """Return the mean absolute difference between the rank-matched values of ``x`` and ``y``.

    Both arguments are converted with ``.numpy()`` and sorted ascending, so the smallest value of
    ``x`` is paired with the smallest value of ``y``, the second smallest with the second smallest,
    and so on. Two weight sets holding the same values in different positions therefore end up at
    distance 0. ``zip`` stops at the shorter input when the lengths differ.
    """
    ascending_x = sorted(x.numpy())
    ascending_y = sorted(y.numpy())
    pairwise_gaps = [l1(pair) for pair in zip(ascending_x, ascending_y)]
    return np.mean(pairwise_gaps)
 def distance_matrix(nets, distance="MIM", print_it=True):
    """Compute the all-to-all distance matrix between the weight columns of ``nets``.

    For every net the first column of ``input_weight_matrix()`` is compared against the same
    column of every net (including itself) with the selected metric.

    Args:
        nets: sequence of networks exposing ``input_weight_matrix()`` and ``name``.
        distance: metric key, one of ``"MSE"``, ``"MAE"`` or ``"MIM"``.
        print_it: when true, print the matrix as an org-mode table labelled with the net names.

    Returns:
        Nested list where ``matrix[i][j]`` is the distance between ``nets[i]`` and ``nets[j]``.

    Raises:
        ValueError: if ``distance`` is not a known metric key (previously this silently produced
            an all-zero matrix).
    """
    metrics = {"MSE": MSE, "MAE": MAE, "MIM": mean_invariate_manhattan_distance}
    if distance not in metrics:
        raise ValueError(f"Unknown distance '{distance}', expected one of {sorted(metrics)}")
    metric = metrics[distance]

    # Extract each weight column once instead of once per matrix cell.
    weight_columns = [net.input_weight_matrix()[:, 0] for net in nets]
    matrix = [[metric(weights, other_weights) for other_weights in weight_columns]
              for weights in weight_columns]

    if print_it:
        print(f"\nDistance matrix (all to all) [{distance}]:")
        headers = [net.name for net in nets]
        print(tabulate(matrix, showindex=headers, headers=headers, tablefmt='orgtbl'))
    return matrix
 def distance_from_parent(nets, distance="MIM", print_it=True):
    """Tabulate, per parent fixpoint, which distance thresholds each of its clones stays below.

    Parents are the nets whose name does not contain ``"clone"`` and for which
    ``is_identity_function`` is true. A clone belongs to a parent when its name starts with
    ``"<parent name>_clone"``. The prefix match avoids the false positives of a plain substring
    test, where ``net_1`` would also claim the clones of ``net_10``.

    Args:
        nets: sequence of networks exposing ``name``, ``input_weight_matrix()`` and
            ``create_target_weights()``.
        distance: metric key, one of ``"MSE"``, ``"MAE"`` or ``"MIM"``.
        print_it: when true, print one org-mode table per parent.

    Returns:
        One matrix per parent; ``matrix[clone_idx][d]`` is true when the clone's distance to the
        parent is smaller than ``10 ** -d`` for ``d`` in ``range(10)``.

    Raises:
        ValueError: if ``distance`` is not a known metric key (previously this silently produced
            all-zero tables).
    """
    metrics = {"MSE": MSE, "MAE": MAE, "MIM": mean_invariate_manhattan_distance}
    if distance not in metrics:
        raise ValueError(f"Unknown distance '{distance}', expected one of {sorted(metrics)}")
    metric = metrics[distance]

    distance_range = range(10)
    list_of_matrices = []
    parents = [net for net in nets if "clone" not in net.name and is_identity_function(net)]

    for parent in parents:
        parent_weights = parent.create_target_weights(parent.input_weight_matrix())
        clone_prefix = f"{parent.name}_clone"
        clones = [net for net in nets if net.name.startswith(clone_prefix)]

        matrix = []
        for clone in clones:
            clone_weights = clone.create_target_weights(clone.input_weight_matrix())
            # The distance does not depend on the threshold, so compute it once per clone.
            clone_distance = metric(parent_weights, clone_weights)
            matrix.append([clone_distance < pow(10, -dist) for dist in distance_range])

        if print_it:
            print(f"\nDistances from parent {parent.name} [{distance}]:")
            # NOTE(review): the column label reads "10e-d" while the threshold used is 10 ** -d.
            col_headers = [str(f"10e-{d}") for d in distance_range]
            row_headers = [str(f"clone_{i}") for i in range(len(clones))]
            print(tabulate(matrix, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))

        list_of_matrices.append(matrix)

    return list_of_matrices
 class SoupSpawnExperiment:
    """Basin experiment for a soup of nets.

    Instantiating the class runs the complete pipeline: populate the soup, evolve it (attacks and
    self-training), clone every net that is an identity function, perturb and retrain the clones,
    plot the weight trajectories, compute the distance tables and pickle the experiment into
    ``directory``. The per-clone results are kept in the data frame ``self.df``.
    """

    @staticmethod
    def apply_noise(network, noise: float):
        """Shift every weight of ``network`` by ``+noise`` or ``-noise``, chosen per weight by a coin flip.

        The tensors returned by ``network.state_dict()`` are written in place. Like the original
        code, this relies on those tensors aliasing the network's live parameters.

        Args:
            network: network whose ``state_dict()`` holds two-dimensional weight tensors.
            noise: magnitude added to or subtracted from every single weight.

        Returns:
            The same ``network`` object that was passed in.
        """
        # Build the state dict once instead of once per weight access.
        state = network.state_dict()
        for layer_name in state:
            layer = state[layer_name]
            for line_id, line_values in enumerate(layer):
                for weight_id, weight_value in enumerate(line_values):
                    if prng() < 0.5:
                        layer[line_id][weight_id] = weight_value + noise
                    else:
                        layer[line_id][weight_id] = weight_value - noise
        return network

    @staticmethod
    def _target_distances(reference_weights, network):
        """Return ``(MAE, MSE, MIM)`` between ``reference_weights`` and the current target weights of ``network``."""
        current_weights = network.create_target_weights(network.input_weight_matrix())
        return (MAE(reference_weights, current_weights),
                MSE(reference_weights, current_weights),
                mean_invariate_manhattan_distance(reference_weights, current_weights))

    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 epochs, st_steps, attack_chance, nr_clones, noise, directory) -> None:
        """Store the settings and immediately run the whole experiment.

        Args:
            population_size: number of nets created for the soup.
            log_step_size: forwarded to ``Net.self_train``.
            net_input_size: forwarded to the ``Net`` constructor.
            net_hidden_size: forwarded to the ``Net`` constructor.
            net_out_size: forwarded to the ``Net`` constructor.
            net_learning_rate: forwarded to ``Net.self_train``.
            epochs: number of soup epochs.
            st_steps: self-train steps per net and epoch.
            attack_chance: chance in percent (compared against ``randint(1, 100)``) that one
                attack happens in an epoch.
            nr_clones: default number of clones spawned per fixpoint.
            noise: upper bound of the noise magnitude applied to clones; a falsy value falls back
                to ``10e-5``.
            directory: output directory; it is created when missing.
        """
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size
        self.net_learning_rate = net_learning_rate
        self.epochs = epochs
        self.ST_steps = st_steps
        self.attack_chance = attack_chance
        self.loss_history = []
        self.nr_clones = nr_clones
        self.noise = noise or 10e-5
        print("\nNOISE:", self.noise)

        self.directory = Path(directory)
        self.directory.mkdir(parents=True, exist_ok=True)

        # Populating environment & evolving entities
        self.nets = []
        self.populate_environment()
        self.evolve()

        self.spawn_and_continue()
        self.weights_evolution_3d_experiment()
        # self.visualize_loss()
        self.distance_matrix = distance_matrix(self.nets, print_it=False)
        self.parent_clone_distances = distance_from_parent(self.nets, print_it=False)

        self.save()

    def populate_environment(self):
        """Create ``population_size`` nets named ``soup_net_<i>`` and append them to ``self.nets``."""
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating experiment %s" % i)

            net_name = f"soup_net_{str(i)}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
            self.nets.append(net)

    def evolve(self):
        """Run ``epochs`` soup epochs: at most one random attack, then self-training of every net."""
        loop_epochs = tqdm(range(self.epochs))
        for i in loop_epochs:
            loop_epochs.set_description("Evolving soup %s" % i)

            # A network attacking another network with a given percentage
            if random.randint(1, 100) <= self.attack_chance:
                attacker_idx, victim_idx = random.sample(range(self.population_size), 2)
                random_net1 = self.nets[attacker_idx]
                random_net2 = self.nets[victim_idx]
                print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
                random_net1.attack(random_net2)

            # Self-training each network in the population
            for net in self.nets[:self.population_size]:
                for _ in range(self.ST_steps):
                    net.self_train(1, self.log_step_size, self.net_learning_rate)

    def _finish_training(self, net):
        """Self-train ``net`` for the remaining ``(epochs - 1) * ST_steps`` steps."""
        for _ in range(self.epochs - 1):
            for _ in range(self.ST_steps):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

    def spawn_and_continue(self, number_clones: int = None):
        """Clone every fixpoint net, perturb the clones, train them on and log their distances.

        For each of the initial ``population_size`` nets that is an identity function,
        ``number_clones`` noisy copies are created. Their MAE/MSE/MIM distances to the parent's
        target weights are recorded right after the noise is applied and again after the remaining
        training. Clones that are identity functions afterwards are appended to ``self.nets``.
        The parent finally receives the same remaining training. The results are stored in
        ``self.df``, one row per clone indexed by the clone's name.

        Args:
            number_clones: clones per fixpoint; a falsy value falls back to ``self.nr_clones``.
        """
        number_clones = number_clones or self.nr_clones

        df = pd.DataFrame(
            columns=['parent', 'MAE_pre', 'MAE_post', 'MSE_pre', 'MSE_post', 'MIM_pre', 'MIM_post', 'noise',
                     'status_post'])

        # Iterate a slice copy: clones appended to self.nets below must not be visited as parents.
        for i, net in enumerate(self.nets[:self.population_size]):
            # We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out
            # to see the full trajectory (but the clones will be very hard to see).
            net.start_time = self.ST_steps - 150
            # Make one target to compare distances to clones later when they have trained.
            net_input_data = net.input_weight_matrix()
            net_target_data = net.create_target_weights(net_input_data)

            if not is_identity_function(net):
                continue
            print(f"\nNet {i} is fixpoint")

            # Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
            # To plot clones starting after first epoch (z=ST_steps), set that as start_time!
            # To make sure PCA will plot the same trajectory up until this point, we clone the
            # parent-net's weight history as well.
            for j in range(number_clones):
                # The clone name is derived from the parent name so that distance_from_parent(),
                # which pairs nets by name, can find the clones of this parent.
                clone = Net(net.input_size, net.hidden_size, net.out_size,
                            f"{net.name}_clone_{j}", start_time=self.ST_steps)
                clone.load_state_dict(copy.deepcopy(net.state_dict()))
                rand_noise = prng() * self.noise
                clone = self.apply_noise(clone, rand_noise)
                clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history)
                clone.number_trained = copy.deepcopy(net.number_trained)

                # Pre Training distances (after noise application of course)
                MAE_pre, MSE_pre, MIM_pre = self._target_distances(net_target_data, clone)

                # Then finish training each clone {j} (for remaining epoch-1 * ST_steps) ..
                self._finish_training(clone)

                # Post Training distances for comparison
                MAE_post, MSE_post, MIM_post = self._target_distances(net_target_data, clone)

                # .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
                test_status(clone)
                if is_identity_function(clone):
                    print(f"Clone {j} (of net_{i}) is fixpoint."
                          f"\nMSE({i},{j}): {MSE_post}"
                          f"\nMAE({i},{j}): {MAE_post}"
                          f"\nMIM({i},{j}): {MIM_post}\n")
                    self.nets.append(clone)

                df.loc[clone.name] = [net.name, MAE_pre, MAE_post, MSE_pre, MSE_post, MIM_pre, MIM_post, self.noise,
                                      clone.is_fixpoint]

            # Finally take parent net {i} and finish its training for comparison to clone development.
            self._finish_training(net)
            parent_mae, parent_mse, parent_mim = self._target_distances(net_target_data, net)
            # Each label now names the metric that is actually printed next to it.
            print(f"Parent net's distance to original position."
                  f"\nMSE(OG,new): {parent_mse}"
                  f"\nMAE(OG,new): {parent_mae}"
                  f"\nMIM(OG,new): {parent_mim}\n")

        self.df = df

    def weights_evolution_3d_experiment(self):
        """Plot the weight trajectories of all nets via ``plot_3d_soup`` and return its result."""
        exp_name = f"soup_basins_{str(len(self.nets))}_nets_3d_weights_PCA"
        return plot_3d_soup(self.nets, exp_name, self.directory)

    def visualize_loss(self):
        """Collect every net's ``loss_history`` into ``self.loss_history`` and plot it with ``plot_loss``."""
        for net in self.nets:
            self.loss_history.append(net.loss_history)
        plot_loss(self.loss_history, self.directory)

    def save(self):
        """Pickle the whole experiment object to ``<directory>/experiment_pickle.p``."""
        pickle_path = Path(self.directory) / "experiment_pickle.p"
        # The context manager closes the file handle even if pickling fails.
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(self, pickle_file)
        print(f"\nSaved experiment to {self.directory}.")
 if __name__ == "__main__":
    # Architecture of every net in the soup.
    NET_INPUT_SIZE = 4
    NET_OUT_SIZE = 1
    soup_net_hidden_size = 2
    soup_net_learning_rate = 0.04

    # Run bookkeeping; ST_runs and ST_runs_name are not read anywhere in this script.
    ST_runs = 1
    ST_runs_name = "test-27"

    # Training schedule of the soup.
    soup_ST_steps = 2500
    soup_epochs = 2
    soup_log_step_size = 10

    # Population layout and attack probability (in percent).
    nr_clones = 15
    soup_population_size = 2
    soup_attack_chance = 10

    # Random hash that separates the outputs of different script runs.
    soup_name_hash = random.getrandbits(32)
    plot_root = f"output/soup_spawn_basin/{soup_name_hash}"

    print("Running the Soup-Spawn experiment:")

    # One complete experiment per noise magnitude 10^-2, 10^-3 and 10^-4.
    exp_list = [
        SoupSpawnExperiment(
            population_size=soup_population_size,
            log_step_size=soup_log_step_size,
            net_input_size=NET_INPUT_SIZE,
            net_hidden_size=soup_net_hidden_size,
            net_out_size=NET_OUT_SIZE,
            net_learning_rate=soup_net_learning_rate,
            epochs=soup_epochs,
            st_steps=soup_ST_steps,
            attack_chance=soup_attack_chance,
            nr_clones=nr_clones,
            noise=pow(10, -noise_factor),
            directory=Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}' / f'10e-{noise_factor}'
        )
        for noise_factor in range(2, 5)
    ]

    # Count plot of the clones' post-training status, grouped by noise level.
    df = pd.concat([experiment.df for experiment in exp_list])
    sns.countplot(data=df, x="noise", hue="status_post")
    plt.savefig(f"{plot_root}/fixpoint_status_countplot.png")

    # Catplot (either kind="point" or "box") that shows before-after training distances to parent
    distance_columns = df[["MIM_pre", "MIM_post", "noise"]]
    mlt = distance_columns.melt("noise", var_name="time", value_name='Average Distance')
    sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False)
    plt.savefig(f"{plot_root}/clone_distance_catplot.png")
--- a/journal_soup_robustness.py
+++ b/journal_soup_robustness.py
@@ -0,0 +1,266 @@
 import copy
 import random
 import os.path
 import pickle
 from pathlib import Path
 from typing import Union
 import numpy as np
 import pandas as pd
 import seaborn as sns
 from tqdm import tqdm
 from matplotlib import pyplot as plt
 from torch.nn import functional as F
 from tabulate import tabulate
 from experiments.helpers import check_folder, summary_fixpoint_percentage, summary_fixpoint_experiment
 from functionalities_test import test_for_fixpoints, is_zero_fixpoint, is_divergent, is_identity_function
 from network import Net
 from visualization import plot_loss, bar_chart_fixpoints, plot_3d_soup, line_chart_fixpoints
 def prng():
    """Draw a single pseudo-random float by delegating to ``random.random()``."""
    drawn = random.random()
    return drawn
 class SoupRobustnessExperiment:
    """Robustness experiment for the fixpoints that emerge in a soup of nets.

    Instantiating the class runs the complete pipeline: populate and evolve the soup, chart the
    fixpoint statistics and the loss, then perturb clones of every identity-function net and count
    how many self-application steps they survive.
    """

    @staticmethod
    def apply_noise(network, noise: float):
        """Shift every weight of ``network`` by ``+noise`` or ``-noise``, chosen per weight by a coin flip.

        The tensors returned by ``network.state_dict()`` are written in place. Like the original
        code, this relies on those tensors aliasing the network's live parameters.

        Args:
            network: network whose ``state_dict()`` holds two-dimensional weight tensors.
            noise: magnitude added to or subtracted from every single weight.

        Returns:
            The same ``network`` object that was passed in.
        """
        # Build the state dict once instead of once per weight access.
        state = network.state_dict()
        for layer_name in state:
            layer = state[layer_name]
            for line_id, line_values in enumerate(layer):
                for weight_id, weight_value in enumerate(line_values):
                    if prng() < 0.5:
                        layer[line_id][weight_id] = weight_value + noise
                    else:
                        layer[line_id][weight_id] = weight_value - noise
        return network

    def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, attack_chance,
                 train_nets, ST_steps, epochs, log_step_size, directory: Union[str, Path]):
        """Store the settings and immediately run the whole experiment.

        Args:
            population_size: number of nets created for the soup.
            net_i_size: forwarded to the ``Net`` constructor.
            net_h_size: forwarded to the ``Net`` constructor.
            net_o_size: forwarded to the ``Net`` constructor.
            learning_rate: forwarded to ``Net.self_train``.
            attack_chance: chance in percent (compared against ``randint(1, 100)``) that one
                attack happens in an epoch.
            train_nets: stored on the instance; not read by any method of this class.
            ST_steps: self-train steps per net and epoch.
            epochs: number of soup epochs.
            log_step_size: forwarded to ``Net.self_train`` and ``Net.self_application``.
            directory: output directory; it is created when missing.
        """
        super().__init__()
        self.population_size = population_size

        self.net_input_size = net_i_size
        self.net_hidden_size = net_h_size
        self.net_out_size = net_o_size
        self.net_learning_rate = learning_rate
        self.attack_chance = attack_chance
        self.train_nets = train_nets
        # self.SA_steps = SA_steps
        self.ST_steps = ST_steps
        self.epochs = epochs
        self.log_step_size = log_step_size

        self.loss_history = []

        # Start from all-zero counters (same dictionary the reset method builds).
        self.reset_fixpoint_counters()
        # <self.fixpoint_counters_history> is used for keeping track of the amount of fixpoints in %
        self.fixpoint_counters_history = []
        self.id_functions = []

        self.directory = Path(directory)
        self.directory.mkdir(parents=True, exist_ok=True)

        self.population = []
        self.populate_environment()

        self.evolve()
        self.fixpoint_percentage()
        self.weights_evolution_3d_experiment()
        self.count_fixpoints()
        self.visualize_loss()

        # test_robustness() returns (time as fixpoint, time to vergence); unpack in that order so
        # each attribute holds the table its name promises.
        self.time_as_fixpoint, self.time_to_vergence = self.test_robustness()

    def populate_environment(self):
        """Create ``population_size`` nets named ``soup_network_<i>`` and append them to ``self.population``."""
        loop_population_size = tqdm(range(self.population_size))
        # Iterate the bar that also receives the description, instead of opening a second one.
        for i in loop_population_size:
            loop_population_size.set_description("Populating soup experiment %s" % i)

            net_name = f"soup_network_{i}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
            self.population.append(net)

    def evolve(self):
        """ Evolving consists of attacking & self-training. """
        loop_epochs = tqdm(range(self.epochs))
        for i in loop_epochs:
            loop_epochs.set_description("Evolving soup %s" % i)

            # A network attacking another network with a given percentage
            if random.randint(1, 100) <= self.attack_chance:
                attacker_idx, victim_idx = random.sample(range(self.population_size), 2)
                random_net1 = self.population[attacker_idx]
                random_net2 = self.population[victim_idx]
                print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
                random_net1.attack(random_net2)

            # Self-training each network in the population
            for net in self.population[:self.population_size]:
                for _ in range(self.ST_steps):
                    net.self_train(1, self.log_step_size, self.net_learning_rate)

            # Testing for fixpoints every ST_steps-th epoch to see relevant data
            if i % self.ST_steps == 0:
                test_for_fixpoints(self.fixpoint_counters, self.population)
                fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
                self.fixpoint_counters_history.append(fixpoints_percentage)

            # Resetting the fixpoint counter.
            # NOTE(review): ``i`` never reaches ``self.epochs``, so this reset also runs after the
            # last epoch, although the original comment wanted to keep the last counters for
            # bar_chart_fixpoints(). count_fixpoints() calls test_for_fixpoints() again before
            # charting, so the condition is left as it was.
            if i < self.epochs:
                self.reset_fixpoint_counters()

    def test_robustness(self, print_it=True, noise_levels=10, seeds=10):
        """Perturb clones of every identity-function net and track them under self-application.

        For each net in ``self.id_functions``, each seed and each noise level ``n`` a clone is
        perturbed with a random magnitude below ``10 ** -n`` and self-applied until it is a zero
        fixpoint or divergent. The per-step absolute loss is collected in a data frame and saved
        as a box-plot grid to ``output/robustness``.

        NOTE(review): the two result tables are sums over all seeds; despite their names they are
        never divided by the number of seeds. The loop only ends once the clone is a zero fixpoint
        or divergent.

        Args:
            print_it: when true, print both result tables.
            noise_levels: number of noise exponents to test (``0 .. noise_levels - 1``).
            seeds: number of independently perturbed clones per net and noise level.

        Returns:
            Tuple ``(avg_time_as_fixpoint, avg_time_to_vergence)``; both are nested lists indexed
            ``[fixpoint index][noise level]``.
        """
        # assert (len(self.id_functions) == 1 and seeds > 1) or (len(self.id_functions) > 1 and seeds == 1)
        is_synthetic = len(self.id_functions) > 1 and seeds == 1

        # Both tables are indexed by the fixpoint index below, so they need one row per fixpoint
        # in every setting; sizing them by ``seeds`` raised an IndexError for several fixpoints.
        nr_fixpoints = len(self.id_functions)
        avg_time_to_vergence = [[0 for _ in range(noise_levels)] for _ in range(nr_fixpoints)]
        avg_time_as_fixpoint = [[0 for _ in range(noise_levels)] for _ in range(nr_fixpoints)]
        row_headers = []
        data_pos = 0
        # This checks whether to use the synthetic setting with multiple seeds
        #   or the multi network setting with a single seed

        df = pd.DataFrame(columns=['seed', 'noise_level', 'application_step', 'absolute_loss'])
        for i, fixpoint in enumerate(self.id_functions):  # 1 / n
            row_headers.append(fixpoint.name)
            for seed in range(seeds):  # n / 1
                for noise_level in range(noise_levels):
                    self_application_steps = 1
                    clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
                                f"{fixpoint.name}_clone_noise10e-{noise_level}")
                    clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))

                    rand_noise = prng() * pow(10, -noise_level)  # n / 1
                    clone = self.apply_noise(clone, rand_noise)

                    while not is_zero_fixpoint(clone) and not is_divergent(clone):
                        if is_identity_function(clone):
                            avg_time_as_fixpoint[i][noise_level] += 1

                        # -> before
                        clone_weight_pre_application = clone.input_weight_matrix()
                        target_data_pre_application = clone.create_target_weights(clone_weight_pre_application)

                        clone.self_application(1, self.log_step_size)
                        avg_time_to_vergence[i][noise_level] += 1
                        # -> after
                        clone_weight_post_application = clone.input_weight_matrix()
                        target_data_post_application = clone.create_target_weights(clone_weight_post_application)

                        absolute_loss = F.l1_loss(target_data_pre_application, target_data_post_application).item()

                        setting = i if is_synthetic else seed

                        df.loc[data_pos] = [setting, noise_level, self_application_steps, absolute_loss]
                        data_pos += 1
                        self_application_steps += 1

        # Drop rows whose loss is infinite or NaN before plotting.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.dropna()
        # sns.set(rc={'figure.figsize': (10, 50)})
        sns.catplot(data=df[df['absolute_loss'] < 1], y='absolute_loss', x='application_step', kind='box',
                    col='noise_level', col_wrap=3, showfliers=False)
        directory = Path('output') / 'robustness'
        # Make sure the target folder exists even when self.directory lives elsewhere.
        directory.mkdir(parents=True, exist_ok=True)
        filename = "absolute_loss_perapplication_boxplot_grid.png"
        filepath = directory / filename

        plt.savefig(str(filepath))

        if print_it:
            col_headers = [str(f"10e-{d}") for d in range(noise_levels)]

            print(f"\nAppplications steps until divergence / zero: ")
            print(tabulate(avg_time_to_vergence, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))

            print(f"\nTime as fixpoint: ")
            print(tabulate(avg_time_as_fixpoint, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))

        return avg_time_as_fixpoint, avg_time_to_vergence

    def weights_evolution_3d_experiment(self):
        """Plot the weight trajectories of the population via ``plot_3d_soup`` and return its result."""
        exp_name = f"soup_{self.population_size}_nets_{self.ST_steps}_training_{self.epochs}_epochs"
        return plot_3d_soup(self.population, exp_name, self.directory)

    def count_fixpoints(self):
        """Run ``test_for_fixpoints`` on the population, keep its result in ``self.id_functions`` and chart the counters."""
        self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.population)
        exp_details = f"Evolution steps: {self.epochs} epochs"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
                            exp_details)

    def fixpoint_percentage(self):
        """Draw the line chart of the recorded fixpoint shares via ``line_chart_fixpoints``."""
        runs = self.epochs / self.ST_steps
        SA_steps = None
        line_chart_fixpoints(self.fixpoint_counters_history, runs, self.ST_steps, SA_steps, self.directory,
                             self.population_size)

    def visualize_loss(self):
        """Collect every net's ``loss_history`` into ``self.loss_history`` and plot it with ``plot_loss``."""
        for net in self.population:
            self.loss_history.append(net.loss_history)
        plot_loss(self.loss_history, self.directory)

    def reset_fixpoint_counters(self):
        """Replace ``self.fixpoint_counters`` with a fresh dictionary whose six counters are all zero."""
        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }
 if __name__ == "__main__":
    # Architecture of every net in the soup.
    NET_INPUT_SIZE = 4
    NET_OUT_SIZE = 1
    soup_net_hidden_size = 2
    soup_net_learning_rate = 0.04

    # Training schedule of the soup.
    soup_epochs = 100
    soup_log_step_size = 5
    soup_ST_steps = 20
    # soup_SA_steps = 10

    # Number of networks and the attack chance in %.
    soup_population_size = 20
    soup_attack_chance = 10

    # not used yet: soup_train_nets has 3 possible values "no", "before_SA", "after_SA".
    soup_train_nets = "no"

    # Random hash that separates the outputs of different script runs.
    soup_name_hash = random.getrandbits(32)
    # Not read anywhere in this script.
    soup_synthetic = True

    print("Running the robustness comparison experiment:")

    # Collect the constructor arguments first, then run the experiment in one call.
    experiment_settings = dict(
        population_size=soup_population_size,
        net_i_size=NET_INPUT_SIZE,
        net_h_size=soup_net_hidden_size,
        net_o_size=NET_OUT_SIZE,
        learning_rate=soup_net_learning_rate,
        attack_chance=soup_attack_chance,
        train_nets=soup_train_nets,
        ST_steps=soup_ST_steps,
        epochs=soup_epochs,
        log_step_size=soup_log_step_size,
        directory=Path('output') / 'robustness' / f'{soup_name_hash}',
    )
    SoupRobustnessExperiment(**experiment_settings)