Fixed exp pickle save. Added exp results. Fixed soup plot issue.

2021-06-20 17:09:17 +02:00
parent 1da5bd95d6
commit cf6eec639f
20 changed files with 64 additions and 47 deletions
--- a/journal_basin_linspace_clones.py
+++ b/journal_basin_linspace_clones.py
@@ -2,7 +2,7 @@ import copy
 import itertools
 from pathlib import Path
 import random
-
+import pickle
 import pandas as pd
 import numpy as np
 import torch
@@ -113,7 +113,7 @@ if __name__ == '__main__':
    ST_name_hash = random.getrandbits(32)

    print(f"Running the Spawn experiment:")
-    df = SpawnLinspaceExperiment(
+    exp = SpawnLinspaceExperiment(
        population_size=ST_population_size,
        log_step_size=ST_log_step_size,
        net_input_size=NET_INPUT_SIZE,
@@ -125,7 +125,12 @@ if __name__ == '__main__':
        nr_clones=nr_clones,
        noise=None,
        directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'linage'
-    ).df
+    )
+    df = exp.df
+
+    directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}' / 'linage'
+    pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
+    print(f"\nSaved experiment to {directory}.")

    # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis
    sns.countplot(data=df, x="noise", hue="status_post")
--- a/journal_basins.py
+++ b/journal_basins.py
@@ -126,7 +126,7 @@ class SpawnExperiment:
        # self.visualize_loss()
        self.distance_matrix = distance_matrix(self.nets, print_it=False)
        self.parent_clone_distances = distance_from_parent(self.nets, print_it=False)
-        self.save()
+        

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
@@ -155,7 +155,7 @@ class SpawnExperiment:
            # We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to
            # see full trajectory (but the clones will be very hard to see).
            # Make one target to compare distances to clones later when they have trained.
-            net.start_time = self.ST_steps - 150
+            net.start_time = self.ST_steps - 350
            net_input_data = net.input_weight_matrix()
            net_target_data = net.create_target_weights(net_input_data)

@@ -225,9 +225,6 @@ class SpawnExperiment:
            self.loss_history.append(net_loss_history)
        plot_loss(self.loss_history, self.directory)

-    def save(self):
-        pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb"))
-        print(f"\nSaved experiment to {self.directory}.")


 if __name__ == "__main__":
@@ -243,15 +240,15 @@ if __name__ == "__main__":
    ST_log_step_size = 10

    # Define number of networks & their architecture
-    nr_clones = 5
-    ST_population_size = 2
+    nr_clones = 10
+    ST_population_size = 1
    ST_net_hidden_size = 2
    ST_net_learning_rate = 0.04
    ST_name_hash = random.getrandbits(32)

    print(f"Running the Spawn experiment:")
    exp_list = []
-    for noise_factor in range(2, 4):
+    for noise_factor in range(2, 3):
        exp = SpawnExperiment(
            population_size=ST_population_size,
            log_step_size=ST_log_step_size,
@@ -267,18 +264,30 @@ if __name__ == "__main__":
        )
        exp_list.append(exp)

-    # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis
+    directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}'
+    pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
+    print(f"\nSaved experiment to {directory}.")
+
+    # Concat all dataframes, and add columns depending on where clone weights end up after training (rel. to parent)  
    df = pd.concat([exp.df for exp in exp_list])
-    sns.countplot(data=df, x="noise", hue="status_post")
-    plt.savefig(f"output/spawn_basin/{ST_name_hash}/fixpoint_status_countplot.png")
+    df = df.dropna().reset_index()
+    df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"])/df.loc[i]["noise"] for i in range(len(df))]
+    df["class"] = [ "approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))]

-    # Catplot (either kind="point" or "box") that shows before-after training distances to parent
-    mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance')
-    sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False)
-    plt.savefig(f"output/spawn_basin/{ST_name_hash}/clone_distance_catplot.png")
-
-    mlt = df.melt(id_vars=["name", "noise"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance")
-    ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", sharey=False, palette="Greens", legend=False)
-    ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"])
-    plt.savefig(f"output/spawn_basin/{ST_name_hash}/before_after_distance_catplot.png")
+    # Countplot of all fixpoint clones after training, per class. Uncomment and manually adjust the xticklabels if the x-axis size gets too small.
+    ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=11.7/5.27)
+    ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15)
+    #ax.set_xticklabels(labels=('10e-10', '10e-9', '10e-8', '10e-7', '10e-6', '10e-5', '10e-4', '10e-3', '10e-2', '10e-1'), fontsize=15)
+    plt.savefig(f"{directory}/clone_status_after_countplot_{ST_name_hash}.png")
+    plt.clf()

+    # Catplot of before-after comparison of the clones' weights. Colors the links depending on class (approaching, distancing, stationary (i.e., MAE=0)). Blue, orange and green are based on the countplot above and should be safe for colorblindness (see https://gist.github.com/mwaskom/b35f6ebc2d4b340b4f64a4e28e778486).
+    mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance")
+    P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))]
+    P = sns.color_palette(P, as_cmap=False)
+    ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P, col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
+    ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
+    ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
+    ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
+    plt.savefig(f"{directory}/before_after_distance_catplot_{ST_name_hash}.png")
+    plt.clf()
--- a/journal_robustness.py
+++ b/journal_robustness.py
@@ -91,7 +91,6 @@ class RobustnessComparisonExperiment:
        self.time_to_vergence, self.time_as_fixpoint = self.test_robustness(
            seeds=population_size if self.is_synthetic else 1)

-        self.save()

    def populate_environment(self):
        nets = []
@@ -211,9 +210,6 @@ class RobustnessComparisonExperiment:
            self.loss_history.append(net_loss_history)
        plot_loss(self.loss_history, self.directory)

-    def save(self):
-        pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb"))
-        print(f"\nSaved experiment to {self.directory}.")


 if __name__ == "__main__":
@@ -230,7 +226,7 @@ if __name__ == "__main__":
    ST_synthetic = True

    print(f"Running the robustness comparison experiment:")
-    RobustnessComparisonExperiment(
+    exp = RobustnessComparisonExperiment(
        population_size=ST_population_size,
        log_step_size=ST_log_step_size,
        net_input_size=NET_INPUT_SIZE,
@@ -242,3 +238,7 @@ if __name__ == "__main__":
        synthetic=ST_synthetic,
        directory=Path('output') / 'journal_robustness' / f'{ST_name_hash}'
    )
+
+    directory = Path('output') / 'journal_robustness' / f'{ST_name_hash}'
+    pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
+    print(f"\nSaved experiment to {directory}.")
--- a/journal_soup_basins.py
+++ b/journal_soup_basins.py
@@ -231,6 +231,8 @@ class SoupSpawnExperiment:
                MSE_pre = MSE(net_target_data, clone_pre_weights)
                MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights)

+                df.loc[len(df)] = [clone.name, net.name, MAE_pre, 0, MSE_pre, 0, MIM_pre, 0, self.noise, ""]
+
                net.children.append(clone)
                self.clones.append(clone)
                self.parents_with_clones.append(clone)
@@ -260,10 +262,9 @@ class SoupSpawnExperiment:
                          f"\nMSE({i},{j}): {MSE_post}"
                          f"\nMAE({i},{j}): {MAE_post}"
                          f"\nMIM({i},{j}): {MIM_post}\n")
-                    self.parents_clones_id_functions.append(clone)
+                    self.parents_clones_id_functions.append(clone)

-                df.loc[clone.name] = [net.name, MAE_pre, MAE_post, MSE_pre, MSE_post, MIM_pre, MIM_post, self.noise,
-                                      clone.is_fixpoint]
+                df.loc[df.name==clone.name, ["MAE_post", "MSE_post", "MIM_post", "status_post"]] = [MAE_post, MSE_post, MIM_post, clone.is_fixpoint]

            # Finally take parent net {i} and finish its training for comparison to clone development.
            for _ in range(self.epochs - 1):
@@ -287,9 +288,6 @@ class SoupSpawnExperiment:
            self.loss_history.append(net_loss_history)
        plot_loss(self.loss_history, self.directory)

-    def save(self):
-        pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb"))
-        print(f"\nSaved experiment to {self.directory}.")


 if __name__ == "__main__":
@@ -331,12 +329,19 @@ if __name__ == "__main__":
        )
        exp_list.append(exp)

+    directory = Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}'
+    pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{soup_name_hash}.p", "wb"))
+    print(f"\nSaved experiment to {directory}.")
+
    # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis
    df = pd.concat([exp.df for exp in exp_list])
    sns.countplot(data=df, x="noise", hue="status_post")
    plt.savefig(f"output/soup_spawn_basin/{soup_name_hash}/fixpoint_status_countplot.png")

    # Catplot (either kind="point" or "box") that shows before-after training distances to parent
-    mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance')
-    sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False)
+    mlt = df.melt(id_vars=["name", "noise"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance")
+    ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
+    ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
+    ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
+    ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
    plt.savefig(f"output/soup_spawn_basin/{soup_name_hash}/clone_distance_catplot.png")
--- a/3234988603/Bildschirmfoto
+++ b/3234988603/Bildschirmfoto
--- a/3234988603/before_after_distance_catplot_3234988603.png
+++ b/3234988603/before_after_distance_catplot_3234988603.png
--- a/3234988603/clone_status_after_countplot_3234988603.png
+++ b/3234988603/clone_status_after_countplot_3234988603.png
--- a/3234988603/experiment_pickle_3234988603.p
+++ b/3234988603/experiment_pickle_3234988603.p
--- a/results_new/basin_2889303335/10e-2/ST_11_nets_3d_weights_PCA.png
+++ b/results_new/basin_2889303335/10e-2/ST_11_nets_3d_weights_PCA.png
--- a/results_new/basin_2889303335/basin_trajectory_1_2889303335.png
+++ b/results_new/basin_2889303335/basin_trajectory_1_2889303335.png
--- a/results_new/basin_2889303335/basin_trajectory_2_2889303335.png
+++ b/results_new/basin_2889303335/basin_trajectory_2_2889303335.png
--- a/results_new/basin_2889303335/before_after_distance_catplot_2889303335.png
+++ b/results_new/basin_2889303335/before_after_distance_catplot_2889303335.png
--- a/results_new/basin_2889303335/clone_status_after_countplot_2889303335.png
+++ b/results_new/basin_2889303335/clone_status_after_countplot_2889303335.png
--- a/results_new/basin_2889303335/experiment_pickle_2889303335.p
+++ b/results_new/basin_2889303335/experiment_pickle_2889303335.p
--- a/results_new/basin_3570597918/before_after_distance_catplot_3570597918.png
+++ b/results_new/basin_3570597918/before_after_distance_catplot_3570597918.png
--- a/results_new/basin_3570597918/clone_status_after_countplot_3570597918.png
+++ b/results_new/basin_3570597918/clone_status_after_countplot_3570597918.png
--- a/results_new/basin_3570597918/experiment_pickle_3570597918.p
+++ b/results_new/basin_3570597918/experiment_pickle_3570597918.p
--- a/results_new/basin_3570597918/fixpoint_accuracy_3570597918.png
+++ b/results_new/basin_3570597918/fixpoint_accuracy_3570597918.png
--- a/results_new/basin_3570597918/fixpoint_clone_status_after_countplot_3570597918.png
+++ b/results_new/basin_3570597918/fixpoint_clone_status_after_countplot_3570597918.png
--- a/visualization.py
+++ b/visualization.py
@@ -92,7 +92,6 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si
            wm = np.array(wh)
            n, x, y = wm.shape
            wm = wm.reshape(n, x * y)
-            #print(wm.shape, wm)
            weight_histories.append(wm)

        weight_data = np.array(weight_histories)
@@ -104,7 +103,6 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si

        for transformed_trajectory, start_time in zip(np.split(weight_data_pca, n), start_times):
            start_log_time = int(start_time / batch_size)
-            #print(start_time, start_log_time)
            xdata = transformed_trajectory[start_log_time:, 0]
            ydata = transformed_trajectory[start_log_time:, 1]
            zdata = np.arange(start_time, len(ydata)*batch_size+start_time, batch_size).tolist()
@@ -139,7 +137,7 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si
            else:
                ax.scatter(np.asarray(xdata), np.asarray(ydata), zdata, s=3)

-    steps = mpatches.Patch(color="white", label=f"{z_axis_legend}: {len(matrices_weights_history)} steps")
+    #steps = mpatches.Patch(color="white", label=f"{z_axis_legend}: {len(matrices_weights_history)} steps")
    population_size = mpatches.Patch(color="white", label=f"Population: {population_size} networks")

    if z_axis_legend == "Self-application":
@@ -147,14 +145,14 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si
            trained = mpatches.Patch(color="white", label=f"Trained: true")
        else:
            trained = mpatches.Patch(color="white", label=f"Trained: false")
-        ax.legend(handles=[steps, population_size, trained])
+        ax.legend(handles=[population_size, trained])
    else:
-        ax.legend(handles=[steps, population_size])
+        ax.legend(handles=[population_size])

-    ax.set_title(f"PCA Weights history")
-    ax.set_xlabel("PCA X")
-    ax.set_ylabel("PCA Y")
-    ax.set_zlabel(f"Epochs")
+    ax.set_title(f"PCA Transformed Weight Trajectories")
+    ax.set_xlabel("PCA Transformed X-Axis")
+    ax.set_ylabel("PCA Transformed Y-Axis")
+    ax.set_zlabel(f"Self Training Steps")

    # FIXME: Replace this kind of operation with pathlib.Path() object interactions
    directory = Path(directory)
@@ -168,7 +166,7 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si
    else:
        plt.savefig(str(filepath))

-    # plt.show()
+    plt.show()


 def plot_3d_self_train(nets_array: List, exp_name: str, directory: Union[str, Path], batch_size: int, plot_pca_together: bool):