diff --git a/journal_basin_linspace_clones.py b/journal_basin_linspace_clones.py index b2b632d..29a4977 100644 --- a/journal_basin_linspace_clones.py +++ b/journal_basin_linspace_clones.py @@ -2,7 +2,7 @@ import copy import itertools from pathlib import Path import random - +import pickle import pandas as pd import numpy as np import torch @@ -113,7 +113,7 @@ if __name__ == '__main__': ST_name_hash = random.getrandbits(32) print(f"Running the Spawn experiment:") - df = SpawnLinspaceExperiment( + exp = SpawnLinspaceExperiment( population_size=ST_population_size, log_step_size=ST_log_step_size, net_input_size=NET_INPUT_SIZE, @@ -125,7 +125,12 @@ if __name__ == '__main__': nr_clones=nr_clones, noise=None, directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'linage' - ).df + ) + df = exp.df + + directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}' / 'linage' + pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb")) + print(f"\nSaved experiment to {directory}.") # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis sns.countplot(data=df, x="noise", hue="status_post") diff --git a/journal_basins.py b/journal_basins.py index 209bf46..aca416e 100644 --- a/journal_basins.py +++ b/journal_basins.py @@ -126,7 +126,7 @@ class SpawnExperiment: # self.visualize_loss() self.distance_matrix = distance_matrix(self.nets, print_it=False) self.parent_clone_distances = distance_from_parent(self.nets, print_it=False) - self.save() + def populate_environment(self): loop_population_size = tqdm(range(self.population_size)) @@ -155,7 +155,7 @@ class SpawnExperiment: # We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to # to see full trajectory (but the clones will be very hard to see). # Make one target to compare distances to clones later when they have trained. 
- net.start_time = self.ST_steps - 150 + net.start_time = self.ST_steps - 350 net_input_data = net.input_weight_matrix() net_target_data = net.create_target_weights(net_input_data) @@ -169,7 +169,7 @@ class SpawnExperiment: for j in range(number_clones): clone = Net(net.input_size, net.hidden_size, net.out_size, f"ST_net_{str(i)}_clone_{str(j)}", start_time=self.ST_steps) - clone.load_state_dict(copy.deepcopy(net.state_dict())) + clone.load_state_dict(copy.deepcopy(net.state_dict())) rand_noise = prng() * self.noise clone = self.apply_noise(clone, rand_noise) clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history) @@ -225,9 +225,6 @@ class SpawnExperiment: self.loss_history.append(net_loss_history) plot_loss(self.loss_history, self.directory) - def save(self): - pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb")) - print(f"\nSaved experiment to {self.directory}.") if __name__ == "__main__": @@ -243,15 +240,15 @@ if __name__ == "__main__": ST_log_step_size = 10 # Define number of networks & their architecture - nr_clones = 5 - ST_population_size = 2 + nr_clones = 10 + ST_population_size = 1 ST_net_hidden_size = 2 ST_net_learning_rate = 0.04 ST_name_hash = random.getrandbits(32) print(f"Running the Spawn experiment:") exp_list = [] - for noise_factor in range(2, 4): + for noise_factor in range(2, 3): exp = SpawnExperiment( population_size=ST_population_size, log_step_size=ST_log_step_size, @@ -267,18 +264,30 @@ if __name__ == "__main__": ) exp_list.append(exp) - # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis + directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}' + pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb")) + print(f"\nSaved experiment to {directory}.") + + # Concat all dataframes, and add columns depending on where clone weights end up after training (rel. 
to parent) df = pd.concat([exp.df for exp in exp_list]) - sns.countplot(data=df, x="noise", hue="status_post") - plt.savefig(f"output/spawn_basin/{ST_name_hash}/fixpoint_status_countplot.png") + df = df.dropna().reset_index() + df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"])/df.loc[i]["noise"] for i in range(len(df))] + df["class"] = [ "approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))] - # Catplot (either kind="point" or "box") that shows before-after training distances to parent - mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance') - sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False) - plt.savefig(f"output/spawn_basin/{ST_name_hash}/clone_distance_catplot.png") - - mlt = df.melt(id_vars=["name", "noise"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance") - ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", sharey=False, palette="Greens", legend=False) - ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"]) - plt.savefig(f"output/spawn_basin/{ST_name_hash}/before_after_distance_catplot.png") + # Countplot of all fixpoint clones after training per class. Uncomment and manually adjust xticklabels if x-axis size gets too small. + ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=11.7/5.27) + ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15) + #ax.set_xticklabels(labels=('10e-10', '10e-9', '10e-8', '10e-7', '10e-6', '10e-5', '10e-4', '10e-3', '10e-2', '10e-1'), fontsize=15) + plt.savefig(f"{directory}/clone_status_after_countplot_{ST_name_hash}.png") + plt.clf() + # Catplot of before-after comparison of the clone's weights. 
Colors links depending on class (approaching, distancing, stationary (i.e., MAE=0)). Blue, orange and green are based on countplot above, should be safe for colorblindness (see https://gist.github.com/mwaskom/b35f6ebc2d4b340b4f64a4e28e778486)- + mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance") + P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))] + P = sns.color_palette(P, as_cmap=False) + ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P, col_wrap=min(5, len(exp_list)), sharey=False, legend=False) + ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100]) + ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15) + ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15) + plt.savefig(f"{directory}/before_after_distance_catplot_{ST_name_hash}.png") + plt.clf() diff --git a/journal_robustness.py b/journal_robustness.py index 32e0282..69bcc0c 100644 --- a/journal_robustness.py +++ b/journal_robustness.py @@ -91,7 +91,6 @@ class RobustnessComparisonExperiment: self.time_to_vergence, self.time_as_fixpoint = self.test_robustness( seeds=population_size if self.is_synthetic else 1) - self.save() def populate_environment(self): nets = [] @@ -211,9 +210,6 @@ class RobustnessComparisonExperiment: self.loss_history.append(net_loss_history) plot_loss(self.loss_history, self.directory) - def save(self): - pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb")) - print(f"\nSaved experiment to {self.directory}.") if __name__ == "__main__": @@ -230,7 +226,7 @@ if __name__ == "__main__": ST_synthetic = True print(f"Running the robustness comparison experiment:") - RobustnessComparisonExperiment( + exp = RobustnessComparisonExperiment( 
population_size=ST_population_size, log_step_size=ST_log_step_size, net_input_size=NET_INPUT_SIZE, @@ -242,3 +238,7 @@ if __name__ == "__main__": synthetic=ST_synthetic, directory=Path('output') / 'journal_robustness' / f'{ST_name_hash}' ) + + directory = Path('output') / 'journal_robustness' / f'{ST_name_hash}' + pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb")) + print(f"\nSaved experiment to {directory}.") \ No newline at end of file diff --git a/journal_soup_basins.py b/journal_soup_basins.py index fd67a5e..a674a2f 100644 --- a/journal_soup_basins.py +++ b/journal_soup_basins.py @@ -231,6 +231,8 @@ class SoupSpawnExperiment: MSE_pre = MSE(net_target_data, clone_pre_weights) MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights) + df.loc[len(df)] = [clone.name, net.name, MAE_pre, 0, MSE_pre, 0, MIM_pre, 0, self.noise, ""] + net.children.append(clone) self.clones.append(clone) self.parents_with_clones.append(clone) @@ -260,10 +262,9 @@ class SoupSpawnExperiment: f"\nMSE({i},{j}): {MSE_post}" f"\nMAE({i},{j}): {MAE_post}" f"\nMIM({i},{j}): {MIM_post}\n") - self.parents_clones_id_functions.append(clone) + self.parents_clones_id_functions.append(clone) - df.loc[clone.name] = [net.name, MAE_pre, MAE_post, MSE_pre, MSE_post, MIM_pre, MIM_post, self.noise, - clone.is_fixpoint] + df.loc[df.name==clone.name, ["MAE_post", "MSE_post", "MIM_post", "status_post"]] = [MAE_post, MSE_post, MIM_post, clone.is_fixpoint] # Finally take parent net {i} and finish it's training for comparison to clone development. 
for _ in range(self.epochs - 1): @@ -287,9 +288,6 @@ class SoupSpawnExperiment: self.loss_history.append(net_loss_history) plot_loss(self.loss_history, self.directory) - def save(self): - pickle.dump(self, open(f"{self.directory}/experiment_pickle.p", "wb")) - print(f"\nSaved experiment to {self.directory}.") if __name__ == "__main__": @@ -331,12 +329,19 @@ if __name__ == "__main__": ) exp_list.append(exp) + directory = Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}' + pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{soup_name_hash}.p", "wb")) + print(f"\nSaved experiment to {directory}.") + # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis df = pd.concat([exp.df for exp in exp_list]) sns.countplot(data=df, x="noise", hue="status_post") plt.savefig(f"output/soup_spawn_basin/{soup_name_hash}/fixpoint_status_countplot.png") # Catplot (either kind="point" or "box") that shows before-after training distances to parent - mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance') - sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False) + mlt = df.melt(id_vars=["name", "noise"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance") + ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", col_wrap=min(5, len(exp_list)), sharey=False, legend=False) + ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100]) + ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15) + ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15) plt.savefig(f"output/soup_spawn_basin/{soup_name_hash}/clone_distance_catplot.png") diff --git a/results_new/basin_ 3234988603/Bildschirmfoto 2021-06-20 um 14.00.53.png b/results_new/basin_ 3234988603/Bildschirmfoto 2021-06-20 um 14.00.53.png new file mode 100644 
index 0000000..2e591a6 Binary files /dev/null and b/results_new/basin_ 3234988603/Bildschirmfoto 2021-06-20 um 14.00.53.png differ diff --git a/results_new/basin_ 3234988603/before_after_distance_catplot_3234988603.png b/results_new/basin_ 3234988603/before_after_distance_catplot_3234988603.png new file mode 100644 index 0000000..becf850 Binary files /dev/null and b/results_new/basin_ 3234988603/before_after_distance_catplot_3234988603.png differ diff --git a/results_new/basin_ 3234988603/clone_status_after_countplot_3234988603.png b/results_new/basin_ 3234988603/clone_status_after_countplot_3234988603.png new file mode 100644 index 0000000..65e53a1 Binary files /dev/null and b/results_new/basin_ 3234988603/clone_status_after_countplot_3234988603.png differ diff --git a/results_new/basin_ 3234988603/experiment_pickle_3234988603.p b/results_new/basin_ 3234988603/experiment_pickle_3234988603.p new file mode 100644 index 0000000..096ddaa Binary files /dev/null and b/results_new/basin_ 3234988603/experiment_pickle_3234988603.p differ diff --git a/results_new/basin_2889303335/10e-2/ST_11_nets_3d_weights_PCA.png b/results_new/basin_2889303335/10e-2/ST_11_nets_3d_weights_PCA.png new file mode 100644 index 0000000..96445ac Binary files /dev/null and b/results_new/basin_2889303335/10e-2/ST_11_nets_3d_weights_PCA.png differ diff --git a/results_new/basin_2889303335/basin_trajectory_1_2889303335.png b/results_new/basin_2889303335/basin_trajectory_1_2889303335.png new file mode 100644 index 0000000..dbb983e Binary files /dev/null and b/results_new/basin_2889303335/basin_trajectory_1_2889303335.png differ diff --git a/results_new/basin_2889303335/basin_trajectory_2_2889303335.png b/results_new/basin_2889303335/basin_trajectory_2_2889303335.png new file mode 100644 index 0000000..475c4f6 Binary files /dev/null and b/results_new/basin_2889303335/basin_trajectory_2_2889303335.png differ diff --git a/results_new/basin_2889303335/before_after_distance_catplot_2889303335.png 
b/results_new/basin_2889303335/before_after_distance_catplot_2889303335.png new file mode 100644 index 0000000..0025322 Binary files /dev/null and b/results_new/basin_2889303335/before_after_distance_catplot_2889303335.png differ diff --git a/results_new/basin_2889303335/clone_status_after_countplot_2889303335.png b/results_new/basin_2889303335/clone_status_after_countplot_2889303335.png new file mode 100644 index 0000000..e5e94d0 Binary files /dev/null and b/results_new/basin_2889303335/clone_status_after_countplot_2889303335.png differ diff --git a/results_new/basin_2889303335/experiment_pickle_2889303335.p b/results_new/basin_2889303335/experiment_pickle_2889303335.p new file mode 100644 index 0000000..42632ff Binary files /dev/null and b/results_new/basin_2889303335/experiment_pickle_2889303335.p differ diff --git a/results_new/basin_3570597918/before_after_distance_catplot_3570597918.png b/results_new/basin_3570597918/before_after_distance_catplot_3570597918.png new file mode 100644 index 0000000..ef62238 Binary files /dev/null and b/results_new/basin_3570597918/before_after_distance_catplot_3570597918.png differ diff --git a/results_new/basin_3570597918/clone_status_after_countplot_3570597918.png b/results_new/basin_3570597918/clone_status_after_countplot_3570597918.png new file mode 100644 index 0000000..2456cb2 Binary files /dev/null and b/results_new/basin_3570597918/clone_status_after_countplot_3570597918.png differ diff --git a/results_new/basin_3570597918/experiment_pickle_3570597918.p b/results_new/basin_3570597918/experiment_pickle_3570597918.p new file mode 100644 index 0000000..a7172c0 Binary files /dev/null and b/results_new/basin_3570597918/experiment_pickle_3570597918.p differ diff --git a/results_new/basin_3570597918/fixpoint_accuracy_3570597918.png b/results_new/basin_3570597918/fixpoint_accuracy_3570597918.png new file mode 100644 index 0000000..249ab61 Binary files /dev/null and b/results_new/basin_3570597918/fixpoint_accuracy_3570597918.png 
differ diff --git a/results_new/basin_3570597918/fixpoint_clone_status_after_countplot_3570597918.png b/results_new/basin_3570597918/fixpoint_clone_status_after_countplot_3570597918.png new file mode 100644 index 0000000..f2add73 Binary files /dev/null and b/results_new/basin_3570597918/fixpoint_clone_status_after_countplot_3570597918.png differ diff --git a/visualization.py b/visualization.py index 1df47ae..ec49e3d 100644 --- a/visualization.py +++ b/visualization.py @@ -92,7 +92,6 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si wm = np.array(wh) n, x, y = wm.shape wm = wm.reshape(n, x * y) - #print(wm.shape, wm) weight_histories.append(wm) weight_data = np.array(weight_histories) @@ -104,7 +103,6 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si for transformed_trajectory, start_time in zip(np.split(weight_data_pca, n), start_times): start_log_time = int(start_time / batch_size) - #print(start_time, start_log_time) xdata = transformed_trajectory[start_log_time:, 0] ydata = transformed_trajectory[start_log_time:, 1] zdata = np.arange(start_time, len(ydata)*batch_size+start_time, batch_size).tolist() @@ -139,7 +137,7 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si else: ax.scatter(np.asarray(xdata), np.asarray(ydata), zdata, s=3) - steps = mpatches.Patch(color="white", label=f"{z_axis_legend}: {len(matrices_weights_history)} steps") + #steps = mpatches.Patch(color="white", label=f"{z_axis_legend}: {len(matrices_weights_history)} steps") population_size = mpatches.Patch(color="white", label=f"Population: {population_size} networks") if z_axis_legend == "Self-application": @@ -147,14 +145,14 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si trained = mpatches.Patch(color="white", label=f"Trained: true") else: trained = mpatches.Patch(color="white", label=f"Trained: false") - ax.legend(handles=[steps, population_size, trained]) + 
ax.legend(handles=[population_size, trained]) else: - ax.legend(handles=[steps, population_size]) + ax.legend(handles=[population_size]) - ax.set_title(f"PCA Weights history") - ax.set_xlabel("PCA X") - ax.set_ylabel("PCA Y") - ax.set_zlabel(f"Epochs") + ax.set_title(f"PCA Transformed Weight Trajectories") + ax.set_xlabel("PCA Transformed X-Axis") + ax.set_ylabel("PCA Transformed Y-Axis") + ax.set_zlabel(f"Self Training Steps") # FIXME: Replace this kind of operation with pathlib.Path() object interactions directory = Path(directory) @@ -168,7 +166,7 @@ def plot_3d(matrices_weights_history, directory: Union[str, Path], population_si else: plt.savefig(str(filepath)) - # plt.show() + plt.show() def plot_3d_self_train(nets_array: List, exp_name: str, directory: Union[str, Path], batch_size: int, plot_pca_together: bool):