refactoring and running experiments
@@ -1,6 +0,0 @@
from .mixed_setting_exp import run_mixed_experiment
from .robustness_exp import run_robustness_experiment
from .self_application_exp import run_SA_experiment
from .self_train_exp import run_ST_experiment
from .soup_exp import run_soup_experiment
import functionalities_test
@@ -1,535 +0,0 @@
|
|||||||
import pickle
|
|
||||||
import re
|
|
||||||
import shutil
|
|
||||||
from collections import defaultdict
|
|
||||||
from pathlib import Path
|
|
||||||
import sys
|
|
||||||
import platform
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import torchmetrics
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
from matplotlib import pyplot as plt
|
|
||||||
import seaborn as sns
|
|
||||||
from torch import nn
|
|
||||||
from torch.nn import Flatten
|
|
||||||
from torch.utils.data import Dataset, DataLoader
|
|
||||||
from torchvision.datasets import MNIST
|
|
||||||
from torchvision.transforms import ToTensor, Compose, Resize
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
|
||||||
if platform.node() == 'CarbonX':
|
|
||||||
debug = True
|
|
||||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
|
||||||
print("@ Warning, Debugging Config@!!!!!! @")
|
|
||||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
|
||||||
else:
|
|
||||||
debug = False
|
|
||||||
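# When this file is executed directly (no package context), the block below puts the project root on
# sys.path and sets __package__ so that the project-level imports further down still resolve.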
try:
|
|
||||||
# noinspection PyUnboundLocalVariable
|
|
||||||
if __package__ is None:
|
|
||||||
DIR = Path(__file__).resolve().parent
|
|
||||||
sys.path.insert(0, str(DIR.parent))
|
|
||||||
__package__ = DIR.name
|
|
||||||
else:
|
|
||||||
DIR = None
|
|
||||||
except NameError:
|
|
||||||
DIR = None
|
|
||||||
pass
|
|
||||||
|
|
||||||
from network import MetaNet, FixTypes as ft
|
|
||||||
from sparse_net import SparseNetwork
|
|
||||||
from functionalities_test import test_for_fixpoints
|
|
||||||
|
|
||||||
WORKER = 10 if not debug else 2
|
|
||||||
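# Note: debug is reset to False on the next line, which overrides the platform-based setting above.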
debug = False
|
|
||||||
BATCHSIZE = 500 if not debug else 50
|
|
||||||
EPOCH = 50
|
|
||||||
VALIDATION_FRQ = 3 if not debug else 1
|
|
||||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
|
||||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
||||||
|
|
||||||
DATA_PATH = Path('data')
|
|
||||||
DATA_PATH.mkdir(exist_ok=True, parents=True)
|
|
||||||
|
|
||||||
if debug:
|
|
||||||
torch.autograd.set_detect_anomaly(True)
|
|
||||||
|
|
||||||
|
|
||||||
class ToFloat:
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def __call__(self, x):
|
|
||||||
return x.to(torch.float32)
|
|
||||||
|
|
||||||
|
|
||||||
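# Synthetic regression task: each sample is a pair drawn from a standard normal distribution and the
# target is the sum of the two values.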
class AddTaskDataset(Dataset):
|
|
||||||
def __init__(self, length=int(5e5)):
|
|
||||||
super().__init__()
|
|
||||||
self.length = length
|
|
||||||
self.prng = np.random.default_rng()
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return self.length
|
|
||||||
|
|
||||||
def __getitem__(self, _):
|
|
||||||
ab = self.prng.normal(size=(2,)).astype(np.float32)
|
|
||||||
return ab, ab.sum(axis=-1, keepdims=True)
|
|
||||||
|
|
||||||
|
|
||||||
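# Saves the complete model object (not just a state_dict) and keeps a copy of this experiment script
# next to the checkpoints for reproducibility.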
def set_checkpoint(model, out_path, epoch_n, final_model=False):
|
|
||||||
epoch_n = str(epoch_n)
|
|
||||||
if not final_model:
|
|
||||||
ckpt_path = Path(out_path) / 'ckpt' / f'{epoch_n.zfill(4)}_model_ckpt.tp'
|
|
||||||
else:
|
|
||||||
ckpt_path = Path(out_path) / f'trained_model_ckpt_e{epoch_n}.tp'
|
|
||||||
ckpt_path.parent.mkdir(exist_ok=True, parents=True)
|
|
||||||
|
|
||||||
torch.save(model, ckpt_path, pickle_protocol=pickle.HIGHEST_PROTOCOL)
|
|
||||||
py_store_path = Path(out_path) / 'exp_py.txt'
|
|
||||||
if not py_store_path.exists():
|
|
||||||
shutil.copy(__file__, py_store_path)
|
|
||||||
return ckpt_path
|
|
||||||
|
|
||||||
|
|
||||||
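# Loads a checkpoint and measures classification accuracy on roughly `ratio` of the MNIST test
# batches; the torchmetrics Accuracy object accumulates over all evaluated batches.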
def validate(checkpoint_path, ratio=0.1):
|
|
||||||
checkpoint_path = Path(checkpoint_path)
|
|
||||||
import torchmetrics
|
|
||||||
|
|
||||||
# initialize metric
|
|
||||||
validmetric = torchmetrics.Accuracy()
|
|
||||||
ut = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
|
||||||
|
|
||||||
try:
|
|
||||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False)
|
|
||||||
except RuntimeError:
|
|
||||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False, download=True)
|
|
||||||
valid_d = DataLoader(datas, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
|
||||||
|
|
||||||
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
|
||||||
n_samples = int(len(valid_d) * ratio)
|
|
||||||
|
|
||||||
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
|
||||||
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
|
||||||
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
|
||||||
y_valid = model(valid_batch_x)
|
|
||||||
|
|
||||||
# metric on current batch
|
|
||||||
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
|
||||||
pbar.set_postfix_str(f'Acc: {acc}')
|
|
||||||
pbar.update()
|
|
||||||
if idx == n_samples:
|
|
||||||
break
|
|
||||||
|
|
||||||
# metric on all batches using custom accumulation
|
|
||||||
acc = validmetric.compute()
|
|
||||||
tqdm.write(f"Avg. accuracy on all data: {acc}")
|
|
||||||
return acc
|
|
||||||
|
|
||||||
|
|
||||||
def new_storage_df(identifier, weight_count):
|
|
||||||
if identifier == 'train':
|
|
||||||
return pd.DataFrame(columns=['Epoch', 'Batch', 'Metric', 'Score'])
|
|
||||||
elif identifier == 'weights':
|
|
||||||
return pd.DataFrame(columns=['Epoch', 'Weight', *(f'weight_{x}' for x in range(weight_count))])
|
|
||||||
|
|
||||||
|
|
||||||
def checkpoint_and_validate(model, out_path, epoch_n, final_model=False):
|
|
||||||
out_path = Path(out_path)
|
|
||||||
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
|
||||||
result = validate(ckpt_path)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def plot_training_particle_types(path_to_dataframe):
|
|
||||||
plt.clf()
|
|
||||||
# load from Drive
|
|
||||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
|
||||||
# Set up figure
|
|
||||||
fig, ax = plt.subplots() # initializes figure and plots
|
|
||||||
data = df.loc[df['Metric'].isin(ft.all_types())]
|
|
||||||
fix_types = data['Metric'].unique()
|
|
||||||
data = data.pivot(index='Epoch', columns='Metric', values='Score').reset_index().fillna(0)
|
|
||||||
_ = plt.stackplot(data['Epoch'], *[data[fixtype] for fixtype in fix_types], labels=fix_types.tolist())
|
|
||||||
|
|
||||||
ax.set(ylabel='Particle Count', xlabel='Epoch')
|
|
||||||
ax.set_title('Particle Type Count')
|
|
||||||
|
|
||||||
fig.legend(loc="center right", title='Particle Type', bbox_to_anchor=(0.85, 0.5))
|
|
||||||
plt.tight_layout()
|
|
||||||
if debug:
|
|
||||||
plt.show()
|
|
||||||
else:
|
|
||||||
plt.savefig(Path(path_to_dataframe.parent / 'training_particle_type_lp.png'), dpi=300)
|
|
||||||
|
|
||||||
|
|
||||||
def plot_training_result(path_to_dataframe):
|
|
||||||
plt.clf()
|
|
||||||
# load from Drive
|
|
||||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
|
||||||
|
|
||||||
# Set up figure
|
|
||||||
fig, ax1 = plt.subplots() # initializes figure and plots
|
|
||||||
ax2 = ax1.twinx() # applies twinx to ax2, which is the second y-axis.
|
|
||||||
|
|
||||||
# plots the first set of data
|
|
||||||
data = df[(df['Metric'] == 'Task Loss') | (df['Metric'] == 'Self Train Loss')].groupby(['Epoch', 'Metric']).mean()
|
|
||||||
palette = sns.color_palette()[1:data.reset_index()['Metric'].unique().shape[0]+1]
|
|
||||||
sns.lineplot(data=data.groupby(['Epoch', 'Metric']).mean(), x='Epoch', y='Score', hue='Metric',
|
|
||||||
palette=palette, ax=ax1)
|
|
||||||
|
|
||||||
# plots the second set of data
|
|
||||||
data = df[(df['Metric'] == 'Test Accuracy') | (df['Metric'] == 'Train Accuracy')]
|
|
||||||
palette = sns.color_palette()[len(palette)+1:data.reset_index()['Metric'].unique().shape[0] + len(palette)+1]
|
|
||||||
sns.lineplot(data=data, x='Epoch', y='Score', marker='o', hue='Metric', palette=palette)
|
|
||||||
|
|
||||||
ax1.set(yscale='log', ylabel='Losses')
|
|
||||||
ax1.set_title('Training Lineplot')
|
|
||||||
ax2.set(ylabel='Accuracy')
|
|
||||||
|
|
||||||
fig.legend(loc="center right", title='Metric', bbox_to_anchor=(0.85, 0.5))
|
|
||||||
ax1.get_legend().remove()
|
|
||||||
ax2.get_legend().remove()
|
|
||||||
plt.tight_layout()
|
|
||||||
if debug:
|
|
||||||
plt.show()
|
|
||||||
else:
|
|
||||||
plt.savefig(Path(path_to_dataframe.parent / 'training_lineplot.png'), dpi=300)
|
|
||||||
|
|
||||||
|
|
||||||
def plot_network_connectivity_by_fixtype(path_to_trained_model):
|
|
||||||
m = torch.load(path_to_trained_model, map_location=torch.device('cpu')).eval()
|
|
||||||
# noinspection PyProtectedMember
|
|
||||||
particles = list(m.particles)
|
|
||||||
df = pd.DataFrame(columns=['type', 'layer', 'neuron', 'name'])
|
|
||||||
|
|
||||||
for prtcl in particles:
|
|
||||||
l, c, w = [float(x) for x in re.sub("[^0-9|_]", "", prtcl.name).split('_')]
|
|
||||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l-1, w, prtcl.name)
|
|
||||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l, c, prtcl.name)
|
|
||||||
for layer in list(df['layer'].unique()):
|
|
||||||
# Rescale
|
|
||||||
divisor = df.loc[(df['layer'] == layer), 'neuron'].max()
|
|
||||||
df.loc[(df['layer'] == layer), 'neuron'] /= divisor
|
|
||||||
|
|
||||||
tqdm.write(f'Connectivity Data gathered')
|
|
||||||
for n, fixtype in enumerate(ft.all_types()):
|
|
||||||
if df[df['type'] == fixtype].shape[0] > 0:
|
|
||||||
plt.clf()
|
|
||||||
ax = sns.lineplot(y='neuron', x='layer', hue='name', data=df[df['type'] == fixtype],
|
|
||||||
legend=False, estimator=None, lw=1)
|
|
||||||
_ = sns.lineplot(y=[0, 1], x=[-1, df['layer'].max()], legend=False, estimator=None, lw=0)
|
|
||||||
ax.set_title(fixtype)
|
|
||||||
lines = ax.get_lines()
|
|
||||||
for line in lines:
|
|
||||||
line.set_color(sns.color_palette()[n])
|
|
||||||
if debug:
|
|
||||||
plt.show()
|
|
||||||
else:
|
|
||||||
plt.savefig(Path(path_to_trained_model.parent / f'net_connectivity_{fixtype}.png'), dpi=300)
|
|
||||||
tqdm.write(f'Connectivity plotted: {fixtype} - n = {df[df["type"] == fixtype].shape[0] // 2}')
|
|
||||||
else:
|
|
||||||
tqdm.write(f'No Connectivity {fixtype}')
|
|
||||||
|
|
||||||
|
|
||||||
def run_particle_dropout_test(model_path):
|
|
||||||
diff_store_path = model_path.parent / 'diff_store.csv'
|
|
||||||
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
|
||||||
prtcl_dict = defaultdict(lambda: 0)
|
|
||||||
_ = test_for_fixpoints(prtcl_dict, list(latest_model.particles))
|
|
||||||
tqdm.write(str(dict(prtcl_dict)))
|
|
||||||
diff_df = pd.DataFrame(columns=['Particle Type', 'Accuracy', 'Diff'])
|
|
||||||
|
|
||||||
acc_pre = validate(model_path, ratio=1).item()
|
|
||||||
diff_df.loc[diff_df.shape[0]] = ('All Organism', acc_pre, 0)
|
|
||||||
|
|
||||||
for fixpoint_type in ft.all_types():
|
|
||||||
new_model = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero(fixpoint_type)
|
|
||||||
if [x for x in new_model.particles if x.is_fixpoint == fixpoint_type]:
|
|
||||||
new_ckpt = set_checkpoint(new_model, model_path.parent, fixpoint_type, final_model=True)
|
|
||||||
acc_post = validate(new_ckpt, ratio=1).item()
|
|
||||||
acc_diff = abs(acc_post - acc_pre)
|
|
||||||
tqdm.write(f'Zero_ident diff = {acc_diff}')
|
|
||||||
diff_df.loc[diff_df.shape[0]] = (fixpoint_type, acc_post, acc_diff)
|
|
||||||
|
|
||||||
diff_df.to_csv(diff_store_path, mode='a', header=not diff_store_path.exists(), index=False)
|
|
||||||
return diff_store_path
|
|
||||||
|
|
||||||
|
|
||||||
def plot_dropout_stacked_barplot(mdl_path):
|
|
||||||
diff_store_path = mdl_path.parent / 'diff_store.csv'
|
|
||||||
diff_df = pd.read_csv(diff_store_path)
|
|
||||||
particle_dict = defaultdict(lambda: 0)
|
|
||||||
latest_model = torch.load(mdl_path, map_location=DEVICE).eval()
|
|
||||||
_ = test_for_fixpoints(particle_dict, list(latest_model.particles))
|
|
||||||
tqdm.write(str(dict(particle_dict)))
|
|
||||||
plt.clf()
|
|
||||||
fig, ax = plt.subplots(ncols=2)
|
|
||||||
colors = sns.color_palette()[1:diff_df.shape[0]+1]
|
|
||||||
_ = sns.barplot(data=diff_df, y='Accuracy', x='Particle Type', ax=ax[0], palette=colors)
|
|
||||||
|
|
||||||
ax[0].set_title('Accuracy after particle dropout')
|
|
||||||
ax[0].set_xlabel('Particle Type')
|
|
||||||
|
|
||||||
ax[1].pie(particle_dict.values(), labels=particle_dict.keys(), colors=list(reversed(colors)), )
|
|
||||||
ax[1].set_title('Particle Count')
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
if debug:
|
|
||||||
plt.show()
|
|
||||||
else:
|
|
||||||
plt.savefig(Path(diff_store_path.parent / 'dropout_stacked_barplot.png'), dpi=300)
|
|
||||||
|
|
||||||
|
|
||||||
def run_particle_dropout_and_plot(model_path):
|
|
||||||
diff_store_path = run_particle_dropout_test(model_path)
|
|
||||||
plot_dropout_stacked_barplot(model_path)
|
|
||||||
|
|
||||||
|
|
||||||
def flat_for_store(parameters):
|
|
||||||
return (x.item() for y in parameters for x in y.detach().flatten())
|
|
||||||
|
|
||||||
|
|
||||||
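# One self-replication update: the particle networks are trained (via combined_self_train) for
# `st_stps` steps to reproduce their own weights; only the resulting loss is logged here.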
def train_self_replication(model, optimizer, st_stps) -> dict:
|
|
||||||
self_train_loss = model.combined_self_train(optimizer, st_stps)
|
|
||||||
# noinspection PyUnboundLocalVariable
|
|
||||||
stp_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
|
||||||
return stp_log
|
|
||||||
|
|
||||||
|
|
||||||
def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
|
|
||||||
# Zero your gradients for every batch!
|
|
||||||
optimizer.zero_grad()
|
|
||||||
btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
|
|
||||||
y_prd = model(btch_x)
|
|
||||||
# loss = loss_fn(y, batch_y.unsqueeze(-1).to(torch.float32))
|
|
||||||
loss = loss_func(y_prd, btch_y.to(torch.float))
|
|
||||||
loss.backward()
|
|
||||||
|
|
||||||
# Adjust learning weights
|
|
||||||
optimizer.step()
|
|
||||||
|
|
||||||
stp_log = dict(Metric='Task Loss', Score=loss.item())
|
|
||||||
|
|
||||||
return stp_log, y_prd
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
training = True
|
|
||||||
train_to_id_first = True
|
|
||||||
train_to_task_first = False
|
|
||||||
seq_task_train = True
|
|
||||||
force_st_for_epochs_n = 5
|
|
||||||
n_st_per_batch = 2
|
|
||||||
activation = None # nn.ReLU()
|
|
||||||
|
|
||||||
use_sparse_network = False
|
|
||||||
|
|
||||||
for weight_hidden_size in [4, 5, 6]:
|
|
||||||
|
|
||||||
tsk_threshold = 0.85
|
|
||||||
weight_hidden_size = weight_hidden_size
|
|
||||||
residual_skip = False
|
|
||||||
n_seeds = 3
|
|
||||||
depth = 3
|
|
||||||
|
|
||||||
assert not (train_to_task_first and train_to_id_first)
|
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
|
||||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
|
||||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
|
||||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
|
||||||
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
|
||||||
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first and tsk_threshold != 1 else ""}'
|
|
||||||
sprs_str = '_sprs' if use_sparse_network else ''
|
|
||||||
f_str = f'_f_{force_st_for_epochs_n}' if \
|
|
||||||
force_st_for_epochs_n and seq_task_train and train_to_task_first else ""
|
|
||||||
config_str = f'{res_str}{id_str}{tsk_str}{f_str}{sprs_str}'
|
|
||||||
exp_path = Path('output') / f'mn_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'
|
|
||||||
|
|
||||||
if not training:
|
|
||||||
# noinspection PyRedeclaration
|
|
||||||
exp_path = Path('output') / 'mn_st_n_2_100_4'
|
|
||||||
|
|
||||||
for seed in range(n_seeds):
|
|
||||||
seed_path = exp_path / str(seed)
|
|
||||||
|
|
||||||
model_save_path = seed_path / '0000_trained_model.zip'
|
|
||||||
df_store_path = seed_path / 'train_store.csv'
|
|
||||||
weight_store_path = seed_path / 'weight_store.csv'
|
|
||||||
srnn_parameters = dict()
|
|
||||||
|
|
||||||
if training:
|
|
||||||
# Check if files do exist on project location, warn and break.
|
|
||||||
for path in [model_save_path, df_store_path, weight_store_path]:
|
|
||||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
|
||||||
|
|
||||||
utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
|
||||||
try:
|
|
||||||
dataset = MNIST(str(DATA_PATH), transform=utility_transforms)
|
|
||||||
except RuntimeError:
|
|
||||||
dataset = MNIST(str(DATA_PATH), transform=utility_transforms, download=True)
|
|
||||||
d = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
|
||||||
|
|
||||||
interface = np.prod(dataset[0][0].shape)
|
|
||||||
dense_metanet = MetaNet(interface, depth=depth, width=6, out=10, residual_skip=residual_skip,
|
|
||||||
weight_hidden_size=weight_hidden_size, activation=activation).to(DEVICE)
|
|
||||||
sparse_metanet = SparseNetwork(interface, depth=depth, width=6, out=10, residual_skip=residual_skip,
|
|
||||||
weight_hidden_size=weight_hidden_size, activation=activation
|
|
||||||
).to(DEVICE) if use_sparse_network else dense_metanet
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
|
|
||||||
loss_fn = nn.CrossEntropyLoss()
|
|
||||||
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.004, momentum=0.9)
|
|
||||||
sparse_optimizer = torch.optim.SGD(
|
|
||||||
sparse_metanet.parameters(), lr=0.001, momentum=0.9
|
|
||||||
) if use_sparse_network else dense_optimizer
|
|
||||||
|
|
||||||
dense_weights_updated = False
|
|
||||||
sparse_weights_updated = False
|
|
||||||
|
|
||||||
train_store = new_storage_df('train', None)
|
|
||||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
|
||||||
|
|
||||||
init_tsk = train_to_task_first
|
|
||||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
|
||||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
|
|
||||||
is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
|
|
||||||
sparse_metanet = sparse_metanet.train()
|
|
||||||
dense_metanet = dense_metanet.train()
|
|
||||||
|
|
||||||
# Init metrics, even if we do not need them:
|
|
||||||
metric = torchmetrics.Accuracy()
|
|
||||||
|
|
||||||
# Define what to train in this epoch:
|
|
||||||
do_tsk_train = train_to_task_first
|
|
||||||
force_st = (force_st_for_epochs_n >= (EPOCH - epoch)) and force_st_for_epochs_n
|
|
||||||
init_st = (train_to_id_first and not dense_metanet.count_fixpoints() > 200)
|
|
||||||
do_st_train = init_st or is_self_train_epoch or force_st
|
|
||||||
|
|
||||||
for batch, (batch_x, batch_y) in tqdm(enumerate(d), total=len(d), desc='MetaNet Train - Batch'):
|
|
||||||
|
|
||||||
# Self Train
|
|
||||||
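# When a separate sparse network is used, self-training runs on the sparse copy and task training on
# the dense copy, so particle weights are transferred whenever the other copy has been updated.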
if do_st_train:
|
|
||||||
# Transfer weights
|
|
||||||
if dense_weights_updated:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
dense_weights_updated = False
|
|
||||||
st_steps = n_st_per_batch if not init_st else n_st_per_batch * 10
|
|
||||||
step_log = train_self_replication(sparse_metanet, sparse_optimizer, st_steps)
|
|
||||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_weights_updated = True
|
|
||||||
|
|
||||||
# Task Train
|
|
||||||
if not init_st:
|
|
||||||
# Transfer weights
|
|
||||||
if sparse_weights_updated:
|
|
||||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
|
||||||
sparse_weights_updated = False
|
|
||||||
step_log, y_pred = train_task(dense_metanet, dense_optimizer, loss_fn, batch_x, batch_y)
|
|
||||||
|
|
||||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
if use_sparse_network:
|
|
||||||
dense_weights_updated = True
|
|
||||||
metric(y_pred.cpu(), batch_y.cpu())
|
|
||||||
|
|
||||||
if is_validation_epoch:
|
|
||||||
if sparse_weights_updated:
|
|
||||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
|
||||||
sparse_weights_updated = False
|
|
||||||
|
|
||||||
dense_metanet = dense_metanet.eval()
|
|
||||||
if do_tsk_train:
|
|
||||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
|
||||||
Metric='Train Accuracy', Score=metric.compute().item())
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
|
|
||||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, epoch).item()
|
|
||||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
|
||||||
Metric='Test Accuracy', Score=accuracy)
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
if init_tsk or (train_to_task_first and seq_task_train):
|
|
||||||
init_tsk = accuracy <= tsk_threshold
|
|
||||||
if init_st or is_validation_epoch:
|
|
||||||
if dense_weights_updated:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
dense_weights_updated = False
|
|
||||||
counter_dict = defaultdict(lambda: 0)
|
|
||||||
# This returns ID-functions
|
|
||||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
|
||||||
counter_dict = dict(counter_dict)
|
|
||||||
for key, value in counter_dict.items():
|
|
||||||
step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
|
||||||
if sum(x.is_fixpoint == ft.identity_func for x in dense_metanet.particles) > 200:
|
|
||||||
train_to_id_first = False
|
|
||||||
# Reset Diverged particles
|
|
||||||
sparse_metanet.reset_diverged_particles()
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_weights_updated = True
|
|
||||||
|
|
||||||
# FLUSH to disk
|
|
||||||
if is_validation_epoch:
|
|
||||||
for particle in dense_metanet.particles:
|
|
||||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
|
||||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
|
||||||
train_store.to_csv(df_store_path, mode='a',
|
|
||||||
header=not df_store_path.exists(), index=False)
|
|
||||||
weight_store.to_csv(weight_store_path, mode='a',
|
|
||||||
header=not weight_store_path.exists(), index=False)
|
|
||||||
train_store = new_storage_df('train', None)
|
|
||||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# EPOCHS ended
|
|
||||||
dense_metanet = dense_metanet.eval()
|
|
||||||
|
|
||||||
counter_dict = defaultdict(lambda: 0)
|
|
||||||
# This returns ID-functions
|
|
||||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
|
||||||
for key, value in dict(counter_dict).items():
|
|
||||||
step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, EPOCH, final_model=True)
|
|
||||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
|
||||||
Metric='Test Accuracy', Score=accuracy.item())
|
|
||||||
for particle in dense_metanet.particles:
|
|
||||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
|
||||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
|
||||||
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
|
||||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
|
||||||
|
|
||||||
plot_training_result(df_store_path)
|
|
||||||
plot_training_particle_types(df_store_path)
|
|
||||||
|
|
||||||
try:
|
|
||||||
_ = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
|
||||||
except StopIteration:
|
|
||||||
print('Model pattern did not trigger.')
|
|
||||||
print(f'Search path was: {seed_path}:')
|
|
||||||
print(f'Found models are: {list(seed_path.rglob("*.tp"))}')
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
try:
|
|
||||||
run_particle_dropout_and_plot(seed_path)
|
|
||||||
except ValueError as e:
|
|
||||||
print(e)
|
|
||||||
try:
|
|
||||||
plot_network_connectivity_by_fixtype(model_save_path)
|
|
||||||
except ValueError as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
if n_seeds >= 2:
|
|
||||||
pass
|
|
@@ -1,317 +0,0 @@
|
|||||||
import platform
|
|
||||||
import sys
|
|
||||||
from collections import defaultdict
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import torch
|
|
||||||
import torchmetrics
|
|
||||||
from torch import nn
|
|
||||||
from torch.utils.data import Dataset, DataLoader
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
# noinspection DuplicatedCode
|
|
||||||
if platform.node() == 'CarbonX':
|
|
||||||
debug = True
|
|
||||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
|
||||||
print("@ Warning, Debugging Config@!!!!!! @")
|
|
||||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
|
||||||
else:
|
|
||||||
debug = False
|
|
||||||
try:
|
|
||||||
# noinspection PyUnboundLocalVariable
|
|
||||||
if __package__ is None:
|
|
||||||
DIR = Path(__file__).resolve().parent
|
|
||||||
sys.path.insert(0, str(DIR.parent))
|
|
||||||
__package__ = DIR.name
|
|
||||||
else:
|
|
||||||
DIR = None
|
|
||||||
except NameError:
|
|
||||||
DIR = None
|
|
||||||
pass
|
|
||||||
|
|
||||||
from network import MetaNet, FixTypes as ft
|
|
||||||
from sparse_net import SparseNetwork
|
|
||||||
from functionalities_test import test_for_fixpoints
|
|
||||||
from experiments.meta_task_exp import new_storage_df, train_self_replication, train_task, set_checkpoint, \
|
|
||||||
flat_for_store, plot_training_result, plot_training_particle_types, run_particle_dropout_and_plot, \
|
|
||||||
plot_network_connectivity_by_fixtype
|
|
||||||
|
|
||||||
WORKER = 10 if not debug else 2
|
|
||||||
debug = False
|
|
||||||
BATCHSIZE = 50 if not debug else 50
|
|
||||||
EPOCH = 10
|
|
||||||
VALIDATION_FRQ = 1 if not debug else 1
|
|
||||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
|
||||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|
||||||
|
|
||||||
|
|
||||||
class AddTaskDataset(Dataset):
|
|
||||||
def __init__(self, length=int(1e5)):
|
|
||||||
super().__init__()
|
|
||||||
self.length = length
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return self.length
|
|
||||||
|
|
||||||
def __getitem__(self, _):
|
|
||||||
ab = torch.randn(size=(2,)).to(torch.float32)
|
|
||||||
return ab, ab.sum(axis=-1, keepdims=True)
|
|
||||||
|
|
||||||
|
|
||||||
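# Caution: the default metric object is created once at definition time, so repeated calls that rely
# on the default share (and keep accumulating into) the same MeanAbsoluteError instance.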
def validate(checkpoint_path, valid_d, ratio=1, validmetric=torchmetrics.MeanAbsoluteError()):
|
|
||||||
checkpoint_path = Path(checkpoint_path)
|
|
||||||
import torchmetrics
|
|
||||||
|
|
||||||
# initialize metric
|
|
||||||
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
|
||||||
n_samples = int(len(valid_d) * ratio)
|
|
||||||
|
|
||||||
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
|
||||||
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
|
||||||
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
|
||||||
y_valid = model(valid_batch_x)
|
|
||||||
|
|
||||||
# metric on current batch
|
|
||||||
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
|
||||||
pbar.set_postfix_str(f'Acc: {acc}')
|
|
||||||
pbar.update()
|
|
||||||
if idx == n_samples:
|
|
||||||
break
|
|
||||||
|
|
||||||
# metric on all batches using custom accumulation
|
|
||||||
acc = validmetric.compute()
|
|
||||||
tqdm.write(f"Avg. Accuracy on all data: {acc}")
|
|
||||||
return acc
|
|
||||||
|
|
||||||
|
|
||||||
def checkpoint_and_validate(model, out_path, epoch_n, valid_d, final_model=False):
|
|
||||||
out_path = Path(out_path)
|
|
||||||
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
|
||||||
result = validate(ckpt_path, valid_d)
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
training = True
|
|
||||||
train_to_id_first = False
|
|
||||||
train_to_task_first = False
|
|
||||||
seq_task_train = True
|
|
||||||
force_st_for_epochs_n = 5
|
|
||||||
n_st_per_batch = 2
|
|
||||||
activation = None # nn.ReLU()
|
|
||||||
|
|
||||||
use_sparse_network = False
|
|
||||||
|
|
||||||
for weight_hidden_size in [3, 4, 5, 6]:
|
|
||||||
|
|
||||||
tsk_threshold = 0.85
|
|
||||||
weight_hidden_size = weight_hidden_size
|
|
||||||
residual_skip = True
|
|
||||||
n_seeds = 3
|
|
||||||
depth = 3
|
|
||||||
width = 3
|
|
||||||
out = 1
|
|
||||||
|
|
||||||
data_path = Path('data')
|
|
||||||
data_path.mkdir(exist_ok=True, parents=True)
|
|
||||||
assert not (train_to_task_first and train_to_id_first)
|
|
||||||
|
|
||||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
|
||||||
s_str = f'_n_{n_st_per_batch}' if n_st_per_batch > 1 else ""
|
|
||||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
|
||||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
|
||||||
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
|
||||||
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first and tsk_threshold != 1 else ""}'
|
|
||||||
sprs_str = '_sprs' if use_sparse_network else ''
|
|
||||||
f_str = f'_f_{force_st_for_epochs_n}' if \
|
|
||||||
force_st_for_epochs_n and seq_task_train and train_to_task_first else ""
|
|
||||||
config_str = f'{s_str}{res_str}{id_str}{tsk_str}{f_str}{sprs_str}'
|
|
||||||
exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'
|
|
||||||
|
|
||||||
if not training:
|
|
||||||
# noinspection PyRedeclaration
|
|
||||||
exp_path = Path('output') / 'mn_st_n_2_100_4'
|
|
||||||
|
|
||||||
for seed in range(n_seeds):
|
|
||||||
seed_path = exp_path / str(seed)
|
|
||||||
|
|
||||||
model_path = seed_path / '0000_trained_model.zip'
|
|
||||||
df_store_path = seed_path / 'train_store.csv'
|
|
||||||
weight_store_path = seed_path / 'weight_store.csv'
|
|
||||||
srnn_parameters = dict()
|
|
||||||
|
|
||||||
if training:
|
|
||||||
# Check if files do exist on project location, warn and break.
|
|
||||||
for path in [model_path, df_store_path, weight_store_path]:
|
|
||||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
|
||||||
|
|
||||||
train_data = AddTaskDataset()
|
|
||||||
valid_data = AddTaskDataset()
|
|
||||||
train_load = DataLoader(train_data, batch_size=BATCHSIZE, shuffle=True,
|
|
||||||
drop_last=True, num_workers=WORKER)
|
|
||||||
vali_load = DataLoader(valid_data, batch_size=BATCHSIZE, shuffle=False,
|
|
||||||
drop_last=True, num_workers=WORKER)
|
|
||||||
|
|
||||||
interface = np.prod(train_data[0][0].shape)
|
|
||||||
dense_metanet = MetaNet(interface, depth=depth, width=width, out=out,
|
|
||||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
|
||||||
activation=activation
|
|
||||||
).to(DEVICE)
|
|
||||||
sparse_metanet = SparseNetwork(interface, depth=depth, width=width, out=out,
|
|
||||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
|
||||||
activation=activation
|
|
||||||
).to(DEVICE) if use_sparse_network else dense_metanet
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
|
|
||||||
loss_fn = nn.MSELoss()
|
|
||||||
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.00004, momentum=0.9)
|
|
||||||
sparse_optimizer = torch.optim.SGD(
|
|
||||||
sparse_metanet.parameters(), lr=0.00001, momentum=0.9
|
|
||||||
) if use_sparse_network else dense_optimizer
|
|
||||||
|
|
||||||
dense_weights_updated = False
|
|
||||||
sparse_weights_updated = False
|
|
||||||
|
|
||||||
train_store = new_storage_df('train', None)
|
|
||||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
|
||||||
|
|
||||||
init_tsk = train_to_task_first
|
|
||||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
|
||||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
|
|
||||||
is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
|
|
||||||
sparse_metanet = sparse_metanet.train()
|
|
||||||
dense_metanet = dense_metanet.train()
|
|
||||||
|
|
||||||
# Init metrics, even if we do not need them:
|
|
||||||
metric = torchmetrics.MeanAbsoluteError()
|
|
||||||
|
|
||||||
# Define what to train in this epoch:
|
|
||||||
do_tsk_train = train_to_task_first
|
|
||||||
force_st = (force_st_for_epochs_n >= (EPOCH - epoch)) and force_st_for_epochs_n
|
|
||||||
init_st = (train_to_id_first and not dense_metanet.count_fixpoints() > 200)
|
|
||||||
do_st_train = init_st or is_self_train_epoch or force_st
|
|
||||||
|
|
||||||
for batch, (batch_x, batch_y) in tqdm(enumerate(train_load),
|
|
||||||
total=len(train_load), desc='MetaNet Train - Batch'
|
|
||||||
):
|
|
||||||
|
|
||||||
# Self Train
|
|
||||||
if do_st_train:
|
|
||||||
# Transfer weights
|
|
||||||
if dense_weights_updated:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
dense_weights_updated = False
|
|
||||||
st_steps = n_st_per_batch if not init_st else n_st_per_batch * 10
|
|
||||||
step_log = train_self_replication(sparse_metanet, sparse_optimizer, st_steps)
|
|
||||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_weights_updated = True
|
|
||||||
|
|
||||||
# Task Train
|
|
||||||
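# init_st is forced to True on every batch below, so the task-training branch that follows is never
# entered in this variant of the experiment.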
init_st = True
|
|
||||||
if not init_st:
|
|
||||||
# Transfer weights
|
|
||||||
if sparse_weights_updated:
|
|
||||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
|
||||||
sparse_weights_updated = False
|
|
||||||
step_log, y_pred = train_task(dense_metanet, dense_optimizer, loss_fn, batch_x, batch_y)
|
|
||||||
|
|
||||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
if use_sparse_network:
|
|
||||||
dense_weights_updated = True
|
|
||||||
metric(y_pred.cpu(), batch_y.cpu())
|
|
||||||
|
|
||||||
if is_validation_epoch:
|
|
||||||
if sparse_weights_updated:
|
|
||||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
|
||||||
sparse_weights_updated = False
|
|
||||||
|
|
||||||
dense_metanet = dense_metanet.eval()
|
|
||||||
if not init_st:
|
|
||||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
|
||||||
Metric='Train Accuracy', Score=metric.compute().item())
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
|
|
||||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, epoch, vali_load).item()
|
|
||||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
|
||||||
Metric='Test Accuracy', Score=accuracy)
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
if init_tsk or (train_to_task_first and seq_task_train):
|
|
||||||
init_tsk = accuracy <= tsk_threshold
|
|
||||||
if init_st or is_validation_epoch:
|
|
||||||
if dense_weights_updated:
|
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
|
||||||
dense_weights_updated = False
|
|
||||||
counter_dict = defaultdict(lambda: 0)
|
|
||||||
# This returns ID-functions
|
|
||||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
|
||||||
counter_dict = dict(counter_dict)
|
|
||||||
for key, value in counter_dict.items():
|
|
||||||
step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
|
||||||
if sum(x.is_fixpoint == ft.identity_func for x in dense_metanet.particles) > 200:
|
|
||||||
train_to_id_first = False
|
|
||||||
# Reset Diverged particles
|
|
||||||
sparse_metanet.reset_diverged_particles()
|
|
||||||
if use_sparse_network:
|
|
||||||
sparse_weights_updated = True
|
|
||||||
|
|
||||||
# FLUSH to disk
|
|
||||||
if is_validation_epoch:
|
|
||||||
for particle in dense_metanet.particles:
|
|
||||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
|
||||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
|
||||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
|
||||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
|
||||||
train_store = new_storage_df('train', None)
|
|
||||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
|
||||||
|
|
||||||
###########################################################
|
|
||||||
# EPOCHS ended
|
|
||||||
dense_metanet = dense_metanet.eval()
|
|
||||||
|
|
||||||
counter_dict = defaultdict(lambda: 0)
|
|
||||||
# This returns ID-functions
|
|
||||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
|
||||||
for key, value in dict(counter_dict).items():
|
|
||||||
step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
|
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
|
||||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, EPOCH, vali_load, final_model=True)
|
|
||||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
|
||||||
Metric='Test Accuracy', Score=accuracy.item())
|
|
||||||
for particle in dense_metanet.particles:
|
|
||||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
|
||||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
|
||||||
|
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
|
||||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
|
||||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
|
||||||
|
|
||||||
plot_training_result(df_store_path)
|
|
||||||
plot_training_particle_types(df_store_path)
|
|
||||||
|
|
||||||
try:
|
|
||||||
model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
|
||||||
except StopIteration:
|
|
||||||
print('Model pattern did not trigger.')
|
|
||||||
print(f'Search path was: {seed_path}:')
|
|
||||||
print(f'Found models are: {list(seed_path.rglob("*.tp"))}')
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
try:
|
|
||||||
run_particle_dropout_and_plot(model_path)
|
|
||||||
except ValueError as e:
|
|
||||||
print(e)
|
|
||||||
try:
|
|
||||||
plot_network_connectivity_by_fixtype(model_path)
|
|
||||||
except ValueError as e:
|
|
||||||
print(e)
|
|
||||||
|
|
||||||
if n_seeds >= 2:
|
|
||||||
pass
|
|
77
experiments/meta_task_small_utility.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchmetrics
|
||||||
|
|
||||||
|
from torch.utils.data import Dataset
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from experiments.meta_task_utility import set_checkpoint
|
||||||
|
|
||||||
|
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
|
|
||||||
|
|
||||||
|
class AddTaskDataset(Dataset):
|
||||||
|
def __init__(self, length=int(1e3)):
|
||||||
|
super().__init__()
|
||||||
|
self.length = length
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return self.length
|
||||||
|
|
||||||
|
def __getitem__(self, _):
|
||||||
|
ab = torch.randn(size=(2,)).to(torch.float32)
|
||||||
|
return ab, ab.sum(axis=-1, keepdims=True)
|
||||||
|
|
||||||
|
|
||||||
|
def validate(checkpoint_path, valid_d, ratio=1, validmetric=torchmetrics.MeanAbsoluteError()):
|
||||||
|
checkpoint_path = Path(checkpoint_path)
|
||||||
|
|
||||||
|
# initialize metric
|
||||||
|
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
||||||
|
n_samples = int(len(valid_d) * ratio)
|
||||||
|
|
||||||
|
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
||||||
|
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
||||||
|
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
||||||
|
y_valid = model(valid_batch_x)
|
||||||
|
|
||||||
|
# metric on current batch
|
||||||
|
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
||||||
|
pbar.set_postfix_str(f'Acc: {acc}')
|
||||||
|
pbar.update()
|
||||||
|
if idx == n_samples:
|
||||||
|
break
|
||||||
|
|
||||||
|
# metric on all batches using custom accumulation
|
||||||
|
acc = validmetric.compute()
|
||||||
|
tqdm.write(f"Avg. Accuracy on all data: {acc}")
|
||||||
|
return acc
|
||||||
|
|
||||||
|
|
||||||
|
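# Standard supervised step: zero gradients, forward pass, loss on float targets, backward pass and
# optimizer step; returns a small log dict plus the predictions for the caller's running metric.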
def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
|
||||||
|
# Zero your gradients for every batch!
|
||||||
|
optimizer.zero_grad()
|
||||||
|
btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
|
||||||
|
y_prd = model(btch_x)
|
||||||
|
|
||||||
|
loss = loss_func(y_prd, btch_y.to(torch.float))
|
||||||
|
loss.backward()
|
||||||
|
|
||||||
|
# Adjust learning weights
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
stp_log = dict(Metric='Task Loss', Score=loss.item())
|
||||||
|
|
||||||
|
return stp_log, y_prd
|
||||||
|
|
||||||
|
|
||||||
|
def checkpoint_and_validate(model, out_path, epoch_n, valid_d, final_model=False):
|
||||||
|
out_path = Path(out_path)
|
||||||
|
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
||||||
|
result = validate(ckpt_path, valid_d)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
raise NotImplementedError('Get out of here')
|
319
experiments/meta_task_utility.py
Normal file
@@ -0,0 +1,319 @@
|
|||||||
|
import pickle
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
import sys
|
||||||
|
import platform
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
import seaborn as sns
|
||||||
|
from torch.nn import Flatten
|
||||||
|
from torch.utils.data import Dataset, DataLoader
|
||||||
|
from torchvision.datasets import MNIST
|
||||||
|
from torchvision.transforms import ToTensor, Compose, Resize
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
# noinspection DuplicatedCode
|
||||||
|
if platform.node() == 'CarbonX':
|
||||||
|
debug = True
|
||||||
|
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||||
|
print("@ Warning, Debugging Config@!!!!!! @")
|
||||||
|
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||||
|
else:
|
||||||
|
debug = False
|
||||||
|
try:
|
||||||
|
# noinspection PyUnboundLocalVariable
|
||||||
|
if __package__ is None:
|
||||||
|
DIR = Path(__file__).resolve().parent
|
||||||
|
sys.path.insert(0, str(DIR.parent))
|
||||||
|
__package__ = DIR.name
|
||||||
|
else:
|
||||||
|
DIR = None
|
||||||
|
except NameError:
|
||||||
|
DIR = None
|
||||||
|
pass
|
||||||
|
|
||||||
|
from network import FixTypes as ft
|
||||||
|
from functionalities_test import test_for_fixpoints
|
||||||
|
|
||||||
|
WORKER = 10 if not debug else 0
|
||||||
|
debug = False
|
||||||
|
BATCHSIZE = 500 if not debug else 50
|
||||||
|
EPOCH = 50
|
||||||
|
VALIDATION_FRQ = 3 if not debug else 1
|
||||||
|
SELF_TRAIN_FRQ = 1 if not debug else 1
|
||||||
|
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
|
|
||||||
|
DATA_PATH = Path('data')
|
||||||
|
DATA_PATH.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
|
if debug:
|
||||||
|
torch.autograd.set_detect_anomaly(True)
|
||||||
|
|
||||||
|
|
||||||
|
class ToFloat:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __call__(self, x):
|
||||||
|
return x.to(torch.float32)
|
||||||
|
|
||||||
|
|
||||||
|
class AddTaskDataset(Dataset):
|
||||||
|
def __init__(self, length=int(5e5)):
|
||||||
|
super().__init__()
|
||||||
|
self.length = length
|
||||||
|
self.prng = np.random.default_rng()
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return self.length
|
||||||
|
|
||||||
|
def __getitem__(self, _):
|
||||||
|
ab = self.prng.normal(size=(2,)).astype(np.float32)
|
||||||
|
return ab, ab.sum(axis=-1, keepdims=True)
|
||||||
|
|
||||||
|
|
||||||
|
def set_checkpoint(model, out_path, epoch_n, final_model=False):
|
||||||
|
epoch_n = str(epoch_n)
|
||||||
|
if not final_model:
|
||||||
|
ckpt_path = Path(out_path) / 'ckpt' / f'{epoch_n.zfill(4)}_model_ckpt.tp'
|
||||||
|
else:
|
||||||
|
ckpt_path = Path(out_path) / f'trained_model_ckpt_e{epoch_n}.tp'
|
||||||
|
ckpt_path.parent.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
|
torch.save(model, ckpt_path, pickle_protocol=pickle.HIGHEST_PROTOCOL)
|
||||||
|
py_store_path = Path(out_path) / 'exp_py.txt'
|
||||||
|
if not py_store_path.exists():
|
||||||
|
shutil.copy(__file__, py_store_path)
|
||||||
|
return ckpt_path
|
||||||
|
|
||||||
|
|
||||||
|
def validate(checkpoint_path, ratio=0.1):
|
||||||
|
checkpoint_path = Path(checkpoint_path)
|
||||||
|
import torchmetrics
|
||||||
|
|
||||||
|
# initialize metric
|
||||||
|
validmetric = torchmetrics.Accuracy()
|
||||||
|
ut = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||||
|
|
||||||
|
try:
|
||||||
|
datas = MNIST(str(DATA_PATH), transform=ut, train=False)
|
||||||
|
except RuntimeError:
|
||||||
|
datas = MNIST(str(DATA_PATH), transform=ut, train=False, download=True)
|
||||||
|
valid_d = DataLoader(datas, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
||||||
|
|
||||||
|
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
||||||
|
n_samples = int(len(valid_d) * ratio)
|
||||||
|
|
||||||
|
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
||||||
|
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
||||||
|
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
||||||
|
y_valid = model(valid_batch_x)
|
||||||
|
|
||||||
|
# metric on current batch
|
||||||
|
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
||||||
|
pbar.set_postfix_str(f'Acc: {acc}')
|
||||||
|
pbar.update()
|
||||||
|
if idx == n_samples:
|
||||||
|
break
|
||||||
|
|
||||||
|
# metric on all batches using custom accumulation
|
||||||
|
acc = validmetric.compute()
|
||||||
|
tqdm.write(f"Avg. accuracy on all data: {acc}")
|
||||||
|
return acc
|
||||||
|
|
||||||
|
|
||||||
|
def new_storage_df(identifier, weight_count):
|
||||||
|
if identifier == 'train':
|
||||||
|
return pd.DataFrame(columns=['Epoch', 'Batch', 'Metric', 'Score'])
|
||||||
|
elif identifier == 'weights':
|
||||||
|
return pd.DataFrame(columns=['Epoch', 'Weight', *(f'weight_{x}' for x in range(weight_count))])
|
||||||
|
|
||||||
|
|
||||||
|
def checkpoint_and_validate(model, out_path, epoch_n, final_model=False):
|
||||||
|
out_path = Path(out_path)
|
||||||
|
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
||||||
|
result = validate(ckpt_path)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def plot_training_particle_types(path_to_dataframe):
|
||||||
|
plt.clf()
|
||||||
|
# load from Drive
|
||||||
|
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||||
|
# Set up figure
|
||||||
|
fig, ax = plt.subplots() # initializes figure and plots
|
||||||
|
data = df.loc[df['Metric'].isin(ft.all_types())]
|
||||||
|
fix_types = data['Metric'].unique()
|
||||||
|
data = data.pivot(index='Epoch', columns='Metric', values='Score').reset_index().fillna(0)
|
||||||
|
_ = plt.stackplot(data['Epoch'], *[data[fixtype] for fixtype in fix_types], labels=fix_types.tolist())
|
||||||
|
|
||||||
|
ax.set(ylabel='Particle Count', xlabel='Epoch')
|
||||||
|
ax.set_title('Particle Type Count')
|
||||||
|
|
||||||
|
fig.legend(loc="center right", title='Particle Type', bbox_to_anchor=(0.85, 0.5))
|
||||||
|
plt.tight_layout()
|
||||||
|
if debug:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
plt.savefig(Path(path_to_dataframe.parent / 'training_particle_type_lp.png'), dpi=300)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_training_result(path_to_dataframe):
|
||||||
|
plt.clf()
|
||||||
|
# load from Drive
|
||||||
|
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||||
|
|
||||||
|
# Set up figure
|
||||||
|
fig, ax1 = plt.subplots() # initializes figure and plots
|
||||||
|
ax2 = ax1.twinx() # applies twinx to ax2, which is the second y-axis.
|
||||||
|
|
||||||
|
# plots the first set of data
|
||||||
|
data = df[(df['Metric'] == 'Task Loss') | (df['Metric'] == 'Self Train Loss')].groupby(['Epoch', 'Metric']).mean()
|
||||||
|
palette = sns.color_palette()[1:data.reset_index()['Metric'].unique().shape[0]+1]
|
||||||
|
sns.lineplot(data=data.groupby(['Epoch', 'Metric']).mean(), x='Epoch', y='Score', hue='Metric',
|
||||||
|
palette=palette, ax=ax1)
|
||||||
|
|
||||||
|
# plots the second set of data
|
||||||
|
data = df[(df['Metric'] == 'Test Accuracy') | (df['Metric'] == 'Train Accuracy')]
|
||||||
|
palette = sns.color_palette()[len(palette)+1:data.reset_index()['Metric'].unique().shape[0] + len(palette)+1]
|
||||||
|
sns.lineplot(data=data, x='Epoch', y='Score', marker='o', hue='Metric', palette=palette)
|
||||||
|
|
||||||
|
ax1.set(yscale='log', ylabel='Losses')
|
||||||
|
ax1.set_title('Training Lineplot')
|
||||||
|
ax2.set(ylabel='Accuracy')
|
||||||
|
|
||||||
|
fig.legend(loc="center right", title='Metric', bbox_to_anchor=(0.85, 0.5))
|
||||||
|
ax1.get_legend().remove()
|
||||||
|
ax2.get_legend().remove()
|
||||||
|
plt.tight_layout()
|
||||||
|
if debug:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
plt.savefig(Path(path_to_dataframe.parent / 'training_lineplot.png'), dpi=300)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_network_connectivity_by_fixtype(path_to_trained_model):
|
||||||
|
m = torch.load(path_to_trained_model, map_location=torch.device('cpu')).eval()
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
particles = list(m.particles)
|
||||||
|
df = pd.DataFrame(columns=['type', 'layer', 'neuron', 'name'])
|
||||||
|
|
||||||
|
for prtcl in particles:
|
||||||
|
l, c, w = [float(x) for x in re.sub("[^0-9|_]", "", prtcl.name).split('_')]
|
||||||
|
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l-1, w, prtcl.name)
|
||||||
|
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l, c, prtcl.name)
|
||||||
|
for layer in list(df['layer'].unique()):
|
||||||
|
# Rescale
|
||||||
|
divisor = df.loc[(df['layer'] == layer), 'neuron'].max()
|
||||||
|
df.loc[(df['layer'] == layer), 'neuron'] /= divisor
|
||||||
|
|
||||||
|
tqdm.write(f'Connectivity Data gathered')
|
||||||
|
for n, fixtype in enumerate(ft.all_types()):
|
||||||
|
if df[df['type'] == fixtype].shape[0] > 0:
|
||||||
|
plt.clf()
|
||||||
|
ax = sns.lineplot(y='neuron', x='layer', hue='name', data=df[df['type'] == fixtype],
|
||||||
|
legend=False, estimator=None, lw=1)
|
||||||
|
_ = sns.lineplot(y=[0, 1], x=[-1, df['layer'].max()], legend=False, estimator=None, lw=0)
|
||||||
|
ax.set_title(fixtype)
|
||||||
|
lines = ax.get_lines()
|
||||||
|
for line in lines:
|
||||||
|
line.set_color(sns.color_palette()[n])
|
||||||
|
if debug:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
plt.savefig(Path(path_to_trained_model.parent / f'net_connectivity_{fixtype}.png'), dpi=300)
|
||||||
|
tqdm.write(f'Connectivity plotted: {fixtype} - n = {df[df["type"] == fixtype].shape[0] // 2}')
|
||||||
|
else:
|
||||||
|
tqdm.write(f'No Connectivity {fixtype}')
|
||||||
|
|
||||||
|
|
||||||
|
def run_particle_dropout_test(model_path):
|
||||||
|
diff_store_path = model_path.parent / 'diff_store.csv'
|
||||||
|
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
||||||
|
prtcl_dict = defaultdict(lambda: 0)
|
||||||
|
_ = test_for_fixpoints(prtcl_dict, list(latest_model.particles))
|
||||||
|
tqdm.write(str(dict(prtcl_dict)))
|
||||||
|
diff_df = pd.DataFrame(columns=['Particle Type', 'Accuracy', 'Diff'])
|
||||||
|
|
||||||
|
acc_pre = validate(model_path, ratio=1).item()
|
||||||
|
diff_df.loc[diff_df.shape[0]] = ('All Organism', acc_pre, 0)
|
||||||
|
|
||||||
|
for fixpoint_type in ft.all_types():
|
||||||
|
new_model = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero(fixpoint_type)
|
||||||
|
if [x for x in new_model.particles if x.is_fixpoint == fixpoint_type]:
|
||||||
|
new_ckpt = set_checkpoint(new_model, model_path.parent, fixpoint_type, final_model=True)
|
||||||
|
acc_post = validate(new_ckpt, ratio=1).item()
|
||||||
|
acc_diff = abs(acc_post - acc_pre)
|
||||||
|
tqdm.write(f'Zero_ident diff = {acc_diff}')
|
||||||
|
diff_df.loc[diff_df.shape[0]] = (fixpoint_type, acc_post, acc_diff)
|
||||||
|
|
||||||
|
diff_df.to_csv(diff_store_path, mode='a', header=not diff_store_path.exists(), index=False)
|
||||||
|
return diff_store_path
|
||||||
|
|
||||||
|
|
||||||
|
def plot_dropout_stacked_barplot(mdl_path, diff_store_path):
|
||||||
|
|
||||||
|
diff_df = pd.read_csv(diff_store_path)
|
||||||
|
particle_dict = defaultdict(lambda: 0)
|
||||||
|
latest_model = torch.load(mdl_path, map_location=DEVICE).eval()
|
||||||
|
_ = test_for_fixpoints(particle_dict, list(latest_model.particles))
|
||||||
|
tqdm.write(str(dict(particle_dict)))
|
||||||
|
plt.clf()
|
||||||
|
fig, ax = plt.subplots(ncols=2)
|
||||||
|
colors = sns.color_palette()[1:diff_df.shape[0]+1]
|
||||||
|
_ = sns.barplot(data=diff_df, y='Accuracy', x='Particle Type', ax=ax[0], palette=colors)
|
||||||
|
|
||||||
|
ax[0].set_title('Accuracy after particle dropout')
|
||||||
|
ax[0].set_xlabel('Particle Type')
|
||||||
|
|
||||||
|
ax[1].pie(particle_dict.values(), labels=particle_dict.keys(), colors=list(reversed(colors)), )
|
||||||
|
ax[1].set_title('Particle Count')
|
||||||
|
|
||||||
|
plt.tight_layout()
|
||||||
|
if debug:
|
||||||
|
plt.show()
|
||||||
|
else:
|
||||||
|
plt.savefig(Path(diff_store_path.parent / 'dropout_stacked_barplot.png'), dpi=300)
|
||||||
|
|
||||||
|
|
||||||
|
def run_particle_dropout_and_plot(model_path):
|
||||||
|
diff_store_path = run_particle_dropout_test(model_path)
|
||||||
|
plot_dropout_stacked_barplot(model_path, diff_store_path)
|
||||||
|
|
||||||
|
|
||||||
|
def flat_for_store(parameters):
|
||||||
|
return (x.item() for y in parameters for x in y.detach().flatten())
|
||||||
|
|
||||||
|
|
||||||
|
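# Utility-module variant: the optimizer is no longer passed explicitly; extra keyword arguments are
# forwarded to combined_self_train and only the resulting self-train loss is logged.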
def train_self_replication(model, st_stps, **kwargs) -> dict:
|
||||||
|
self_train_loss = model.combined_self_train(st_stps, **kwargs)
|
||||||
|
# noinspection PyUnboundLocalVariable
|
||||||
|
stp_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
||||||
|
return stp_log
|
||||||
|
|
||||||
|
|
||||||
|
def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
|
||||||
|
# Zero your gradients for every batch!
|
||||||
|
optimizer.zero_grad()
|
||||||
|
btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
|
||||||
|
y_prd = model(btch_x)
|
||||||
|
|
||||||
|
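# Unlike the small-utility variant (float targets for a regression loss), targets are cast to long
# here, i.e. integer class indices as expected by a classification loss such as CrossEntropyLoss.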
loss = loss_func(y_prd, btch_y.to(torch.long))
|
||||||
|
loss.backward()
|
||||||
|
|
||||||
|
# Adjust learning weights
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
stp_log = dict(Metric='Task Loss', Score=loss.item())
|
||||||
|
|
||||||
|
return stp_log, y_prd
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
raise NotImplementedError('Test this here!!!')
|
@@ -1,177 +0,0 @@
|
|||||||
import os.path
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from experiments.helpers import check_folder, summary_fixpoint_experiment, summary_fixpoint_percentage
|
|
||||||
from functionalities_test import test_for_fixpoints
|
|
||||||
from network import Net
|
|
||||||
from visualization import plot_loss, bar_chart_fixpoints, line_chart_fixpoints
|
|
||||||
from visualization import plot_3d_self_train
|
|
||||||
|
|
||||||
|
|
||||||
class MixedSettingExperiment:
|
|
||||||
def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, train_nets,
|
|
||||||
epochs, SA_steps, ST_steps_between_SA, log_step_size, directory_name):
|
|
||||||
super().__init__()
|
|
||||||
self.population_size = population_size
|
|
||||||
|
|
||||||
self.net_input_size = net_i_size
|
|
||||||
self.net_hidden_size = net_h_size
|
|
||||||
self.net_out_size = net_o_size
|
|
||||||
self.net_learning_rate = learning_rate
|
|
||||||
self.train_nets = train_nets
|
|
||||||
self.epochs = epochs
|
|
||||||
self.SA_steps = SA_steps
|
|
||||||
self.ST_steps_between_SA = ST_steps_between_SA
|
|
||||||
self.log_step_size = log_step_size
|
|
||||||
|
|
||||||
self.fixpoint_counters = {
|
|
||||||
"identity_func": 0,
|
|
||||||
"divergent": 0,
|
|
||||||
"fix_zero": 0,
|
|
||||||
"fix_weak": 0,
|
|
||||||
"fix_sec": 0,
|
|
||||||
"other_func": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
self.loss_history = []
|
|
||||||
|
|
||||||
self.fixpoint_counters_history = []
|
|
||||||
|
|
||||||
self.directory_name = directory_name
|
|
||||||
os.mkdir(self.directory_name)
|
|
||||||
|
|
||||||
self.nets = []
|
|
||||||
self.populate_environment()
|
|
||||||
|
|
||||||
self.fixpoint_percentage()
|
|
||||||
self.weights_evolution_3d_experiment()
|
|
||||||
self.count_fixpoints()
|
|
||||||
self.visualize_loss()
|
|
||||||
|
|
||||||
def populate_environment(self):
|
|
||||||
loop_population_size = tqdm(range(self.population_size))
|
|
||||||
for i in loop_population_size:
|
|
||||||
loop_population_size.set_description("Populating mixed experiment %s" % i)
|
|
||||||
|
|
||||||
net_name = f"mixed_net_{str(i)}"
|
|
||||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
|
||||||
self.nets.append(net)
|
|
||||||
|
|
||||||
loop_epochs = tqdm(range(self.epochs))
|
|
||||||
for j in loop_epochs:
|
|
||||||
loop_epochs.set_description("Running mixed experiment %s" % j)
|
|
||||||
|
|
||||||
for i in loop_population_size:
|
|
||||||
net = self.nets[i]
|
|
||||||
|
|
||||||
if self.train_nets == "before_SA":
|
|
||||||
for _ in range(self.ST_steps_between_SA):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
net.self_application(self.SA_steps, self.log_step_size)
|
|
||||||
|
|
||||||
elif self.train_nets == "after_SA":
|
|
||||||
net.self_application(self.SA_steps, self.log_step_size)
|
|
||||||
for _ in range(self.ST_steps_between_SA):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
|
|
||||||
print(
|
|
||||||
f"\nLast weight matrix (epoch: {j}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
|
|
||||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
|
||||||
# Rounding the result not to run into other problems later regarding the exact representation of floating number
|
|
||||||
fixpoints_percentage = round((self.fixpoint_counters["fix_zero"] + self.fixpoint_counters[
|
|
||||||
"fix_sec"]) / self.population_size, 1)
|
|
||||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
|
||||||
|
|
||||||
# Resetting the fixpoint counter. Last iteration not to be reset - it is important for the bar_chart_fixpoints().
|
|
||||||
if j < self.epochs:
|
|
||||||
self.reset_fixpoint_counters()
|
|
||||||
|
|
||||||
def weights_evolution_3d_experiment(self):
|
|
||||||
exp_name = f"Mixed {str(len(self.nets))}"
|
|
||||||
|
|
||||||
# This batch size is not relevant for mixed settings because during an epoch there are more steps of SA & ST happening
|
|
||||||
# and only they need the batch size. To not affect the number of epochs shown in the 3D plot, will send
|
|
||||||
# forward the number "1" for batch size with the variable <irrelevant_batch_size>
|
|
||||||
irrelevant_batch_size = 1
|
|
||||||
plot_3d_self_train(self.nets, exp_name, self.directory_name, irrelevant_batch_size, True)
|
|
||||||
|
|
||||||
def count_fixpoints(self):
|
|
||||||
exp_details = f"SA steps: {self.SA_steps}; ST steps: {self.ST_steps_between_SA}"
|
|
||||||
|
|
||||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
|
||||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
|
||||||
exp_details)
|
|
||||||
|
|
||||||
def fixpoint_percentage(self):
|
|
||||||
line_chart_fixpoints(self.fixpoint_counters_history, self.epochs, self.ST_steps_between_SA,
|
|
||||||
self.SA_steps, self.directory_name, self.population_size)
|
|
||||||
|
|
||||||
def visualize_loss(self):
|
|
||||||
for i in range(len(self.nets)):
|
|
||||||
net_loss_history = self.nets[i].loss_history
|
|
||||||
self.loss_history.append(net_loss_history)
|
|
||||||
|
|
||||||
plot_loss(self.loss_history, self.directory_name)
|
|
||||||
|
|
||||||
def reset_fixpoint_counters(self):
|
|
||||||
self.fixpoint_counters = {
|
|
||||||
"identity_func": 0,
|
|
||||||
"divergent": 0,
|
|
||||||
"fix_zero": 0,
|
|
||||||
"fix_weak": 0,
|
|
||||||
"fix_sec": 0,
|
|
||||||
"other_func": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def run_mixed_experiment(population_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate, train_nets,
|
|
||||||
epochs, SA_steps, ST_steps_between_SA, batch_size, name_hash, runs, run_name):
|
|
||||||
experiments = {}
|
|
||||||
fixpoints_percentages = []
|
|
||||||
|
|
||||||
check_folder("mixed")
|
|
||||||
|
|
||||||
# Running the experiments
|
|
||||||
for i in range(runs):
|
|
||||||
directory_name = f"experiments/mixed/{run_name}_run_{i}_{str(population_size)}_nets_{SA_steps}_SA_{ST_steps_between_SA}_ST_{str(name_hash)}"
|
|
||||||
|
|
||||||
mixed_experiment = MixedSettingExperiment(
|
|
||||||
population_size,
|
|
||||||
net_input_size,
|
|
||||||
net_hidden_size,
|
|
||||||
net_out_size,
|
|
||||||
net_learning_rate,
|
|
||||||
train_nets,
|
|
||||||
epochs,
|
|
||||||
SA_steps,
|
|
||||||
ST_steps_between_SA,
|
|
||||||
batch_size,
|
|
||||||
directory_name
|
|
||||||
)
|
|
||||||
pickle.dump(mixed_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
|
|
||||||
experiments[i] = mixed_experiment
|
|
||||||
|
|
||||||
# Building history of fixpoint percentages for summary
|
|
||||||
fixpoint_counters_history = mixed_experiment.fixpoint_counters_history
|
|
||||||
if not fixpoints_percentages:
|
|
||||||
fixpoints_percentages = mixed_experiment.fixpoint_counters_history
|
|
||||||
else:
|
|
||||||
# Using list comprehension to make the sum of all the percentages
|
|
||||||
fixpoints_percentages = [fixpoints_percentages[i] + fixpoint_counters_history[i] for i in
|
|
||||||
range(len(fixpoints_percentages))]
|
|
||||||
|
|
||||||
# Building a summary of all the runs
|
|
||||||
directory_name = f"experiments/mixed/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{str(name_hash)}"
|
|
||||||
os.mkdir(directory_name)
|
|
||||||
|
|
||||||
summary_pre_title = "mixed"
|
|
||||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
|
|
||||||
summary_pre_title)
|
|
||||||
summary_fixpoint_percentage(runs, epochs, fixpoints_percentages, ST_steps_between_SA, SA_steps, directory_name,
|
|
||||||
population_size)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
raise NotImplementedError('Test this here!!!')
|
|
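# Illustrative call of run_mixed_experiment; every value below is a hypothetical
# placeholder, not a setting taken from the experiments in this repository:
#
#   run_mixed_experiment(population_size=10, net_input_size=4, net_hidden_size=2,
#                        net_out_size=1, net_learning_rate=0.04, train_nets="before_SA",
#                        epochs=10, SA_steps=100, ST_steps_between_SA=50, batch_size=10,
#                        name_hash=1234567, runs=1, run_name="demo")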
@@ -1,151 +0,0 @@
import copy
import os.path
import pickle
import random

from tqdm import tqdm

from experiments.helpers import check_folder, summary_fixpoint_experiment
from functionalities_test import test_for_fixpoints, is_identity_function
from network import Net
from visualization import bar_chart_fixpoints, box_plot, write_file


def add_noise(input_data, epsilon=pow(10, -5)):

    output = copy.deepcopy(input_data)
    for k in range(len(input_data)):
        output[k][0] += random.random() * epsilon

    return output
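# Illustrative note on add_noise (hypothetical values): only the first entry of each row
# is perturbed, and by strictly less than epsilon.
#
#   weights = [[0.5, 1.0], [0.25, 2.0]]
#   noisy = add_noise(weights, epsilon=pow(10, -5))
#   # noisy[k][0] differs from weights[k][0] by less than 1e-5; noisy[k][1] is unchanged.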


class RobustnessExperiment:
    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 ST_steps, directory_name) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size

        self.net_learning_rate = net_learning_rate

        self.ST_steps = ST_steps
        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }
        self.id_functions = []

        self.directory_name = directory_name
        os.mkdir(self.directory_name)

        self.nets = []
        # Create population:
        self.populate_environment()
        print("Nets:\n", self.nets)

        self.count_fixpoints()
        [print(net.is_fixpoint) for net in self.nets]
        self.test_robustness()

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating robustness experiment %s" % i)

            net_name = f"net_{str(i)}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)

            for _ in range(self.ST_steps):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

            self.nets.append(net)

    def test_robustness(self):
        # test_for_fixpoints(self.fixpoint_counters, self.nets, self.id_functions)

        zero_epsilon = pow(10, -5)
        data = [[0 for _ in range(10)] for _ in range(len(self.id_functions))]

        for i in range(len(self.id_functions)):
            for j in range(10):
                original_net = self.id_functions[i]

                # Creating a clone of the network. Not by copying it, but by creating a completely new network
                # and changing its weights to the original ones.
                original_net_clone = Net(original_net.input_size, original_net.hidden_size, original_net.out_size,
                                         original_net.name)
                # Extra safety for the value of the weights
                original_net_clone.load_state_dict(copy.deepcopy(original_net.state_dict()))

                noisy_weights = add_noise(original_net_clone.input_weight_matrix(), epsilon=pow(10, -j))
                original_net_clone.apply_weights(noisy_weights)

                # Testing if the new net is still an identity function after applying noise
                still_id_func = is_identity_function(original_net_clone, zero_epsilon)

                # If the net is still an id. func. after applying the first run of noise, continue to apply it until otherwise
                while still_id_func and data[i][j] <= 1000:
                    data[i][j] += 1

                    original_net_clone = original_net_clone.self_application(1, self.log_step_size)

                    still_id_func = is_identity_function(original_net_clone, zero_epsilon)

        print(f"Data {data}")

        if data.count(0) == 10:
            print(f"There is no network resisting the robustness test.")
            text = f"For this population of \n {self.population_size} networks \n there is no" \
                   f" network resisting the robustness test."
            write_file(text, self.directory_name)
        else:
            box_plot(data, self.directory_name, self.population_size)

    def count_fixpoints(self):
        exp_details = f"ST steps: {self.ST_steps}"

        self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.nets)
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
                            exp_details)


def run_robustness_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size,
                              net_learning_rate, epochs, runs, run_name, name_hash):
    experiments = {}

    check_folder("robustness")

    # Running the experiments
    for i in range(runs):
        ST_directory_name = f"experiments/robustness/{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"

        robustness_experiment = RobustnessExperiment(
            population_size,
            batch_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            epochs,
            ST_directory_name
        )
        pickle.dump(robustness_experiment, open(f"{ST_directory_name}/full_experiment_pickle.p", "wb"))
        experiments[i] = robustness_experiment

    # Building a summary of all the runs
    directory_name = f"experiments/robustness/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{str(name_hash)}"
    os.mkdir(directory_name)

    summary_pre_title = "robustness"
    summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
                                summary_pre_title)


if __name__ == '__main__':
    raise NotImplementedError('Test this here!!!')
@@ -1,120 +0,0 @@
import os.path
import pickle

from tqdm import tqdm

from experiments.helpers import check_folder, summary_fixpoint_experiment
from functionalities_test import test_for_fixpoints
from network import Net
from visualization import bar_chart_fixpoints
from visualization import plot_3d_self_application


class SelfApplicationExperiment:
    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size,
                 net_learning_rate, application_steps, train_nets, directory_name, training_steps
                 ) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size

        self.net_learning_rate = net_learning_rate
        self.SA_steps = application_steps

        self.train_nets = train_nets
        self.ST_steps = training_steps

        self.directory_name = directory_name
        os.mkdir(self.directory_name)

        """ Creating the nets & making the SA steps & (maybe) also training the networks. """
        self.nets = []
        # Create population:
        self.populate_environment()

        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }

        self.weights_evolution_3d_experiment()
        self.count_fixpoints()

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating SA experiment %s" % i)

            net_name = f"SA_net_{str(i)}"

            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
            for _ in range(self.SA_steps):
                input_data = net.input_weight_matrix()
                target_data = net.create_target_weights(input_data)

                if self.train_nets == "before_SA":
                    net.self_train(1, self.log_step_size, self.net_learning_rate)
                    net.self_application(self.SA_steps, self.log_step_size)
                elif self.train_nets == "after_SA":
                    net.self_application(self.SA_steps, self.log_step_size)
                    net.self_train(1, self.log_step_size, self.net_learning_rate)
                else:
                    net.self_application(self.SA_steps, self.log_step_size)

            self.nets.append(net)

    def weights_evolution_3d_experiment(self):
        exp_name = f"SA_{str(len(self.nets))}_nets_3d_weights_PCA"
        plot_3d_self_application(self.nets, exp_name, self.directory_name, self.log_step_size)

    def count_fixpoints(self):
        test_for_fixpoints(self.fixpoint_counters, self.nets)
        exp_details = f"{self.SA_steps} SA steps"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
                            exp_details)


def run_SA_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size,
                      net_learning_rate, runs, run_name, name_hash, application_steps, train_nets, training_steps):
    experiments = {}

    check_folder("self_application")

    # Running the experiments
    for i in range(runs):
        directory_name = f"experiments/self_application/{run_name}_run_{i}_{str(population_size)}_nets_{application_steps}_SA_{str(name_hash)}"

        SA_experiment = SelfApplicationExperiment(
            population_size,
            batch_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            application_steps,
            train_nets,
            directory_name,
            training_steps
        )
        pickle.dump(SA_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
        experiments[i] = SA_experiment

    # Building a summary of all the runs
    directory_name = f"experiments/self_application/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{application_steps}_SA_{str(name_hash)}"
    os.mkdir(directory_name)

    summary_pre_title = "SA"
    summary_fixpoint_experiment(runs, population_size, application_steps, experiments, net_learning_rate,
                                directory_name,
                                summary_pre_title)


if __name__ == '__main__':
    raise NotImplementedError('Test this here!!!')
@@ -1,116 +0,0 @@
import os.path
import pickle
from pathlib import Path

from tqdm import tqdm

from experiments.helpers import check_folder, summary_fixpoint_experiment
from functionalities_test import test_for_fixpoints
from network import Net
from visualization import plot_loss, bar_chart_fixpoints
from visualization import plot_3d_self_train


class SelfTrainExperiment:
    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 epochs, directory_name) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size

        self.net_learning_rate = net_learning_rate
        self.epochs = epochs

        self.loss_history = []

        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }

        self.directory_name = directory_name
        os.mkdir(self.directory_name)

        self.nets = []
        # Create population:
        self.populate_environment()

        self.weights_evolution_3d_experiment()
        self.count_fixpoints()
        self.visualize_loss()

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating ST experiment %s" % i)

            net_name = f"ST_net_{str(i)}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)

            for _ in range(self.epochs):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

            print(f"\nLast weight matrix (epoch: {self.epochs}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
            self.nets.append(net)

    def weights_evolution_3d_experiment(self):
        exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
        return plot_3d_self_train(self.nets, exp_name, self.directory_name, self.log_step_size)

    def count_fixpoints(self):
        test_for_fixpoints(self.fixpoint_counters, self.nets)
        exp_details = f"Self-train for {self.epochs} epochs"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
                            exp_details)

    def visualize_loss(self):
        for i in range(len(self.nets)):
            net_loss_history = self.nets[i].loss_history
            self.loss_history.append(net_loss_history)

        plot_loss(self.loss_history, self.directory_name)


def run_ST_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                      epochs, runs, run_name, name_hash):
    experiments = {}
    logging_directory = Path('output') / 'self_training'
    logging_directory.mkdir(parents=True, exist_ok=True)

    # Running the experiments
    for i in range(runs):
        experiment_name = f"{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
        this_exp_directory = logging_directory / experiment_name
        ST_experiment = SelfTrainExperiment(
            population_size,
            batch_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            epochs,
            this_exp_directory
        )
        with (this_exp_directory / 'full_experiment_pickle.p').open('wb') as f:
            pickle.dump(ST_experiment, f)
        experiments[i] = ST_experiment

    # Building a summary of all the runs
    summary_name = f"summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
    summary_directory_name = logging_directory / summary_name
    summary_directory_name.mkdir(parents=True, exist_ok=True)

    summary_pre_title = "ST"
    summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, summary_directory_name,
                                summary_pre_title)


if __name__ == '__main__':
    raise NotImplementedError('Test this here!!!')
@@ -1,114 +0,0 @@
import pickle
from pathlib import Path

from tqdm import tqdm

from experiments.helpers import check_folder, summary_fixpoint_experiment
from functionalities_test import test_for_fixpoints
from network import SecondaryNet
from visualization import plot_loss, bar_chart_fixpoints
from visualization import plot_3d_self_train


class SelfTrainExperimentSecondary:
    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 epochs, directory: Path) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size

        self.net_learning_rate = net_learning_rate
        self.epochs = epochs

        self.loss_history = []

        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }

        self.directory_name = Path(directory)
        self.directory_name.mkdir(parents=True, exist_ok=True)

        self.nets = []
        # Create population:
        self.populate_environment()

        self.weights_evolution_3d_experiment()
        self.count_fixpoints()
        self.visualize_loss()

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating ST experiment %s" % i)

            net_name = f"ST_net_{str(i)}"
            net = SecondaryNet(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)

            for _ in range(self.epochs):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

            print(f"\nLast weight matrix (epoch: {self.epochs}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
            self.nets.append(net)

    def weights_evolution_3d_experiment(self):
        exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
        return plot_3d_self_train(self.nets, exp_name, self.directory_name, self.log_step_size)

    def count_fixpoints(self):
        test_for_fixpoints(self.fixpoint_counters, self.nets)
        exp_details = f"Self-train for {self.epochs} epochs"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
                            exp_details)

    def visualize_loss(self):
        for i in range(len(self.nets)):
            net_loss_history = self.nets[i].loss_history
            self.loss_history.append(net_loss_history)

        plot_loss(self.loss_history, self.directory_name)


def run_ST_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                      epochs, runs, run_name, name_hash):
    experiments = {}
    logging_directory = Path('output') / 'self_training'
    logging_directory.mkdir(parents=True, exist_ok=True)

    # Running the experiments
    for i in range(runs):
        experiment_name = f"{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
        this_exp_directory = logging_directory / experiment_name
        ST_experiment = SelfTrainExperimentSecondary(
            population_size,
            batch_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            epochs,
            this_exp_directory
        )
        with (this_exp_directory / 'full_experiment_pickle.p').open('wb') as f:
            pickle.dump(ST_experiment, f)
        experiments[i] = ST_experiment

    # Building a summary of all the runs
    summary_name = f"summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
    summary_directory_name = logging_directory / summary_name
    summary_directory_name.mkdir(parents=True, exist_ok=True)

    summary_pre_title = "ST"
    summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, summary_directory_name,
                                summary_pre_title)


if __name__ == '__main__':
    raise NotImplementedError('Test this here!!!')
@@ -1,190 +0,0 @@
import random
import os.path
import pickle
from pathlib import Path
from typing import Union

from tqdm import tqdm

from experiments.helpers import check_folder, summary_fixpoint_percentage, summary_fixpoint_experiment
from functionalities_test import test_for_fixpoints
from network import Net
from visualization import plot_loss, bar_chart_fixpoints, plot_3d_soup, line_chart_fixpoints


class SoupExperiment:
    def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, attack_chance,
                 train_nets, ST_steps, epochs, log_step_size, directory: Union[str, Path]):
        super().__init__()
        self.population_size = population_size

        self.net_input_size = net_i_size
        self.net_hidden_size = net_h_size
        self.net_out_size = net_o_size
        self.net_learning_rate = learning_rate
        self.attack_chance = attack_chance
        self.train_nets = train_nets
        # self.SA_steps = SA_steps
        self.ST_steps = ST_steps
        self.epochs = epochs
        self.log_step_size = log_step_size

        self.loss_history = []

        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }
        # <self.fixpoint_counters_history> is used for keeping track of the amount of fixpoints in %
        self.fixpoint_counters_history = []

        self.directory = Path(directory)
        self.directory.mkdir(parents=True, exist_ok=True)

        self.population = []
        self.populate_environment()

        self.evolve()
        self.fixpoint_percentage()
        self.weights_evolution_3d_experiment()
        self.count_fixpoints()
        self.visualize_loss()

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating soup experiment %s" % i)

            net_name = f"soup_network_{i}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
            self.population.append(net)

    def population_self_train(self):
        # Self-training each network in the population
        for j in range(self.population_size):
            net = self.population[j]

            for _ in range(self.ST_steps):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

    def population_attack(self):
        # A network attacking another network with a given percentage
        if random.randint(1, 100) <= self.attack_chance:
            random_net1, random_net2 = random.sample(range(self.population_size), 2)
            random_net1 = self.population[random_net1]
            random_net2 = self.population[random_net2]
            print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
            random_net1.attack(random_net2)

    def evolve(self):
        """ Evolving consists of attacking & self-training. """

        loop_epochs = tqdm(range(self.epochs))
        for i in loop_epochs:
            loop_epochs.set_description("Evolving soup %s" % i)

            # A network attacking another network with a given percentage
            self.population_attack()

            # Self-training each network in the population
            self.population_self_train()

            # Testing for fixpoints after each batch of ST steps to see relevant data
            if i % self.ST_steps == 0:
                test_for_fixpoints(self.fixpoint_counters, self.population)
                fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
                self.fixpoint_counters_history.append(fixpoints_percentage)

            # Resetting the fixpoint counter. Last iteration not to be reset -
            # it is important for the bar_chart_fixpoints().
            if i < self.epochs:
                self.reset_fixpoint_counters()

    def weights_evolution_3d_experiment(self):
        exp_name = f"soup_{self.population_size}_nets_{self.ST_steps}_training_{self.epochs}_epochs"
        return plot_3d_soup(self.population, exp_name, self.directory)

    def count_fixpoints(self):
        test_for_fixpoints(self.fixpoint_counters, self.population)
        exp_details = f"Evolution steps: {self.epochs} epochs"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
                            exp_details)

    def fixpoint_percentage(self):
        runs = self.epochs / self.ST_steps
        SA_steps = None
        line_chart_fixpoints(self.fixpoint_counters_history, runs, self.ST_steps, SA_steps, self.directory,
                             self.population_size)

    def visualize_loss(self):
        for i in range(len(self.population)):
            net_loss_history = self.population[i].loss_history
            self.loss_history.append(net_loss_history)

        plot_loss(self.loss_history, self.directory)

    def reset_fixpoint_counters(self):
        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0
        }


def run_soup_experiment(population_size, attack_chance, net_input_size, net_hidden_size, net_out_size,
                        net_learning_rate, epochs, batch_size, runs, run_name, name_hash, ST_steps, train_nets):
    experiments = {}
    fixpoints_percentages = []

    check_folder("soup")

    # Running the experiments
    for i in range(runs):
        # FIXME: Make this a pathlib.Path() Operation
        directory_name = f"experiments/soup/{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"

        soup_experiment = SoupExperiment(
            population_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            attack_chance,
            train_nets,
            ST_steps,
            epochs,
            batch_size,
            directory_name
        )
        pickle.dump(soup_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
        experiments[i] = soup_experiment

        # Building history of fixpoint percentages for summary
        fixpoint_counters_history = soup_experiment.fixpoint_counters_history
        if not fixpoints_percentages:
            fixpoints_percentages = soup_experiment.fixpoint_counters_history
        else:
            # Using list comprehension to make the sum of all the percentages
            fixpoints_percentages = [fixpoints_percentages[i] + fixpoint_counters_history[i] for i in
                                     range(len(fixpoints_percentages))]

    # Creating a folder for the summary of the current runs
    # FIXME: Make this a pathlib.Path() Operation
    directory_name = f"experiments/soup/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
    os.mkdir(directory_name)

    # Building a summary of all the runs
    summary_pre_title = "soup"
    summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
                                summary_pre_title)
    SA_steps = None
    summary_fixpoint_percentage(runs, epochs, fixpoints_percentages, ST_steps, SA_steps, directory_name,
                                population_size)
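# Illustrative note (hypothetical numbers): population_attack() fires with probability
# attack_chance / 100 once per evolution epoch, so the expected number of attack events
# over a run is roughly:
#
#   expected_attacks = epochs * attack_chance / 100   # e.g. 100 epochs at 10% -> ~10 attacks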
@@ -1,50 +0,0 @@
import random

from tqdm import tqdm

from experiments.soup_exp import SoupExperiment
from functionalities_test import test_for_fixpoints


class MeltingSoupExperiment(SoupExperiment):

    def __init__(self, melt_chance, *args, keep_population_size=True, **kwargs):
        # Set the melt parameters before the parent constructor runs, because
        # SoupExperiment.__init__ already calls evolve(), which uses them.
        self.keep_population_size = keep_population_size
        self.melt_chance = melt_chance
        super(MeltingSoupExperiment, self).__init__(*args, **kwargs)

    def population_melt(self):
        # A network melting with another network by a given percentage
        if random.randint(1, 100) <= self.melt_chance:
            random_net1_idx, random_net2_idx, destroy_idx = random.sample(range(self.population_size), 3)
            random_net1 = self.population[random_net1_idx]
            random_net2 = self.population[random_net2_idx]
            print(f"\n Melt: {random_net1.name} -> {random_net2.name}")
            melted_network = random_net1.melt(random_net2)
            if self.keep_population_size:
                del self.population[destroy_idx]
            self.population.append(melted_network)

    def evolve(self):
        """ Evolving consists of attacking, melting & self-training. """

        loop_epochs = tqdm(range(self.epochs))
        for i in loop_epochs:
            loop_epochs.set_description("Evolving soup %s" % i)

            self.population_attack()

            self.population_melt()

            self.population_self_train()

            # Testing for fixpoints after each batch of ST steps to see relevant data
            if i % self.ST_steps == 0:
                test_for_fixpoints(self.fixpoint_counters, self.population)
                fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
                self.fixpoint_counters_history.append(fixpoints_percentage)

            # Resetting the fixpoint counter. Last iteration not to be reset -
            # it is important for the bar_chart_fixpoints().
            if i < self.epochs:
                self.reset_fixpoint_counters()
@@ -1,203 +0,0 @@
import copy
import itertools
from pathlib import Path
import random
import pickle
import pandas as pd
import numpy as np
import torch

from functionalities_test import is_identity_function, test_status
from journal_basins import SpawnExperiment, mean_invariate_manhattan_distance
from network import Net

from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE


class SpawnLinspaceExperiment(SpawnExperiment):

    def spawn_and_continue(self, number_clones: int = None):
        number_clones = number_clones or self.nr_clones

        df = pd.DataFrame(
            columns=['clone', 'parent', 'parent2',
                     'MAE_pre', 'MAE_post',
                     'MSE_pre', 'MSE_post',
                     'MIM_pre', 'MIM_post',
                     'noise', 'status_post'])

        # For every initial net {i} after populating (that is fixpoint after first epoch);
        # parent = self.parents[0]
        # parent_clone = clone = Net(parent.input_size, parent.hidden_size, parent.out_size,
        #                            name=f"{parent.name}_clone_{0}", start_time=self.ST_steps)
        # parent_clone.apply_weights(torch.as_tensor(parent.create_target_weights(parent.input_weight_matrix())))
        # parent_clone = parent_clone.apply_noise(self.noise)
        # self.parents.append(parent_clone)
        pairwise_net_list = list(itertools.combinations(self.parents, 2))
        for net1, net2 in pairwise_net_list:
            # We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to
            # see the full trajectory (but the clones will be very hard to see).
            # Make one target to compare distances to clones later when they have trained.
            net1.start_time = self.ST_steps - 150
            net1_input_data = net1.input_weight_matrix().detach()
            net1_target_data = net1.create_target_weights(net1_input_data).detach()

            net2.start_time = self.ST_steps - 150
            net2_input_data = net2.input_weight_matrix().detach()
            net2_target_data = net2.create_target_weights(net2_input_data).detach()

            if is_identity_function(net1) and is_identity_function(net2):
                # if True:
                # Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
                # To plot clones starting after first epoch (z=ST_steps), set that as start_time!
                # To make sure PCA will plot the same trajectory up until this point, we clone the
                # parent-net's weight history as well.

                in_between_weights = np.linspace(net1_target_data, net2_target_data, number_clones, endpoint=False)
                # in_between_weights = np.logspace(net1_target_data, net2_target_data, number_clones, endpoint=False)

                for j, in_between_weight in enumerate(in_between_weights):
                    clone = Net(net1.input_size, net1.hidden_size, net1.out_size,
                                name=f"{net1.name}_{net2.name}_clone_{str(j)}", start_time=self.ST_steps + 100)
                    clone.apply_weights(torch.as_tensor(in_between_weight))

                    clone.s_train_weights_history = copy.deepcopy(net1.s_train_weights_history)
                    clone.number_trained = copy.deepcopy(net1.number_trained)

                    # Pre Training distances (after noise application of course)
                    clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix()).detach()
                    MAE_pre = MAE(net1_target_data, clone_pre_weights)
                    MSE_pre = MSE(net1_target_data, clone_pre_weights)
                    MIM_pre = mean_invariate_manhattan_distance(net1_target_data, clone_pre_weights)

                    try:
                        # Then finish training each clone {j} (for remaining epoch-1 * ST_steps) ..
                        for _ in range(self.epochs - 1):
                            for _ in range(self.ST_steps):
                                clone.self_train(1, self.log_step_size, self.net_learning_rate)
                            if any([torch.isnan(x).any() for x in clone.parameters()]):
                                raise ValueError
                    except ValueError:
                        print("Ran into nan in 'in-between weights' array.")
                        df.loc[len(df)] = [j, net1.name, net2.name,
                                           MAE_pre, 0,
                                           MSE_pre, 0,
                                           MIM_pre, 0,
                                           self.noise, clone.is_fixpoint]
                        continue

                    # Post Training distances for comparison
                    clone_post_weights = clone.create_target_weights(clone.input_weight_matrix()).detach()
                    MAE_post = MAE(net1_target_data, clone_post_weights)
                    MSE_post = MSE(net1_target_data, clone_post_weights)
                    MIM_post = mean_invariate_manhattan_distance(net1_target_data, clone_post_weights)

                    # .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
                    test_status(clone)
                    if is_identity_function(clone):
                        print(f"Clone {j} (between {net1.name} and {net2.name}) is fixpoint."
                              f"\nMSE({net1.name},{j}): {MSE_post}"
                              f"\nMAE({net1.name},{j}): {MAE_post}"
                              f"\nMIM({net1.name},{j}): {MIM_post}\n")
                        self.nets.append(clone)

                    df.loc[len(df)] = [j, net1.name, net2.name,
                                       MAE_pre, MAE_post,
                                       MSE_pre, MSE_post,
                                       MIM_pre, MIM_post,
                                       self.noise, clone.is_fixpoint]

        for net1, net2 in pairwise_net_list:
            try:
                value = 'MAE'
                c_selector = [f'{value}_pre', f'{value}_post']
                values = df.loc[(df['parent'] == net1.name) & (df['parent2'] == net2.name)][c_selector]
                this_min, this_max = values.values.min(), values.values.max()
                df.loc[(df['parent'] == net1.name) &
                       (df['parent2'] == net2.name), c_selector] = (values - this_min) / (this_max - this_min)
            except ValueError:
                pass

        for parent in self.parents:
            for _ in range(self.epochs - 1):
                for _ in range(self.ST_steps):
                    parent.self_train(1, self.log_step_size, self.net_learning_rate)

        self.df = df


if __name__ == '__main__':
    NET_INPUT_SIZE = 4
    NET_OUT_SIZE = 1

    # Define number of runs & name:
    ST_runs = 1
    ST_runs_name = "test-27"
    ST_steps = 2000
    ST_epochs = 2
    ST_log_step_size = 10

    # Define number of networks & their architecture
    nr_clones = 25
    ST_population_size = 10
    ST_net_hidden_size = 2
    ST_net_learning_rate = 0.04
    ST_name_hash = random.getrandbits(32)

    print(f"Running the Spawn experiment:")
    exp = SpawnLinspaceExperiment(
        population_size=ST_population_size,
        log_step_size=ST_log_step_size,
        net_input_size=NET_INPUT_SIZE,
        net_hidden_size=ST_net_hidden_size,
        net_out_size=NET_OUT_SIZE,
        net_learning_rate=ST_net_learning_rate,
        epochs=ST_epochs,
        st_steps=ST_steps,
        nr_clones=nr_clones,
        noise=1e-8,
        directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'linage'
    )
    df = exp.df

    directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}' / 'linage'
    with (directory / f"experiment_pickle_{ST_name_hash}.p").open('wb') as f:
        pickle.dump(exp, f)
    print(f"\nSaved experiment to {directory}.")

    # Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis
    # sns.countplot(data=df, x="noise", hue="status_post")
    # plt.savefig(f"output/spawn_basin/{ST_name_hash}/fixpoint_status_countplot.png")

    # Catplot (either kind="point" or "box") that shows before-after training distances to parent
    # mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance')
    # sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False)
    # plt.savefig(f"output/spawn_basin/{ST_name_hash}/clone_distance_catplot.png")

    # Pointplot with pre and after parent Distances
    import seaborn as sns
    from matplotlib import pyplot as plt, ticker

    # ptplt = sns.pointplot(data=exp.df, x='MAE_pre', y='MAE_post', join=False)
    ptplt = sns.scatterplot(x=exp.df['MAE_pre'], y=exp.df['MAE_post'])
    # ptplt.set(xscale='log', yscale='log')
    x0, x1 = ptplt.axes.get_xlim()
    y0, y1 = ptplt.axes.get_ylim()
    lims = [max(x0, y0), min(x1, y1)]
    # This is the x=y line using transforms
    ptplt.plot(lims, lims, 'w', linestyle='dashdot', transform=ptplt.axes.transData)
    ptplt.plot([0, 1], [0, 1], ':k', transform=ptplt.axes.transAxes)
    ptplt.set(xlabel='Mean Absolute Distance before Self-Training',
              ylabel='Mean Absolute Distance after Self-Training')
    # ptplt.axes.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: round(float(x), 2)))
    # ptplt.xticks(rotation=45)
    # for ind, label in enumerate(ptplt.get_xticklabels()):
    #     if ind % 10 == 0:  # every 10th label is kept
    #         label.set_visible(True)
    #     else:
    #         label.set_visible(False)

    filepath = exp.directory / 'mim_dist_plot.pdf'
    plt.tight_layout()
    plt.savefig(filepath, dpi=600, format='pdf', bbox_inches='tight')
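# Illustrative note (hypothetical values): with endpoint=False, np.linspace between two
# flat weight vectors yields number_clones interpolants starting at the first parent's
# weights and excluding the second parent's, e.g.:
#
#   a = np.array([0.0, 0.0])
#   b = np.array([1.0, 2.0])
#   np.linspace(a, b, 4, endpoint=False)
#   # -> [[0.0, 0.0], [0.25, 0.5], [0.5, 1.0], [0.75, 1.5]]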
@@ -1,315 +0,0 @@
import os
from pathlib import Path
import pickle
from tqdm import tqdm
import random
import copy
from functionalities_test import is_identity_function, test_status
from network import Net
from visualization import plot_3d_self_train, plot_loss
import numpy as np
from tabulate import tabulate
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import torch
import torch.nn.functional as F


def prng():
    return random.random()


def l1(tup):
    a, b = tup
    return abs(a - b)


def mean_invariate_manhattan_distance(x, y):
    # One of these one-liners that might be smart or really dumb. Goal is to find pairwise
    # distances of ascending values, ie. sum (abs(min1_X-min1_Y), abs(min2_X-min2Y) ...) / mean.
    # Idea was to find weight sets that have same values but just in different positions, that would
    # make this distance 0.
    try:
        return np.mean(list(map(l1, zip(sorted(x.detach().numpy()), sorted(y.detach().numpy())))))
    except AttributeError:
        return np.mean(list(map(l1, zip(sorted(x.numpy()), sorted(y.numpy())))))


def distance_matrix(nets, distance="MIM", print_it=True):
    matrix = [[0 for _ in range(len(nets))] for _ in range(len(nets))]
    for net in range(len(nets)):
        weights = nets[net].input_weight_matrix()[:, 0]
        for other_net in range(len(nets)):
            other_weights = nets[other_net].input_weight_matrix()[:, 0]
            if distance in ["MSE"]:
                matrix[net][other_net] = MSE(weights, other_weights)
            elif distance in ["MAE"]:
                matrix[net][other_net] = MAE(weights, other_weights)
            elif distance in ["MIM"]:
                matrix[net][other_net] = mean_invariate_manhattan_distance(weights, other_weights)

    if print_it:
        print(f"\nDistance matrix (all to all) [{distance}]:")
        headers = [i.name for i in nets]
        print(tabulate(matrix, showindex=headers, headers=headers, tablefmt='orgtbl'))
    return matrix


def distance_from_parent(nets, distance="MIM", print_it=True):
    list_of_matrices = []
    parents = list(filter(lambda x: "clone" not in x.name and is_identity_function(x), nets))
    distance_range = range(10)
    for parent in parents:
        parent_weights = parent.create_target_weights(parent.input_weight_matrix())
        clones = list(filter(lambda y: parent.name in y.name and parent.name != y.name, nets))
        matrix = [[0 for _ in distance_range] for _ in range(len(clones))]

        for dist in distance_range:
            for idx, clone in enumerate(clones):
                clone_weights = clone.create_target_weights(clone.input_weight_matrix())
                if distance in ["MSE"]:
                    matrix[idx][dist] = MSE(parent_weights, clone_weights) < pow(10, -dist)
                elif distance in ["MAE"]:
                    matrix[idx][dist] = MAE(parent_weights, clone_weights) < pow(10, -dist)
                elif distance in ["MIM"]:
                    matrix[idx][dist] = mean_invariate_manhattan_distance(parent_weights, clone_weights) < pow(10,
                                                                                                               -dist)

        if print_it:
            print(f"\nDistances from parent {parent.name} [{distance}]:")
            col_headers = [str(f"10e-{d}") for d in distance_range]
            row_headers = [str(f"clone_{i}") for i in range(len(clones))]
            print(tabulate(matrix, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))

        list_of_matrices.append(matrix)

    return list_of_matrices
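# Illustrative note (hypothetical values): mean_invariate_manhattan_distance compares the
# *sorted* weight values, so two vectors containing the same values in a different order
# have distance 0, while an element-wise metric like MAE does not:
#
#   x = torch.tensor([1.0, 2.0, 3.0])
#   y = torch.tensor([3.0, 1.0, 2.0])
#   mean_invariate_manhattan_distance(x, y)   # -> 0.0
#   MAE(x.numpy(), y.numpy())                 # -> 1.33..., since it compares positions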


class SpawnExperiment:

    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 epochs, st_steps, nr_clones, noise, directory) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size
        self.net_learning_rate = net_learning_rate
        self.epochs = epochs
        self.ST_steps = st_steps
        self.loss_history = []
        self.nets = []
        self.nr_clones = nr_clones
        self.noise = noise or 10e-5
        print("\nNOISE:", self.noise)

        self.parents = []

        self.directory = Path(directory)
        self.directory.mkdir(parents=True, exist_ok=True)

        self.populate_environment()
        self.spawn_and_continue()
        self.weights_evolution_3d_experiment()
        # self.visualize_loss()
        self.distance_matrix = distance_matrix(self.nets, print_it=False)
        self.parent_clone_distances = distance_from_parent(self.nets, print_it=False)

    def populate_environment(self):
        loop_population_size = tqdm(range(self.population_size))
        for i in loop_population_size:
            loop_population_size.set_description("Populating experiment %s" % i)

            net_name = f"ST_net_{str(i)}"
            net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)

            for _ in range(self.ST_steps):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

            self.nets.append(net)
            self.parents.append(net)

    def spawn_and_continue(self, number_clones: int = None):
        number_clones = number_clones or self.nr_clones

        df = pd.DataFrame(
            columns=['name', 'MAE_pre', 'MAE_post', 'MSE_pre', 'MSE_post', 'MIM_pre', 'MIM_post', 'noise',
                     'status_post'])

        # For every initial net {i} after populating (that is fixpoint after first epoch);
        for i in range(self.population_size):
            net = self.nets[i]
            # We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to
            # see the full trajectory (but the clones will be very hard to see).
            # Make one target to compare distances to clones later when they have trained.
            net.start_time = self.ST_steps - 350
            net_input_data = net.input_weight_matrix()
            net_target_data = net.create_target_weights(net_input_data)

            if is_identity_function(net):
                print(f"\nNet {i} is fixpoint")

                # Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
                # To plot clones starting after first epoch (z=ST_steps), set that as start_time!
                # To make sure PCA will plot the same trajectory up until this point, we clone the
                # parent-net's weight history as well.
                for j in range(number_clones):
                    clone = Net(net.input_size, net.hidden_size, net.out_size,
                                f"ST_net_{str(i)}_clone_{str(j)}", start_time=self.ST_steps)
                    clone.load_state_dict(copy.deepcopy(net.state_dict()))
                    rand_noise = prng() * self.noise
                    clone = clone.apply_noise(rand_noise)
                    clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history)
                    clone.number_trained = copy.deepcopy(net.number_trained)

                    # Pre Training distances (after noise application of course)
                    clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix())
                    MAE_pre = MAE(net_target_data, clone_pre_weights)
                    MSE_pre = MSE(net_target_data, clone_pre_weights)
                    MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights)

                    # Then finish training each clone {j} (for remaining epoch-1 * ST_steps) ..
                    for _ in range(self.epochs - 1):
                        for _ in range(self.ST_steps):
                            clone.self_train(1, self.log_step_size, self.net_learning_rate)

                    # Post Training distances for comparison
                    clone_post_weights = clone.create_target_weights(clone.input_weight_matrix())
                    MAE_post = MAE(net_target_data, clone_post_weights)
                    MSE_post = MSE(net_target_data, clone_post_weights)
                    MIM_post = mean_invariate_manhattan_distance(net_target_data, clone_post_weights)

                    # .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
                    test_status(clone)
                    if is_identity_function(clone):
                        print(f"Clone {j} (of net_{i}) is fixpoint."
                              f"\nMSE({i},{j}): {MSE_post}"
                              f"\nMAE({i},{j}): {MAE_post}"
                              f"\nMIM({i},{j}): {MIM_post}\n")
                        self.nets.append(clone)

                    df.loc[clone.name] = [clone.name, MAE_pre, MAE_post, MSE_pre, MSE_post, MIM_pre, MIM_post, self.noise, clone.is_fixpoint]

            # Finally take parent net {i} and finish its training for comparison to clone development.
            for _ in range(self.epochs - 1):
                for _ in range(self.ST_steps):
                    net.self_train(1, self.log_step_size, self.net_learning_rate)
            net_weights_after = net.create_target_weights(net.input_weight_matrix())
            print(f"Parent net's distance to original position."
                  f"\nMAE(OG,new): {MAE(net_target_data, net_weights_after)}"
                  f"\nMSE(OG,new): {MSE(net_target_data, net_weights_after)}"
                  f"\nMIM(OG,new): {mean_invariate_manhattan_distance(net_target_data, net_weights_after)}\n")

        self.df = df

    def weights_evolution_3d_experiment(self):
        exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
        return plot_3d_self_train(self.nets, exp_name, self.directory, self.log_step_size, plot_pca_together=True)

    def visualize_loss(self):
        for i in range(len(self.nets)):
            net_loss_history = self.nets[i].loss_history
            self.loss_history.append(net_loss_history)
        plot_loss(self.loss_history, self.directory)


if __name__ == "__main__":

    NET_INPUT_SIZE = 4
    NET_OUT_SIZE = 1

    # Define number of runs & name:
    ST_runs = 1
    ST_runs_name = "test-27"
    ST_steps = 2500
    ST_epochs = 2
    ST_log_step_size = 10

    # Define number of networks & their architecture
    nr_clones = 10
    ST_population_size = 1
    ST_net_hidden_size = 2
    ST_net_learning_rate = 0.04
    ST_name_hash = random.getrandbits(32)

    print(f"Running the Spawn experiment:")
    exp_list = []
    for noise_factor in range(2, 3):
        exp = SpawnExperiment(
            population_size=ST_population_size,
            log_step_size=ST_log_step_size,
            net_input_size=NET_INPUT_SIZE,
            net_hidden_size=ST_net_hidden_size,
            net_out_size=NET_OUT_SIZE,
            net_learning_rate=ST_net_learning_rate,
            epochs=ST_epochs,
            st_steps=ST_steps,
            nr_clones=nr_clones,
            noise=pow(10, -noise_factor),
            directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'10e-{noise_factor}'
|
|
||||||
)
|
|
||||||
exp_list.append(exp)
|
|
||||||
|
|
||||||
directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}'
|
|
||||||
pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
|
|
||||||
print(f"\nSaved experiment to {directory}.")
|
|
||||||
|
|
||||||
# Concat all dataframes, and add columns depending on where clone weights end up after training (rel. to parent)
|
|
||||||
df = pd.concat([exp.df for exp in exp_list])
|
|
||||||
df = df.dropna().reset_index()
|
|
||||||
df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"])/df.loc[i]["noise"] for i in range(len(df))]
|
|
||||||
df["class"] = [ "approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))]
|
|
||||||
|
|
||||||
# Countplot of all fixpoint clone after training per class.
|
|
||||||
ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=11.7/5.27, legend=False)
|
|
||||||
ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15)
|
|
||||||
ax.set_xticklabels(labels=('$\mathregular{10^{-10}}$', '$\mathregular{10^{-9}}$', '$\mathregular{10^{-8}}$', '$\mathregular{10^{-7}}$', '$\mathregular{10^{-6}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-4}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-2}}$', '$\mathregular{10^{-1}}$'), fontsize=15)
|
|
||||||
plt.legend(bbox_to_anchor=(0.01, 0.85), loc=2, borderaxespad=0.)
|
|
||||||
plt.legend(fontsize='large')
|
|
||||||
plt.savefig(f"{directory}/clone_status_after_countplot_{ST_name_hash}.png")
|
|
||||||
plt.clf()
|
|
||||||
|
|
||||||
# Catplot of before-after comparison of the clone's weights. Colors links depending on class (approaching, distancing, stationary (i.e., MAE=0)). Blue, orange and green are based on countplot above, should be save for colorblindness (see https://gist.github.com/mwaskom/b35f6ebc2d4b340b4f64a4e28e778486)-
|
|
||||||
mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance")
|
|
||||||
P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))]
|
|
||||||
P = sns.color_palette(P, as_cmap=False)
|
|
||||||
ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P, col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
|
|
||||||
ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
|
|
||||||
ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
|
|
||||||
ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
|
|
||||||
# plt.ticklabel_format(style='sci', axis='x')
|
|
||||||
plt.savefig(f"{directory}/before_after_distance_catplot_{ST_name_hash}.png")
|
|
||||||
plt.clf()
|
|
||||||
|
|
||||||
# Catplot of child_nets L1 Prediction "progress" compared to parents. Computes one round of accuracy first. If net is a parent net (not a clone), then we reset weights to timestep of cloning first (from the weight history). So 5k (end) -> 2.5k training (in this experiment, so careful with len(history)/2, this might only work here!)
|
|
||||||
df_acc = pd.DataFrame(columns=["name", "noise", "l1_acc", "Network Type"])
|
|
||||||
for i in range(len(exp_list)):
|
|
||||||
noise = exp_list[i].noise
|
|
||||||
print(f"\nNoise: {noise}")
|
|
||||||
for network in exp_list[i].nets:
|
|
||||||
is_parent = "clone" not in network.name
|
|
||||||
if is_parent:
|
|
||||||
network.apply_weights(torch.tensor(network.s_train_weights_history[int(len(network.s_train_weights_history)/2)][0]))
|
|
||||||
input_data = network.input_weight_matrix()
|
|
||||||
target_data = network.create_target_weights(input_data)
|
|
||||||
predicted_values = network(input_data)
|
|
||||||
mse_loss = F.mse_loss(target_data, predicted_values).item()
|
|
||||||
l1_loss = F.l1_loss(target_data, predicted_values).item()
|
|
||||||
|
|
||||||
df_acc.loc[len(df_acc)+1] = [network.name, noise, l1_loss, "parents" if is_parent else "child_nets"]
|
|
||||||
print("MSE:", mse_loss, "\t", "L1: ", l1_loss, "\t", network.name)
|
|
||||||
|
|
||||||
# Note: If there are outliers then showfliers=False is necessary or it will zoom way to far out. If parent and child_nets accuracy is too far apart this plot might not work (only shows either parents or part of the child_nets).
|
|
||||||
ax = sns.catplot(data=df_acc, y="l1_acc", x="noise", hue="Network Type", kind="box", legend=False, showfliers=False, height=5.27, aspect=11.7/5.27, sharey=False)
|
|
||||||
ax.map(plt.axhline, y=10**-6, ls='--')
|
|
||||||
ax.map(plt.axhline, y=10**-7, ls='--')
|
|
||||||
ax.set_axis_labels("Noise levels", "L1 Prediction Loss After Training", fontsize=15)
|
|
||||||
ax.set_xticklabels(labels=('$\mathregular{10^{-10}}$', '$\mathregular{10^{-9}}$', '$\mathregular{10^{-8}}$', '$\mathregular{10^{-7}}$', '$\mathregular{10^{-6}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-4}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-2}}$', '$\mathregular{10^{-1}}$'), fontsize=15)
|
|
||||||
plt.legend(bbox_to_anchor=(0.01, 0.85), loc=2, borderaxespad=0.)
|
|
||||||
plt.legend(fontsize='large')
|
|
||||||
plt.savefig(f"{directory}/parent_vs_children_accuracy_{ST_name_hash}.png")
|
|
||||||
plt.clf()
|
|
@ -1,246 +0,0 @@
|
|||||||
import pickle
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import torch
|
|
||||||
import random
|
|
||||||
import copy
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from matplotlib.ticker import ScalarFormatter
|
|
||||||
from tqdm import tqdm
|
|
||||||
from tabulate import tabulate
|
|
||||||
|
|
||||||
from functionalities_test import is_identity_function, is_zero_fixpoint, test_for_fixpoints, is_divergent
|
|
||||||
from network import Net
|
|
||||||
from torch.nn import functional as F
|
|
||||||
from visualization import plot_loss, bar_chart_fixpoints
|
|
||||||
import seaborn as sns
|
|
||||||
from matplotlib import pyplot as plt
|
|
||||||
|
|
||||||
|
|
||||||
def prng():
|
|
||||||
return random.random()
|
|
||||||
|
|
||||||
|
|
||||||
def generate_perfekt_synthetic_fixpoint_weights():
|
|
||||||
return torch.tensor([[1.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
|
|
||||||
[1.0], [0.0], [0.0], [0.0],
|
|
||||||
[1.0], [0.0]
|
|
||||||
], dtype=torch.float32)
|
|
||||||
|
|
||||||
|
|
||||||
PALETTE = 10 * (
|
|
||||||
"#377eb8",
|
|
||||||
"#4daf4a",
|
|
||||||
"#984ea3",
|
|
||||||
"#e41a1c",
|
|
||||||
"#ff7f00",
|
|
||||||
"#a65628",
|
|
||||||
"#f781bf",
|
|
||||||
"#888888",
|
|
||||||
"#a6cee3",
|
|
||||||
"#b2df8a",
|
|
||||||
"#cab2d6",
|
|
||||||
"#fb9a99",
|
|
||||||
"#fdbf6f",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class RobustnessComparisonExperiment:
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def apply_noise(network, noise: int):
|
|
||||||
# Changing the weights of a network to values + noise
|
|
||||||
for layer_id, layer_name in enumerate(network.state_dict()):
|
|
||||||
for line_id, line_values in enumerate(network.state_dict()[layer_name]):
|
|
||||||
for weight_id, weight_value in enumerate(network.state_dict()[layer_name][line_id]):
|
|
||||||
# network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
|
|
||||||
if prng() < 0.5:
|
|
||||||
network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
|
|
||||||
else:
|
|
||||||
network.state_dict()[layer_name][line_id][weight_id] = weight_value - noise
|
|
||||||
|
|
||||||
return network
|
|
||||||
|
|
||||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
|
||||||
epochs, st_steps, synthetic, directory) -> None:
|
|
||||||
self.population_size = population_size
|
|
||||||
self.log_step_size = log_step_size
|
|
||||||
self.net_input_size = net_input_size
|
|
||||||
self.net_hidden_size = net_hidden_size
|
|
||||||
self.net_out_size = net_out_size
|
|
||||||
self.net_learning_rate = net_learning_rate
|
|
||||||
self.epochs = epochs
|
|
||||||
self.ST_steps = st_steps
|
|
||||||
self.loss_history = []
|
|
||||||
self.is_synthetic = synthetic
|
|
||||||
self.fixpoint_counters = {
|
|
||||||
"identity_func": 0,
|
|
||||||
"divergent": 0,
|
|
||||||
"fix_zero": 0,
|
|
||||||
"fix_weak": 0,
|
|
||||||
"fix_sec": 0,
|
|
||||||
"other_func": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
self.directory = Path(directory)
|
|
||||||
self.directory.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
self.id_functions = []
|
|
||||||
self.nets = self.populate_environment()
|
|
||||||
self.count_fixpoints()
|
|
||||||
self.time_to_vergence, self.time_as_fixpoint = self.test_robustness(
|
|
||||||
seeds=population_size if self.is_synthetic else 1)
|
|
||||||
|
|
||||||
def populate_environment(self):
|
|
||||||
nets = []
|
|
||||||
if self.is_synthetic:
|
|
||||||
''' Either use perfect / hand-constructed fixpoint ... '''
|
|
||||||
net_name = f"net_{str(0)}_synthetic"
|
|
||||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
|
||||||
net.apply_weights(generate_perfekt_synthetic_fixpoint_weights())
|
|
||||||
nets.append(net)
|
|
||||||
|
|
||||||
else:
|
|
||||||
loop_population_size = tqdm(range(self.population_size))
|
|
||||||
for i in loop_population_size:
|
|
||||||
loop_population_size.set_description("Populating experiment %s" % i)
|
|
||||||
|
|
||||||
''' .. or use natural approach to train fixpoints from random initialisation. '''
|
|
||||||
net_name = f"net_{str(i)}"
|
|
||||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
|
||||||
for _ in range(self.epochs):
|
|
||||||
net.self_train(self.ST_steps, self.log_step_size, self.net_learning_rate)
|
|
||||||
nets.append(net)
|
|
||||||
return nets
|
|
||||||
|
|
||||||
def test_robustness(self, print_it=True, noise_levels=10, seeds=10):
|
|
||||||
assert (len(self.id_functions) == 1 and seeds > 1) or (len(self.id_functions) > 1 and seeds == 1)
|
|
||||||
time_to_vergence = [[0 for _ in range(noise_levels)] for _ in
|
|
||||||
range(seeds if self.is_synthetic else len(self.id_functions))]
|
|
||||||
time_as_fixpoint = [[0 for _ in range(noise_levels)] for _ in
|
|
||||||
range(seeds if self.is_synthetic else len(self.id_functions))]
|
|
||||||
row_headers = []
|
|
||||||
|
|
||||||
# This checks wether to use synthetic setting with multiple seeds
|
|
||||||
# or multi network settings with a singlee seed
|
|
||||||
|
|
||||||
df = pd.DataFrame(columns=['setting', 'Noise Level', 'Self Train Steps', 'absolute_loss',
|
|
||||||
'Time to convergence', 'Time as fixpoint'])
|
|
||||||
with tqdm(total=max(len(self.id_functions), seeds)) as pbar:
|
|
||||||
for i, fixpoint in enumerate(self.id_functions): # 1 / n
|
|
||||||
row_headers.append(fixpoint.name)
|
|
||||||
for seed in range(seeds): # n / 1
|
|
||||||
setting = seed if self.is_synthetic else i
|
|
||||||
|
|
||||||
for noise_level in range(noise_levels):
|
|
||||||
steps = 0
|
|
||||||
clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
|
|
||||||
f"{fixpoint.name}_clone_noise_1e-{noise_level}")
|
|
||||||
clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))
|
|
||||||
clone = clone.apply_noise(pow(10, -noise_level))
|
|
||||||
|
|
||||||
while not is_zero_fixpoint(clone) and not is_divergent(clone):
|
|
||||||
# -> before
|
|
||||||
clone_weight_pre_application = clone.input_weight_matrix()
|
|
||||||
target_data_pre_application = clone.create_target_weights(clone_weight_pre_application)
|
|
||||||
|
|
||||||
clone.self_application(1, self.log_step_size)
|
|
||||||
time_to_vergence[setting][noise_level] += 1
|
|
||||||
# -> after
|
|
||||||
clone_weight_post_application = clone.input_weight_matrix()
|
|
||||||
target_data_post_application = clone.create_target_weights(clone_weight_post_application)
|
|
||||||
|
|
||||||
absolute_loss = F.l1_loss(target_data_pre_application, target_data_post_application).item()
|
|
||||||
|
|
||||||
if is_identity_function(clone):
|
|
||||||
time_as_fixpoint[setting][noise_level] += 1
|
|
||||||
# When this raises a Type Error, we found a second order fixpoint!
|
|
||||||
steps += 1
|
|
||||||
|
|
||||||
df.loc[df.shape[0]] = [setting, f'$\mathregular{{10^{{-{noise_level}}}}}$',
|
|
||||||
steps, absolute_loss,
|
|
||||||
time_to_vergence[setting][noise_level],
|
|
||||||
time_as_fixpoint[setting][noise_level]]
|
|
||||||
pbar.update(1)
|
|
||||||
|
|
||||||
# Get the measuremts at the highest time_time_to_vergence
|
|
||||||
df_sorted = df.sort_values('Self Train Steps', ascending=False).drop_duplicates(['setting', 'Noise Level'])
|
|
||||||
df_melted = df_sorted.reset_index().melt(id_vars=['setting', 'Noise Level', 'Self Train Steps'],
|
|
||||||
value_vars=['Time to convergence', 'Time as fixpoint'],
|
|
||||||
var_name="Measurement",
|
|
||||||
value_name="Steps").sort_values('Noise Level')
|
|
||||||
# Plotting
|
|
||||||
# plt.rcParams.update({
|
|
||||||
# "text.usetex": True,
|
|
||||||
# "font.family": "sans-serif",
|
|
||||||
# "font.size": 12,
|
|
||||||
# "font.weight": 'bold',
|
|
||||||
# "font.sans-serif": ["Helvetica"]})
|
|
||||||
sns.set(style='whitegrid', font_scale=2)
|
|
||||||
bf = sns.boxplot(data=df_melted, y='Steps', x='Noise Level', hue='Measurement', palette=PALETTE)
|
|
||||||
synthetic = 'synthetic' if self.is_synthetic else 'natural'
|
|
||||||
plt.tight_layout()
|
|
||||||
|
|
||||||
# sns.set(rc={'figure.figsize': (10, 50)})
|
|
||||||
# bx = sns.catplot(data=df[df['absolute_loss'] < 1], y='absolute_loss', x='application_step', kind='box',
|
|
||||||
# col='noise_level', col_wrap=3, showfliers=False)
|
|
||||||
|
|
||||||
filename = f"absolute_loss_perapplication_boxplot_grid_{'synthetic' if self.is_synthetic else 'wild'}.png"
|
|
||||||
filepath = self.directory / filename
|
|
||||||
plt.savefig(str(filepath))
|
|
||||||
|
|
||||||
if print_it:
|
|
||||||
col_headers = [str(f"1e-{d}") for d in range(noise_levels)]
|
|
||||||
|
|
||||||
print(f"\nAppplications steps until divergence / zero: ")
|
|
||||||
# print(tabulate(time_to_vergence, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
|
||||||
|
|
||||||
print(f"\nTime as fixpoint: ")
|
|
||||||
# print(tabulate(time_as_fixpoint, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
|
||||||
return time_as_fixpoint, time_to_vergence
|
|
||||||
|
|
||||||
def count_fixpoints(self):
|
|
||||||
exp_details = f"ST steps: {self.ST_steps}"
|
|
||||||
self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.nets)
|
|
||||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
|
|
||||||
exp_details)
|
|
||||||
|
|
||||||
def visualize_loss(self):
|
|
||||||
for i in range(len(self.nets)):
|
|
||||||
net_loss_history = self.nets[i].loss_history
|
|
||||||
self.loss_history.append(net_loss_history)
|
|
||||||
plot_loss(self.loss_history, self.directory)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
NET_INPUT_SIZE = 4
|
|
||||||
NET_OUT_SIZE = 1
|
|
||||||
|
|
||||||
ST_steps = 1000
|
|
||||||
ST_epochs = 5
|
|
||||||
ST_log_step_size = 10
|
|
||||||
ST_population_size = 10
|
|
||||||
ST_net_hidden_size = 2
|
|
||||||
ST_net_learning_rate = 0.004
|
|
||||||
ST_name_hash = random.getrandbits(32)
|
|
||||||
ST_synthetic = False
|
|
||||||
|
|
||||||
print(f"Running the robustness comparison experiment:")
|
|
||||||
exp = RobustnessComparisonExperiment(
|
|
||||||
population_size=ST_population_size,
|
|
||||||
log_step_size=ST_log_step_size,
|
|
||||||
net_input_size=NET_INPUT_SIZE,
|
|
||||||
net_hidden_size=ST_net_hidden_size,
|
|
||||||
net_out_size=NET_OUT_SIZE,
|
|
||||||
net_learning_rate=ST_net_learning_rate,
|
|
||||||
epochs=ST_epochs,
|
|
||||||
st_steps=ST_steps,
|
|
||||||
synthetic=ST_synthetic,
|
|
||||||
directory=Path('output') / 'journal_robustness' / f'{ST_name_hash}'
|
|
||||||
)
|
|
||||||
|
|
||||||
directory = Path('output') / 'journal_robustness' / f'{ST_name_hash}'
|
|
||||||
pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
|
|
||||||
print(f"\nSaved experiment to {directory}.")
|
|
@ -1,341 +0,0 @@
|
|||||||
import pickle
|
|
||||||
|
|
||||||
import random
|
|
||||||
import copy
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import seaborn as sns
|
|
||||||
import torch
|
|
||||||
from matplotlib import pyplot as plt
|
|
||||||
from sklearn.metrics import mean_absolute_error as MAE
|
|
||||||
from sklearn.metrics import mean_squared_error as MSE
|
|
||||||
from tabulate import tabulate
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from functionalities_test import is_identity_function, test_status, is_zero_fixpoint, is_divergent, \
|
|
||||||
is_secondary_fixpoint
|
|
||||||
from journal_basins import mean_invariate_manhattan_distance
|
|
||||||
from network import Net
|
|
||||||
from visualization import plot_loss, plot_3d_soup
|
|
||||||
|
|
||||||
|
|
||||||
def l1(tup):
|
|
||||||
a, b = tup
|
|
||||||
return abs(a - b)
|
|
||||||
|
|
||||||
|
|
||||||
def distance_matrix(nets, distance="MIM", print_it=True):
|
|
||||||
matrix = [[0 for _ in range(len(nets))] for _ in range(len(nets))]
|
|
||||||
for net in range(len(nets)):
|
|
||||||
weights = nets[net].input_weight_matrix()[:, 0]
|
|
||||||
for other_net in range(len(nets)):
|
|
||||||
other_weights = nets[other_net].input_weight_matrix()[:, 0]
|
|
||||||
if distance in ["MSE"]:
|
|
||||||
matrix[net][other_net] = MSE(weights, other_weights)
|
|
||||||
elif distance in ["MAE"]:
|
|
||||||
matrix[net][other_net] = MAE(weights, other_weights)
|
|
||||||
elif distance in ["MIM"]:
|
|
||||||
matrix[net][other_net] = mean_invariate_manhattan_distance(weights, other_weights)
|
|
||||||
|
|
||||||
if print_it:
|
|
||||||
print(f"\nDistance matrix (all to all) [{distance}]:")
|
|
||||||
headers = [i.name for i in nets]
|
|
||||||
print(tabulate(matrix, showindex=headers, headers=headers, tablefmt='orgtbl'))
|
|
||||||
return matrix
|
|
||||||
|
|
||||||
|
|
||||||
def distance_from_parent(nets, distance="MIM", print_it=True):
|
|
||||||
list_of_matrices = []
|
|
||||||
parents = list(filter(lambda x: "clone" not in x.name and is_identity_function(x), nets))
|
|
||||||
distance_range = range(10)
|
|
||||||
for parent in parents:
|
|
||||||
parent_weights = parent.create_target_weights(parent.input_weight_matrix())
|
|
||||||
clones = list(filter(lambda y: parent.name in y.name and parent.name != y.name, nets))
|
|
||||||
matrix = [[0 for _ in distance_range] for _ in range(len(clones))]
|
|
||||||
|
|
||||||
for dist in distance_range:
|
|
||||||
for idx, clone in enumerate(clones):
|
|
||||||
clone_weights = clone.create_target_weights(clone.input_weight_matrix())
|
|
||||||
if distance in ["MSE"]:
|
|
||||||
matrix[idx][dist] = MSE(parent_weights, clone_weights) < pow(10, -dist)
|
|
||||||
elif distance in ["MAE"]:
|
|
||||||
matrix[idx][dist] = MAE(parent_weights, clone_weights) < pow(10, -dist)
|
|
||||||
elif distance in ["MIM"]:
|
|
||||||
matrix[idx][dist] = mean_invariate_manhattan_distance(parent_weights, clone_weights) < pow(10,
|
|
||||||
-dist)
|
|
||||||
|
|
||||||
if print_it:
|
|
||||||
print(f"\nDistances from parent {parent.name} [{distance}]:")
|
|
||||||
col_headers = [str(f"10e-{d}") for d in distance_range]
|
|
||||||
row_headers = [str(f"clone_{i}") for i in range(len(clones))]
|
|
||||||
print(tabulate(matrix, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
|
||||||
|
|
||||||
list_of_matrices.append(matrix)
|
|
||||||
|
|
||||||
return list_of_matrices
|
|
||||||
|
|
||||||
|
|
||||||
class SoupSpawnExperiment:
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
|
||||||
epochs, st_steps, attack_chance, nr_clones, noise, directory) -> None:
|
|
||||||
self.population_size = population_size
|
|
||||||
self.log_step_size = log_step_size
|
|
||||||
self.net_input_size = net_input_size
|
|
||||||
self.net_hidden_size = net_hidden_size
|
|
||||||
self.net_out_size = net_out_size
|
|
||||||
self.net_learning_rate = net_learning_rate
|
|
||||||
self.epochs = epochs
|
|
||||||
self.ST_steps = st_steps
|
|
||||||
self.attack_chance = attack_chance
|
|
||||||
self.loss_history = []
|
|
||||||
self.nr_clones = nr_clones
|
|
||||||
self.noise = noise or 10e-5
|
|
||||||
print("\nNOISE:", self.noise)
|
|
||||||
|
|
||||||
self.directory = Path(directory)
|
|
||||||
self.directory.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
# Populating environment & evolving entities
|
|
||||||
self.parents = []
|
|
||||||
self.clones = []
|
|
||||||
self.parents_with_clones = []
|
|
||||||
self.parents_clones_id_functions = []
|
|
||||||
|
|
||||||
self.populate_environment()
|
|
||||||
|
|
||||||
self.spawn_and_continue()
|
|
||||||
# self.weights_evolution_3d_experiment(self.parents, "only_parents")
|
|
||||||
self.weights_evolution_3d_experiment(self.clones, "only_clones")
|
|
||||||
self.weights_evolution_3d_experiment(self.parents_with_clones, "parents_with_clones")
|
|
||||||
# self.weights_evolution_3d_experiment(self.parents_clones_id_functions, "id_f_with_parents")
|
|
||||||
|
|
||||||
# self.visualize_loss()
|
|
||||||
self.distance_matrix = distance_matrix(self.parents_clones_id_functions, print_it=False)
|
|
||||||
self.parent_clone_distances = distance_from_parent(self.parents_clones_id_functions, print_it=False)
|
|
||||||
|
|
||||||
# self.save()
|
|
||||||
|
|
||||||
def populate_environment(self):
|
|
||||||
loop_population_size = tqdm(range(self.population_size))
|
|
||||||
for i in loop_population_size:
|
|
||||||
loop_population_size.set_description("Populating experiment %s" % i)
|
|
||||||
|
|
||||||
net_name = f"parent_net_{str(i)}"
|
|
||||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
|
||||||
|
|
||||||
for _ in range(self.ST_steps):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
|
|
||||||
self.parents.append(net)
|
|
||||||
self.parents_with_clones.append(net)
|
|
||||||
|
|
||||||
if is_identity_function(net):
|
|
||||||
self.parents_clones_id_functions.append(net)
|
|
||||||
print(f"\nNet {net.name} is identity function")
|
|
||||||
|
|
||||||
if is_divergent(net):
|
|
||||||
print(f"\nNet {net.name} is divergent")
|
|
||||||
|
|
||||||
if is_zero_fixpoint(net):
|
|
||||||
print(f"\nNet {net.name} is zero fixpoint")
|
|
||||||
|
|
||||||
if is_secondary_fixpoint(net):
|
|
||||||
print(f"\nNet {net.name} is secondary fixpoint")
|
|
||||||
|
|
||||||
def evolve(self, population):
|
|
||||||
print(f"Clone soup has a population of {len(population)} networks")
|
|
||||||
|
|
||||||
loop_epochs = tqdm(range(self.epochs - 1))
|
|
||||||
for i in loop_epochs:
|
|
||||||
loop_epochs.set_description("\nEvolving clone soup %s" % i)
|
|
||||||
|
|
||||||
# A network attacking another network with a given percentage
|
|
||||||
if random.randint(1, 100) <= self.attack_chance:
|
|
||||||
random_net1, random_net2 = random.sample(range(len(population)), 2)
|
|
||||||
random_net1 = population[random_net1]
|
|
||||||
random_net2 = population[random_net2]
|
|
||||||
print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
|
|
||||||
random_net1.attack(random_net2)
|
|
||||||
|
|
||||||
# Self-training each network in the population
|
|
||||||
for j in range(len(population)):
|
|
||||||
net = population[j]
|
|
||||||
|
|
||||||
for _ in range(self.ST_steps):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
|
|
||||||
def spawn_and_continue(self, number_clones: int = None):
|
|
||||||
number_clones = number_clones or self.nr_clones
|
|
||||||
|
|
||||||
df = pd.DataFrame(
|
|
||||||
columns=['name', 'parent', 'MAE_pre', 'MAE_post', 'MSE_pre', 'MSE_post', 'MIM_pre', 'MIM_post', 'noise',
|
|
||||||
'status_post'])
|
|
||||||
|
|
||||||
# MAE_pre, MSE_pre, MIM_pre = 0, 0, 0
|
|
||||||
|
|
||||||
# For every initial net {i} after populating (that is fixpoint after first epoch);
|
|
||||||
for i in range(len(self.parents)):
|
|
||||||
net = self.parents[i]
|
|
||||||
# We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to
|
|
||||||
# to see full trajectory (but the clones will be very hard to see).
|
|
||||||
# Make one target to compare distances to clones later when they have trained.
|
|
||||||
net.start_time = self.ST_steps - 150
|
|
||||||
net_input_data = net.input_weight_matrix()
|
|
||||||
net_target_data = net.create_target_weights(net_input_data)
|
|
||||||
|
|
||||||
# print(f"\nNet {i} is fixpoint")
|
|
||||||
|
|
||||||
# Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
|
|
||||||
# To plot clones starting after first epoch (z=ST_steps), set that as start_time!
|
|
||||||
# To make sure PCA will plot the same trajectory up until this point, we clone the
|
|
||||||
# parent-net's weight history as well.
|
|
||||||
for j in range(number_clones):
|
|
||||||
clone = Net(net.input_size, net.hidden_size, net.out_size,
|
|
||||||
f"net_{str(i)}_clone_{str(j)}", start_time=self.ST_steps)
|
|
||||||
clone.load_state_dict(copy.deepcopy(net.state_dict()))
|
|
||||||
clone = clone.apply_noise(self.noise)
|
|
||||||
clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history)
|
|
||||||
clone.number_trained = copy.deepcopy(net.number_trained)
|
|
||||||
|
|
||||||
# Pre Training distances (after noise application of course)
|
|
||||||
clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix())
|
|
||||||
MAE_pre = MAE(net_target_data, clone_pre_weights)
|
|
||||||
MSE_pre = MSE(net_target_data, clone_pre_weights)
|
|
||||||
MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights)
|
|
||||||
|
|
||||||
df.loc[len(df)] = [clone.name, net.name, MAE_pre, 0, MSE_pre, 0, MIM_pre, 0, self.noise, ""]
|
|
||||||
|
|
||||||
net.child_nets.append(clone)
|
|
||||||
self.clones.append(clone)
|
|
||||||
self.parents_with_clones.append(clone)
|
|
||||||
|
|
||||||
self.evolve(self.clones)
|
|
||||||
# evolve also with the parents together
|
|
||||||
# self.evolve(self.parents_with_clones)
|
|
||||||
|
|
||||||
for i in range(len(self.parents)):
|
|
||||||
net = self.parents[i]
|
|
||||||
net_input_data = net.input_weight_matrix()
|
|
||||||
net_target_data = net.create_target_weights(net_input_data)
|
|
||||||
|
|
||||||
for j in range(len(net.child_nets)):
|
|
||||||
clone = net.child_nets[j]
|
|
||||||
|
|
||||||
# Post Training distances for comparison
|
|
||||||
clone_post_weights = clone.create_target_weights(clone.input_weight_matrix())
|
|
||||||
MAE_post = MAE(net_target_data, clone_post_weights)
|
|
||||||
MSE_post = MSE(net_target_data, clone_post_weights)
|
|
||||||
MIM_post = mean_invariate_manhattan_distance(net_target_data, clone_post_weights)
|
|
||||||
|
|
||||||
# .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
|
|
||||||
test_status(clone)
|
|
||||||
if is_identity_function(clone):
|
|
||||||
print(f"Clone {j} (of net_{i}) is fixpoint."
|
|
||||||
f"\nMSE({i},{j}): {MSE_post}"
|
|
||||||
f"\nMAE({i},{j}): {MAE_post}"
|
|
||||||
f"\nMIM({i},{j}): {MIM_post}\n")
|
|
||||||
self.parents_clones_id_functions.append(clone)
|
|
||||||
|
|
||||||
# df.loc[df.name == clone.name, ["MAE_post", "MSE_post", "MIM_post"]] = [MAE_pre, MSE_pre, MIM_pre]
|
|
||||||
|
|
||||||
df.loc[df.name == clone.name, ["MAE_post", "MSE_post", "MIM_post", "status_post"]] = [MAE_post,
|
|
||||||
MSE_post,
|
|
||||||
MIM_post,
|
|
||||||
clone.is_fixpoint]
|
|
||||||
|
|
||||||
# Finally take parent net {i} and finish it's training for comparison to clone development.
|
|
||||||
for _ in range(self.epochs - 1):
|
|
||||||
for _ in range(self.ST_steps):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
net_weights_after = net.create_target_weights(net.input_weight_matrix())
|
|
||||||
print(f"Parent net's distance to original position."
|
|
||||||
f"\nMSE(OG,new): {MAE(net_target_data, net_weights_after)}"
|
|
||||||
f"\nMAE(OG,new): {MSE(net_target_data, net_weights_after)}"
|
|
||||||
f"\nMIM(OG,new): {mean_invariate_manhattan_distance(net_target_data, net_weights_after)}\n")
|
|
||||||
|
|
||||||
self.df = df
|
|
||||||
|
|
||||||
def weights_evolution_3d_experiment(self, nets_population, suffix):
|
|
||||||
exp_name = f"soup_basins_{str(len(nets_population))}_nets_3d_weights_PCA_{suffix}"
|
|
||||||
return plot_3d_soup(nets_population, exp_name, self.directory)
|
|
||||||
|
|
||||||
def visualize_loss(self):
|
|
||||||
for i in range(len(self.parents)):
|
|
||||||
net_loss_history = self.parents[i].loss_history
|
|
||||||
self.loss_history.append(net_loss_history)
|
|
||||||
plot_loss(self.loss_history, self.directory)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
|
|
||||||
NET_INPUT_SIZE = 4
|
|
||||||
NET_OUT_SIZE = 1
|
|
||||||
|
|
||||||
# Define number of runs & name:
|
|
||||||
ST_runs = 3
|
|
||||||
ST_runs_name = "test-27"
|
|
||||||
soup_ST_steps = 1500
|
|
||||||
soup_epochs = 2
|
|
||||||
soup_log_step_size = 10
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
nr_clones = 5
|
|
||||||
soup_population_size = 3
|
|
||||||
soup_net_hidden_size = 2
|
|
||||||
soup_net_learning_rate = 0.04
|
|
||||||
soup_attack_chance = 10
|
|
||||||
soup_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
print(f"Running the Soup-Spawn experiment:")
|
|
||||||
exp_list = []
|
|
||||||
for noise_factor in range(2, 5):
|
|
||||||
exp = SoupSpawnExperiment(
|
|
||||||
population_size=soup_population_size,
|
|
||||||
log_step_size=soup_log_step_size,
|
|
||||||
net_input_size=NET_INPUT_SIZE,
|
|
||||||
net_hidden_size=soup_net_hidden_size,
|
|
||||||
net_out_size=NET_OUT_SIZE,
|
|
||||||
net_learning_rate=soup_net_learning_rate,
|
|
||||||
epochs=soup_epochs,
|
|
||||||
st_steps=soup_ST_steps,
|
|
||||||
attack_chance=soup_attack_chance,
|
|
||||||
nr_clones=nr_clones,
|
|
||||||
noise=pow(10, -noise_factor),
|
|
||||||
directory=Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}' / f'10e-{noise_factor}'
|
|
||||||
)
|
|
||||||
exp_list.append(exp)
|
|
||||||
|
|
||||||
directory = Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}'
|
|
||||||
pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{soup_name_hash}.p", "wb"))
|
|
||||||
print(f"\nSaved experiment to {directory}.")
|
|
||||||
|
|
||||||
# Concat all dataframes, and add columns depending on where clone weights end up after training (rel. to parent)
|
|
||||||
df = pd.concat([exp.df for exp in exp_list])
|
|
||||||
df = df.dropna().reset_index()
|
|
||||||
df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"]) for i in range(len(df))]
|
|
||||||
df["class"] = ["approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))]
|
|
||||||
|
|
||||||
# Countplot of all fixpoint clone after training per class. Uncomment and manually adjust xticklabels if x-ax size gets too small.
|
|
||||||
ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=12.7 / 5.27)
|
|
||||||
ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15)
|
|
||||||
# ax.set_xticklabels(labels=('10e-10', '10e-9', '10e-8', '10e-7', '10e-6', '10e-5', '10e-4', '10e-3', '10e-2', '10e-1'), fontsize=15)
|
|
||||||
plt.savefig(f"{directory}/clone_status_after_countplot_{soup_name_hash}.png")
|
|
||||||
plt.clf()
|
|
||||||
|
|
||||||
# Catplot (either kind="point" or "box") that shows before-after training distances to parent
|
|
||||||
mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State",
|
|
||||||
value_name="Distance")
|
|
||||||
P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))]
|
|
||||||
# P = sns.color_palette(P, as_cmap=False)
|
|
||||||
ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P,
|
|
||||||
col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
|
|
||||||
ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
|
|
||||||
ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
|
|
||||||
ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
|
|
||||||
plt.savefig(f"{directory}/before_after_distance_catplot_{soup_name_hash}.png")
|
|
||||||
plt.clf()
|
|
@ -1,252 +0,0 @@
|
|||||||
import copy
|
|
||||||
import random
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import seaborn as sns
|
|
||||||
from matplotlib.ticker import ScalarFormatter
|
|
||||||
from tqdm import tqdm
|
|
||||||
from matplotlib import pyplot as plt
|
|
||||||
from torch.nn import functional as F
|
|
||||||
from tabulate import tabulate
|
|
||||||
|
|
||||||
from functionalities_test import test_for_fixpoints, is_zero_fixpoint, is_divergent, is_identity_function
|
|
||||||
from network import Net
|
|
||||||
from visualization import plot_loss, bar_chart_fixpoints, plot_3d_soup, line_chart_fixpoints
|
|
||||||
|
|
||||||
|
|
||||||
def prng():
|
|
||||||
return random.random()
|
|
||||||
|
|
||||||
|
|
||||||
class SoupRobustnessExperiment:
|
|
||||||
|
|
||||||
def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, attack_chance,
|
|
||||||
train_nets, ST_steps, epochs, log_step_size, directory: Union[str, Path]):
|
|
||||||
super().__init__()
|
|
||||||
self.population_size = population_size
|
|
||||||
|
|
||||||
self.net_input_size = net_i_size
|
|
||||||
self.net_hidden_size = net_h_size
|
|
||||||
self.net_out_size = net_o_size
|
|
||||||
self.net_learning_rate = learning_rate
|
|
||||||
self.attack_chance = attack_chance
|
|
||||||
self.train_nets = train_nets
|
|
||||||
# self.SA_steps = SA_steps
|
|
||||||
self.ST_steps = ST_steps
|
|
||||||
self.epochs = epochs
|
|
||||||
self.log_step_size = log_step_size
|
|
||||||
|
|
||||||
self.loss_history = []
|
|
||||||
|
|
||||||
self.fixpoint_counters = {
|
|
||||||
"identity_func": 0,
|
|
||||||
"divergent": 0,
|
|
||||||
"fix_zero": 0,
|
|
||||||
"fix_weak": 0,
|
|
||||||
"fix_sec": 0,
|
|
||||||
"other_func": 0
|
|
||||||
}
|
|
||||||
# <self.fixpoint_counters_history> is used for keeping track of the amount of fixpoints in %
|
|
||||||
self.fixpoint_counters_history = []
|
|
||||||
self.id_functions = []
|
|
||||||
|
|
||||||
self.directory = Path(directory)
|
|
||||||
self.directory.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
self.population = []
|
|
||||||
self.populate_environment()
|
|
||||||
|
|
||||||
self.evolve()
|
|
||||||
self.fixpoint_percentage()
|
|
||||||
self.weights_evolution_3d_experiment()
|
|
||||||
self.count_fixpoints()
|
|
||||||
self.visualize_loss()
|
|
||||||
|
|
||||||
self.time_to_vergence, self.time_as_fixpoint = self.test_robustness()
|
|
||||||
|
|
||||||
def populate_environment(self):
|
|
||||||
loop_population_size = tqdm(range(self.population_size))
|
|
||||||
for i in tqdm(range(self.population_size)):
|
|
||||||
loop_population_size.set_description("Populating soup experiment %s" % i)
|
|
||||||
|
|
||||||
net_name = f"soup_network_{i}"
|
|
||||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
|
||||||
self.population.append(net)
|
|
||||||
|
|
||||||
def evolve(self):
|
|
||||||
""" Evolving consists of attacking & self-training. """
|
|
||||||
|
|
||||||
loop_epochs = tqdm(range(self.epochs))
|
|
||||||
for i in loop_epochs:
|
|
||||||
loop_epochs.set_description("Evolving soup %s" % i)
|
|
||||||
|
|
||||||
# A network attacking another network with a given percentage
|
|
||||||
if random.randint(1, 100) <= self.attack_chance:
|
|
||||||
random_net1, random_net2 = random.sample(range(self.population_size), 2)
|
|
||||||
random_net1 = self.population[random_net1]
|
|
||||||
random_net2 = self.population[random_net2]
|
|
||||||
print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
|
|
||||||
random_net1.attack(random_net2)
|
|
||||||
|
|
||||||
# Self-training each network in the population
|
|
||||||
for j in range(self.population_size):
|
|
||||||
net = self.population[j]
|
|
||||||
|
|
||||||
for _ in range(self.ST_steps):
|
|
||||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
|
||||||
|
|
||||||
# Testing for fixpoints after each batch of ST steps to see relevant data
|
|
||||||
if i % self.ST_steps == 0:
|
|
||||||
test_for_fixpoints(self.fixpoint_counters, self.population)
|
|
||||||
fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
|
|
||||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
|
||||||
|
|
||||||
# Resetting the fixpoint counter. Last iteration not to be reset -
|
|
||||||
# it is important for the bar_chart_fixpoints().
|
|
||||||
if i < self.epochs:
|
|
||||||
self.reset_fixpoint_counters()
|
|
||||||
|
|
||||||
def test_robustness(self, print_it=True, noise_levels=10, seeds=10):
|
|
||||||
# assert (len(self.id_functions) == 1 and seeds > 1) or (len(self.id_functions) > 1 and seeds == 1)
|
|
||||||
is_synthetic = True if len(self.id_functions) > 1 and seeds == 1 else False
|
|
||||||
avg_time_to_vergence = [[0 for _ in range(noise_levels)] for _ in
|
|
||||||
range(seeds if is_synthetic else len(self.id_functions))]
|
|
||||||
avg_time_as_fixpoint = [[0 for _ in range(noise_levels)] for _ in
|
|
||||||
range(seeds if is_synthetic else len(self.id_functions))]
|
|
||||||
row_headers = []
|
|
||||||
data_pos = 0
|
|
||||||
# This checks wether to use synthetic setting with multiple seeds
|
|
||||||
# or multi network settings with a singlee seed
|
|
||||||
|
|
||||||
df = pd.DataFrame(columns=['seed', 'noise_level', 'application_step', 'absolute_loss'])
|
|
||||||
for i, fixpoint in enumerate(self.id_functions): # 1 / n
|
|
||||||
row_headers.append(fixpoint.name)
|
|
||||||
for seed in range(seeds): # n / 1
|
|
||||||
for noise_level in range(noise_levels):
|
|
||||||
self_application_steps = 1
|
|
||||||
clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
|
|
||||||
f"{fixpoint.name}_clone_noise10e-{noise_level}")
|
|
||||||
clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))
|
|
||||||
clone = clone.apply_noise(pow(10, -noise_level))
|
|
||||||
|
|
||||||
while not is_zero_fixpoint(clone) and not is_divergent(clone):
|
|
||||||
if is_identity_function(clone):
|
|
||||||
avg_time_as_fixpoint[i][noise_level] += 1
|
|
||||||
|
|
||||||
# -> before
|
|
||||||
clone_weight_pre_application = clone.input_weight_matrix()
|
|
||||||
target_data_pre_application = clone.create_target_weights(clone_weight_pre_application)
|
|
||||||
|
|
||||||
clone.self_application(1, self.log_step_size)
|
|
||||||
avg_time_to_vergence[i][noise_level] += 1
|
|
||||||
# -> after
|
|
||||||
clone_weight_post_application = clone.input_weight_matrix()
|
|
||||||
target_data_post_application = clone.create_target_weights(clone_weight_post_application)
|
|
||||||
|
|
||||||
absolute_loss = F.l1_loss(target_data_pre_application, target_data_post_application).item()
|
|
||||||
|
|
||||||
setting = i if is_synthetic else seed
|
|
||||||
|
|
||||||
df.loc[data_pos] = [setting, noise_level, self_application_steps, absolute_loss]
|
|
||||||
data_pos += 1
|
|
||||||
self_application_steps += 1
|
|
||||||
|
|
||||||
# calculate the average:
|
|
||||||
df = df.replace([np.inf, -np.inf], np.nan)
|
|
||||||
df = df.dropna()
|
|
||||||
# sns.set(rc={'figure.figsize': (10, 50)})
|
|
||||||
sns.set_theme(style="ticks")
|
|
||||||
bx = sns.catplot(data=df[df['absolute_loss'] < 1], y='absolute_loss', x='application_step', kind='box',
|
|
||||||
col='noise_level', col_wrap=3, showfliers=False)
|
|
||||||
|
|
||||||
directory = Path('output') / 'robustness'
|
|
||||||
filename = f"absolute_loss_perapplication_boxplot_grid.png"
|
|
||||||
filepath = directory / filename
|
|
||||||
|
|
||||||
plt.savefig(str(filepath))
|
|
||||||
|
|
||||||
if print_it:
|
|
||||||
col_headers = [str(f"10-{d}") for d in range(noise_levels)]
|
|
||||||
|
|
||||||
print(f"\nAppplications steps until divergence / zero: ")
|
|
||||||
print(tabulate(avg_time_to_vergence, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
|
||||||
|
|
||||||
print(f"\nTime as fixpoint: ")
|
|
||||||
print(tabulate(avg_time_as_fixpoint, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
|
||||||
|
|
||||||
return avg_time_as_fixpoint, avg_time_to_vergence
|
|
||||||
|
|
||||||
def weights_evolution_3d_experiment(self):
|
|
||||||
exp_name = f"soup_{self.population_size}_nets_{self.ST_steps}_training_{self.epochs}_epochs"
|
|
||||||
return plot_3d_soup(self.population, exp_name, self.directory)
|
|
||||||
|
|
||||||
def count_fixpoints(self):
|
|
||||||
self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.population)
|
|
||||||
exp_details = f"Evolution steps: {self.epochs} epochs"
|
|
||||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
|
|
||||||
exp_details)
|
|
||||||
|
|
||||||
def fixpoint_percentage(self):
|
|
||||||
runs = self.epochs / self.ST_steps
|
|
||||||
SA_steps = None
|
|
||||||
line_chart_fixpoints(self.fixpoint_counters_history, runs, self.ST_steps, SA_steps, self.directory,
|
|
||||||
self.population_size)
|
|
||||||
|
|
||||||
def visualize_loss(self):
|
|
||||||
for i in range(len(self.population)):
|
|
||||||
net_loss_history = self.population[i].loss_history
|
|
||||||
self.loss_history.append(net_loss_history)
|
|
||||||
|
|
||||||
plot_loss(self.loss_history, self.directory)
|
|
||||||
|
|
||||||
def reset_fixpoint_counters(self):
|
|
||||||
self.fixpoint_counters = {
|
|
||||||
"identity_func": 0,
|
|
||||||
"divergent": 0,
|
|
||||||
"fix_zero": 0,
|
|
||||||
"fix_weak": 0,
|
|
||||||
"fix_sec": 0,
|
|
||||||
"other_func": 0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
NET_INPUT_SIZE = 4
|
|
||||||
NET_OUT_SIZE = 1
|
|
||||||
|
|
||||||
soup_epochs = 100
|
|
||||||
soup_log_step_size = 5
|
|
||||||
soup_ST_steps = 20
|
|
||||||
# soup_SA_steps = 10
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
soup_population_size = 4
|
|
||||||
soup_net_hidden_size = 2
|
|
||||||
soup_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
# soup_attack_chance in %
|
|
||||||
soup_attack_chance = 10
|
|
||||||
|
|
||||||
# not used yet: soup_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
|
||||||
soup_train_nets = "no"
|
|
||||||
soup_name_hash = random.getrandbits(32)
|
|
||||||
soup_synthetic = True
|
|
||||||
|
|
||||||
print(f"Running the robustness comparison experiment:")
|
|
||||||
SoupRobustnessExperiment(
|
|
||||||
population_size=soup_population_size,
|
|
||||||
net_i_size=NET_INPUT_SIZE,
|
|
||||||
net_h_size=soup_net_hidden_size,
|
|
||||||
net_o_size=NET_OUT_SIZE,
|
|
||||||
learning_rate=soup_net_learning_rate,
|
|
||||||
attack_chance=soup_attack_chance,
|
|
||||||
train_nets=soup_train_nets,
|
|
||||||
ST_steps=soup_ST_steps,
|
|
||||||
epochs=soup_epochs,
|
|
||||||
log_step_size=soup_log_step_size,
|
|
||||||
directory=Path('output') / 'robustness' / f'{soup_name_hash}'
|
|
||||||
)
|
|
150
main.py
150
main.py
@ -1,150 +0,0 @@
|
|||||||
from experiments import *
|
|
||||||
import random
|
|
||||||
|
|
||||||
|
|
||||||
# TODO maybe add also SA to the soup
|
|
||||||
|
|
||||||
def run_experiments(run_ST, run_SA, run_soup, run_mixed, run_robustness):
|
|
||||||
if run_ST:
|
|
||||||
print(f"Running the ST experiment:")
|
|
||||||
run_ST_experiment(ST_population_size, ST_log_step_size, NET_INPUT_SIZE, ST_net_hidden_size, NET_OUT_SIZE,
|
|
||||||
ST_net_learning_rate,
|
|
||||||
ST_epochs, ST_runs, ST_runs_name, ST_name_hash)
|
|
||||||
if run_SA:
|
|
||||||
print(f"\n Running the SA experiment:")
|
|
||||||
run_SA_experiment(SA_population_size, SA_log_step_size, NET_INPUT_SIZE, SA_net_hidden_size, NET_OUT_SIZE,
|
|
||||||
SA_net_learning_rate, SA_runs, SA_runs_name, SA_name_hash,
|
|
||||||
SA_steps, SA_train_nets, SA_ST_steps)
|
|
||||||
if run_soup:
|
|
||||||
print(f"\n Running the soup experiment:")
|
|
||||||
run_soup_experiment(soup_population_size, soup_attack_chance, NET_INPUT_SIZE, soup_net_hidden_size,
|
|
||||||
NET_OUT_SIZE, soup_net_learning_rate, soup_epochs, soup_log_step_size, soup_runs,
|
|
||||||
soup_runs_name, soup_name_hash, soup_ST_steps, soup_train_nets)
|
|
||||||
if run_mixed:
|
|
||||||
print(f"\n Running the mixed experiment:")
|
|
||||||
run_mixed_experiment(mixed_population_size, NET_INPUT_SIZE, mixed_net_hidden_size, NET_OUT_SIZE,
|
|
||||||
mixed_net_learning_rate, mixed_train_nets, mixed_epochs, mixed_SA_steps,
|
|
||||||
mixed_ST_steps_between_SA, mixed_log_step_size, mixed_name_hash, mixed_total_runs,
|
|
||||||
mixed_runs_name)
|
|
||||||
if run_robustness:
|
|
||||||
print(f"Running the robustness experiment:")
|
|
||||||
run_robustness_experiment(rob_population_size, rob_log_step_size, NET_INPUT_SIZE, rob_net_hidden_size,
|
|
||||||
NET_OUT_SIZE, rob_net_learning_rate, rob_ST_steps, rob_runs, rob_runs_name,
|
|
||||||
rob_name_hash)
|
|
||||||
|
|
||||||
if not run_ST and not run_SA and not run_soup and not run_mixed and not run_robustness:
|
|
||||||
print(f"No experiments to be run.")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# Constants:
|
|
||||||
NET_INPUT_SIZE = 4
|
|
||||||
NET_OUT_SIZE = 1
|
|
||||||
run_ST_experiment_bool = False
|
|
||||||
run_SA_experiment_bool = False
|
|
||||||
run_soup_experiment_bool = False
|
|
||||||
run_mixed_experiment_bool = False
|
|
||||||
run_robustness_bool = True
|
|
||||||
|
|
||||||
""" ------------------------------------- Self-training (ST) experiment ------------------------------------- """
|
|
||||||
|
|
||||||
# Define number of runs & name:
|
|
||||||
ST_runs = 1
|
|
||||||
ST_runs_name = "test-27"
|
|
||||||
ST_epochs = 1000
|
|
||||||
ST_log_step_size = 10
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
ST_population_size = 1
|
|
||||||
ST_net_hidden_size = 2
|
|
||||||
|
|
||||||
ST_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
ST_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
""" ----------------------------------- Self-application (SA) experiment ----------------------------------- """
|
|
||||||
# Define number of runs, name, etc.:
|
|
||||||
SA_runs_name = "test-17"
|
|
||||||
SA_runs = 2
|
|
||||||
SA_steps = 100
|
|
||||||
SA_app_batch_size = 5
|
|
||||||
SA_train_batch_size = 5
|
|
||||||
SA_log_step_size = 5
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
SA_population_size = 10
|
|
||||||
SA_net_hidden_size = 2
|
|
||||||
|
|
||||||
SA_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
# SA_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
|
||||||
SA_train_nets = "no"
|
|
||||||
SA_ST_steps = 300
|
|
||||||
|
|
||||||
SA_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
""" -------------------------------------------- Soup experiment -------------------------------------------- """
|
|
||||||
# Define number of runs, name, etc.:
|
|
||||||
soup_runs = 1
|
|
||||||
soup_runs_name = "test-16"
|
|
||||||
soup_epochs = 100
|
|
||||||
soup_log_step_size = 5
|
|
||||||
soup_ST_steps = 20
|
|
||||||
# soup_SA_steps = 10
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
soup_population_size = 5
|
|
||||||
soup_net_hidden_size = 2
|
|
||||||
soup_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
# soup_attack_chance in %
|
|
||||||
soup_attack_chance = 10
|
|
||||||
|
|
||||||
# not used yet: soup_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
|
||||||
soup_train_nets = "no"
|
|
||||||
|
|
||||||
soup_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
""" ------------------------------------------- Mixed experiment -------------------------------------------- """
|
|
||||||
|
|
||||||
# Define number of runs, name, etc.:
|
|
||||||
mixed_runs_name = "test-17"
|
|
||||||
mixed_total_runs = 2
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
mixed_population_size = 5
|
|
||||||
mixed_net_hidden_size = 2
|
|
||||||
|
|
||||||
mixed_epochs = 10
|
|
||||||
# Set the <batch_size> to the same value as <ST_steps_between_SA> to see the weights plotted
|
|
||||||
# ONLY after each epoch, and not after a certain amount of steps.
|
|
||||||
mixed_log_step_size = 5
|
|
||||||
mixed_ST_steps_between_SA = 50
|
|
||||||
mixed_SA_steps = 4
|
|
||||||
|
|
||||||
mixed_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
# mixed_train_nets has 2 possible values "before_SA", "after_SA".
|
|
||||||
mixed_train_nets = "after_SA"
|
|
||||||
|
|
||||||
mixed_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
""" ----------------------------------------- Robustness experiment ----------------------------------------- """
|
|
||||||
# Define number of runs & name:
|
|
||||||
rob_runs = 1
|
|
||||||
rob_runs_name = "test-07"
|
|
||||||
rob_ST_steps = 1500
|
|
||||||
rob_log_step_size = 10
|
|
||||||
|
|
||||||
# Define number of networks & their architecture
|
|
||||||
rob_population_size = 1
|
|
||||||
rob_net_hidden_size = 2
|
|
||||||
|
|
||||||
rob_net_learning_rate = 0.04
|
|
||||||
|
|
||||||
rob_name_hash = random.getrandbits(32)
|
|
||||||
|
|
||||||
""" ---------------------------------------- Running the experiment ----------------------------------------- """
|
|
||||||
|
|
||||||
run_experiments(run_ST_experiment_bool, run_SA_experiment_bool, run_soup_experiment_bool, run_mixed_experiment_bool,
|
|
||||||
run_robustness_bool)
|
|
218
meta_task_exp.py
Normal file
218
meta_task_exp.py
Normal file
@ -0,0 +1,218 @@
|
|||||||
|
|
||||||
|
from collections import defaultdict
from pathlib import Path

import platform

import torchmetrics
import numpy as np
import torch

from torch import nn
from torch.nn import Flatten
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor, Compose, Resize
from tqdm import tqdm

# noinspection DuplicatedCode
from experiments.meta_task_utility import ToFloat, new_storage_df, train_task, checkpoint_and_validate, flat_for_store, \
    plot_training_result, plot_training_particle_types, plot_network_connectivity_by_fixtype, \
    run_particle_dropout_and_plot

if platform.node() == 'CarbonX':
    debug = True
    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
    print("@ Warning, Debugging Config@!!!!!! @")
    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
else:
    debug = False

from network import MetaNet
from functionalities_test import test_for_fixpoints

WORKER = 10 if not debug else 2
debug = False
BATCHSIZE = 2000 if not debug else 50
EPOCH = 50
VALIDATION_FRQ = 3 if not debug else 1
SELF_TRAIN_FRQ = 1 if not debug else 1
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

DATA_PATH = Path('data')
DATA_PATH.mkdir(exist_ok=True, parents=True)

if debug:
    torch.autograd.set_detect_anomaly(True)


if __name__ == '__main__':

    training = True
    n_st = 300
    activation = None  # nn.ReLU()

    for weight_hidden_size in [4, 5]:

        weight_hidden_size = weight_hidden_size
        residual_skip = True
        n_seeds = 3
        depth = 3
        width = 3
        out = 10

        data_path = Path('data')
        data_path.mkdir(exist_ok=True, parents=True)

        # noinspection PyUnresolvedReferences
        ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
        res_str = f'{"" if residual_skip else "_no_res"}'
        st_str = f'_nst_{n_st}'

        config_str = f'{res_str}{ac_str}{st_str}'
        exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}'

        if not training:
            # noinspection PyRedeclaration
            exp_path = Path('output') / 'add_st_50_5'

        for seed in range(n_seeds):
            seed_path = exp_path / str(seed)

            df_store_path = seed_path / 'train_store.csv'
            weight_store_path = seed_path / 'weight_store.csv'
            srnn_parameters = dict()

            if training:
                # Check if files do exist on project location, warn and break.
                for path in [df_store_path, weight_store_path]:
                    assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'

                utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
                try:
                    train_dataset = MNIST(str(DATA_PATH), transform=utility_transforms)
                except RuntimeError:
                    train_dataset = MNIST(str(DATA_PATH), transform=utility_transforms, download=True)
                train_loader = DataLoader(train_dataset, batch_size=BATCHSIZE, shuffle=True,
                                          drop_last=True, num_workers=WORKER)

                interface = np.prod(train_dataset[0][0].shape)
                metanet = MetaNet(interface, depth=depth, width=width, out=out,
                                  residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
                                  activation=activation
                                  ).to(DEVICE)

                loss_fn = nn.CrossEntropyLoss()
                optimizer = torch.optim.SGD(metanet.parameters(), lr=0.004, momentum=0.9)

                train_store = new_storage_df('train', None)
                weight_store = new_storage_df('weights', metanet.particle_parameter_count)

                for epoch in tqdm(range(EPOCH), desc='Train - Epochs'):
                    is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
                    is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
                    metanet = metanet.train()

                    # Init metrics, even if we do not need them every epoch:
                    metric = torchmetrics.Accuracy()
                    n_st_per_batch = n_st // len(train_loader)

                    for batch, (batch_x, batch_y) in tqdm(enumerate(train_loader),
                                                          total=len(train_loader), desc='MetaNet Train - Batch'
                                                          ):
                        # Self Train
                        self_train_loss = metanet.combined_self_train(n_st_per_batch,
                                                                      reduction='mean', per_particle=False)
                        # noinspection PyUnboundLocalVariable
                        st_step_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
                        st_step_log.update(dict(Epoch=epoch, Batch=batch))
                        train_store.loc[train_store.shape[0]] = st_step_log

                        # Task Train
                        tsk_step_log, y_pred = train_task(metanet, optimizer, loss_fn, batch_x, batch_y)
                        tsk_step_log.update(dict(Epoch=epoch, Batch=batch))
                        train_store.loc[train_store.shape[0]] = tsk_step_log
                        metric(y_pred.cpu(), batch_y.cpu())

                    if is_validation_epoch:
                        metanet = metanet.eval()
                        try:
                            validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
                                                  Metric='Train Accuracy', Score=metric.compute().item())
                            train_store.loc[train_store.shape[0]] = validation_log
                        except RuntimeError:
                            pass

                        accuracy = checkpoint_and_validate(metanet, seed_path, epoch).item()
                        validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
                                              Metric='Test Accuracy', Score=accuracy)
                        train_store.loc[train_store.shape[0]] = validation_log

                    if is_validation_epoch:
                        counter_dict = defaultdict(lambda: 0)
                        # This returns ID-functions
                        _ = test_for_fixpoints(counter_dict, list(metanet.particles))
                        counter_dict = dict(counter_dict)
                        for key, value in counter_dict.items():
                            val_step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
                            train_store.loc[train_store.shape[0]] = val_step_log
                        tqdm.write(f'Fixpoint Tester Results: {counter_dict}')

                    # FLUSH to disk
                    if is_validation_epoch:
                        for particle in metanet.particles:
                            weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
                            weight_store.loc[weight_store.shape[0]] = weight_log
                    train_store.to_csv(df_store_path, mode='a',
                                       header=not df_store_path.exists(), index=False)
                    weight_store.to_csv(weight_store_path, mode='a',
                                        header=not weight_store_path.exists(), index=False)
                    train_store = new_storage_df('train', None)
                    weight_store = new_storage_df('weights', metanet.particle_parameter_count)

                ###########################################################
                # EPOCHS ended
                metanet = metanet.eval()

                counter_dict = defaultdict(lambda: 0)
                # This returns ID-functions
                _ = test_for_fixpoints(counter_dict, list(metanet.particles))
                for key, value in dict(counter_dict).items():
                    step_log = dict(Epoch=int(EPOCH)+1, Batch=BATCHSIZE, Metric=key, Score=value)
                    train_store.loc[train_store.shape[0]] = step_log
                accuracy = checkpoint_and_validate(metanet, seed_path, EPOCH, final_model=True)
                validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
                                      Metric='Test Accuracy', Score=accuracy.item())
                train_store.loc[train_store.shape[0]] = validation_log
                for particle in metanet.particles:
                    weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
                    weight_store.loc[weight_store.shape[0]] = weight_log

                # FLUSH to disk
                train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
                weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)

            plot_training_result(df_store_path)
            plot_training_particle_types(df_store_path)

            try:
                model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
            except StopIteration:
                print('Model pattern did not trigger.')
                print(f'Search path was: {seed_path}:')
                print(f'Found Models are: {list(seed_path.rglob("*.tp"))}')
                exit(1)

            try:
                # noinspection PyUnboundLocalVariable
                run_particle_dropout_and_plot(model_path)
            except (ValueError, NameError) as e:
                print(e)
            try:
                plot_network_connectivity_by_fixtype(model_path)
            except (ValueError, NameError) as e:
                print(e)

        if n_seeds >= 2:
            pass
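new_storage_df is imported from experiments.meta_task_utility and is not shown in this commit; judging only from the rows appended above, it presumably returns an empty pandas DataFrame with a fixed column layout. A stand-in sketch under that assumption (the column names of the weight frame are guesses):

import pandas as pd

def new_storage_df_sketch(kind, particle_parameter_count):
    # Assumption: 'train' rows carry (Epoch, Batch, Metric, Score); 'weights' rows carry
    # the epoch, the particle name and one column per flattened particle parameter.
    if kind == 'train':
        return pd.DataFrame(columns=['Epoch', 'Batch', 'Metric', 'Score'])
    if kind == 'weights':
        return pd.DataFrame(columns=['Epoch', 'Name', *(f'w_{i}' for i in range(particle_parameter_count))])
    raise ValueError(f'Unknown storage kind: {kind}')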
meta_task_exp_small.py (new file, 188 lines)
@ -0,0 +1,188 @@
from collections import defaultdict
from pathlib import Path

import numpy as np
import torch
import torchmetrics
from torch import nn
from torch.utils.data import DataLoader
from tqdm import tqdm

from experiments.meta_task_small_utility import AddTaskDataset, checkpoint_and_validate, train_task
from network import MetaNet
from functionalities_test import test_for_fixpoints
from experiments.meta_task_utility import new_storage_df, flat_for_store, plot_training_result, \
    plot_training_particle_types, run_particle_dropout_and_plot, plot_network_connectivity_by_fixtype

WORKER = 0
BATCHSIZE = 50
EPOCH = 30
VALIDATION_FRQ = 3
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


if __name__ == '__main__':

    training = True
    n_st = 500
    activation = None  # nn.ReLU()

    for weight_hidden_size in [3, 4, 5]:

        tsk_threshold = 0.85
        weight_hidden_size = weight_hidden_size
        residual_skip = True
        n_seeds = 3
        depth = 3
        width = 3
        out = 1

        data_path = Path('data')
        data_path.mkdir(exist_ok=True, parents=True)

        # noinspection PyUnresolvedReferences
        ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
        res_str = f'{"" if residual_skip else "_no_res"}'
        # dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'

        config_str = f'{res_str}'
        exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'

        if not training:
            # noinspection PyRedeclaration
            exp_path = Path('output') / 'mn_st_n_2_100_4'

        for seed in range(n_seeds):
            seed_path = exp_path / str(seed)

            model_path = seed_path / '0000_trained_model.zip'
            df_store_path = seed_path / 'train_store.csv'
            weight_store_path = seed_path / 'weight_store.csv'
            srnn_parameters = dict()

            if training:
                # Check if files do exist on project location, warn and break.
                for path in [model_path, df_store_path, weight_store_path]:
                    assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'

                train_data = AddTaskDataset()
                valid_data = AddTaskDataset()
                train_load = DataLoader(train_data, batch_size=BATCHSIZE, shuffle=True,
                                        drop_last=True, num_workers=WORKER)
                vali_load = DataLoader(valid_data, batch_size=BATCHSIZE, shuffle=False,
                                       drop_last=True, num_workers=WORKER)

                interface = np.prod(train_data[0][0].shape)
                metanet = MetaNet(interface, depth=depth, width=width, out=out,
                                  residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
                                  activation=activation
                                  ).to(DEVICE)

                loss_fn = nn.MSELoss()
                optimizer = torch.optim.SGD(metanet.parameters(), lr=0.004, momentum=0.9)

                train_store = new_storage_df('train', None)
                weight_store = new_storage_df('weights', metanet.particle_parameter_count)

                for epoch in tqdm(range(EPOCH), desc='Train - Epochs'):
                    is_validation_epoch = epoch % VALIDATION_FRQ == 0
                    metanet = metanet.train()

                    # Init metrics, even if we do not need them every epoch:
                    metric = torchmetrics.MeanAbsoluteError()
                    n_st_per_batch = n_st // len(train_load)

                    for batch, (batch_x, batch_y) in tqdm(enumerate(train_load),
                                                          total=len(train_load), desc='MetaNet Train - Batch'
                                                          ):
                        # Self Train
                        self_train_loss = metanet.combined_self_train(n_st_per_batch,
                                                                      reduction='mean', per_particle=False)
                        # noinspection PyUnboundLocalVariable
                        st_step_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
                        st_step_log.update(dict(Epoch=epoch, Batch=batch))
                        train_store.loc[train_store.shape[0]] = st_step_log

                        # Task Train
                        tsk_step_log, y_pred = train_task(metanet, optimizer, loss_fn, batch_x, batch_y)
                        tsk_step_log.update(dict(Epoch=epoch, Batch=batch))
                        train_store.loc[train_store.shape[0]] = tsk_step_log
                        metric(y_pred.cpu(), batch_y.cpu())

                    if is_validation_epoch:
                        metanet = metanet.eval()
                        if metric.total.item():
                            validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
                                                  Metric='Train Accuracy', Score=metric.compute().item())
                            train_store.loc[train_store.shape[0]] = validation_log

                        accuracy = checkpoint_and_validate(metanet, seed_path, epoch, vali_load).item()
                        validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
                                              Metric='Test Accuracy', Score=accuracy)
                        train_store.loc[train_store.shape[0]] = validation_log

                    if is_validation_epoch:
                        counter_dict = defaultdict(lambda: 0)
                        # This returns ID-functions
                        _ = test_for_fixpoints(counter_dict, list(metanet.particles))
                        counter_dict = dict(counter_dict)
                        for key, value in counter_dict.items():
                            val_step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
                            train_store.loc[train_store.shape[0]] = val_step_log
                        tqdm.write(f'Fixpoint Tester Results: {counter_dict}')

                    # FLUSH to disk
                    if is_validation_epoch:
                        for particle in metanet.particles:
                            weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
                            weight_store.loc[weight_store.shape[0]] = weight_log
                    train_store.to_csv(df_store_path, mode='a',
                                       header=not df_store_path.exists(), index=False)
                    weight_store.to_csv(weight_store_path, mode='a',
                                        header=not weight_store_path.exists(), index=False)
                    train_store = new_storage_df('train', None)
                    weight_store = new_storage_df('weights', metanet.particle_parameter_count)

                ###########################################################
                # EPOCHS ended
                metanet = metanet.eval()

                counter_dict = defaultdict(lambda: 0)
                # This returns ID-functions
                _ = test_for_fixpoints(counter_dict, list(metanet.particles))
                for key, value in dict(counter_dict).items():
                    step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
                    train_store.loc[train_store.shape[0]] = step_log
                accuracy = checkpoint_and_validate(metanet, seed_path, EPOCH, vali_load, final_model=True)
                validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
                                      Metric='Test Accuracy', Score=accuracy.item())
                for particle in metanet.particles:
                    weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
                    weight_store.loc[weight_store.shape[0]] = weight_log

                train_store.loc[train_store.shape[0]] = validation_log
                train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
                weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)

            plot_training_result(df_store_path)
            plot_training_particle_types(df_store_path)

            try:
                model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
            except StopIteration:
                print('Model pattern did not trigger.')
                print(f'Search path was: {seed_path}:')
                print(f'Found Models are: {list(seed_path.rglob("*.tp"))}')
                exit(1)

            try:
                run_particle_dropout_and_plot(model_path)
            except ValueError as e:
                print(e)
            try:
                plot_network_connectivity_by_fixtype(model_path)
            except ValueError as e:
                print(e)

        if n_seeds >= 2:
            pass
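checkpoint_and_validate comes from experiments.meta_task_small_utility and is only called above, never defined in this commit. Based on its call sites (model, output path, epoch, validation loader, optional final_model flag, and a tensor-like return that is logged as 'Test Accuracy'), a rough sketch could look like the following; the checkpoint naming and the choice of metric are assumptions, not the utility's real implementation.

@torch.no_grad()
def checkpoint_and_validate_sketch(model, out_path, epoch, validation_loader, final_model=False):
    model = model.eval()
    # Assumed checkpoint naming/format; the real utility may store the full model object instead.
    ckpt_name = f'trained_model_e{epoch}{"_final" if final_model else ""}.tp'
    torch.save(model.state_dict(), Path(out_path) / ckpt_name)
    # The add-task script logs this value as 'Test Accuracy'; for a regression task the
    # mean absolute error is the assumed underlying score here.
    metric = torchmetrics.MeanAbsoluteError()
    for batch_x, batch_y in validation_loader:
        metric(model(batch_x.to(DEVICE)).cpu(), batch_y)
    return metric.compute()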
@ -53,8 +53,9 @@ class MultiplyByXTaskDataset(Dataset):


 if __name__ == '__main__':
-    net = Net(5, 4, 1)
+    net = Net(5, 4, 1, lr=0.004)
     multiplication_target = 0.03
+    st_steps = 0

     loss_fn = nn.MSELoss()
     optimizer = torch.optim.SGD(net.parameters(), lr=0.004, momentum=0.9)

@ -68,31 +69,16 @@ if __name__ == '__main__':
     mean_self_tain_loss = []

     for batch, (batch_x, batch_y) in tenumerate(dataloader):
-        self_train_loss, _ = net.self_train(2, save_history=False, learning_rate=0.004)
+        self_train_loss, _ = net.self_train(1000 // 20, save_history=False)

-        for _ in range(2):
-            optimizer.zero_grad()
-            input_data = net.input_weight_matrix()
-            target_data = net.create_target_weights(input_data)
-            output = net(input_data)
-            self_train_loss = loss_fn(output, target_data)
-            self_train_loss.backward()
-            optimizer.step()
         is_fixpoint = functionalities_test.is_identity_function(net)
+        if not is_fixpoint:
+            st_steps += 2

-        optimizer.zero_grad()
-        batch_x_emb = torch.zeros(batch_x.shape[0], 5)
-        batch_x_emb[:, -1] = batch_x.squeeze()
-        y = net(batch_x_emb)
-        loss = loss_fn(y, batch_y)
-
-        loss.backward()
-        optimizer.step()
         if is_fixpoint:
-            tqdm.write(f'is fixpoint after st : {is_fixpoint}')
+            tqdm.write(f'is fixpoint after st : {is_fixpoint}, first reached after st_steps: {st_steps}')
             tqdm.write(f'is fixpoint after tsk: {functionalities_test.is_identity_function(net)}')

-        mean_batch_loss.append(loss.detach())
+        #mean_batch_loss.append(loss.detach())
         mean_self_tain_loss.append(self_train_loss.detach())

         train_frame.loc[train_frame.shape[0]] = dict(Epoch=epoch, Batch=batch,
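The loop above delegates the fixpoint check to functionalities_test.is_identity_function; conceptually, a network counts as a fixpoint when it reproduces its own weights. A rough illustration of that check, with the tolerance value and the comparison details assumed rather than taken from the actual helper:

import torch

def is_identity_function_sketch(net, epsilon=1e-5):
    # Assumed logic: feed the net its own (position-encoded) weights and compare the
    # prediction against the actual target weights within a small tolerance.
    input_data = net.input_weight_matrix()
    target_data = net.create_target_weights(input_data)
    prediction = net(input_data)
    return torch.allclose(prediction.detach(), target_data.detach(), rtol=0, atol=epsilon)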
network.py (67 lines changed)
@ -75,7 +75,7 @@ class Net(nn.Module):
             i += size
         return self

-    def __init__(self, i_size: int, h_size: int, o_size: int, name=None, start_time=1) -> None:
+    def __init__(self, i_size: int, h_size: int, o_size: int, name=None, start_time=1, lr=0.004) -> None:
         super().__init__()
         self.start_time = start_time

@ -104,6 +104,7 @@ class Net(nn.Module):

         self._weight_pos_enc_and_mask = None
         self.apply(xavier_init)
+        self.optimizer = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9)

     @property
     def _weight_pos_enc(self):

@ -117,14 +118,17 @@ class Net(nn.Module):
                 torch.cat(
                     (
                         # Those are the weights
-                        torch.full((x.numel(), 1), 0, device=d),
+                        torch.full((x.numel(), 1), 0, device=d, requires_grad=False),
                         # Layer enumeration
-                        torch.full((x.numel(), 1), layer_id, device=d),
+                        torch.full((x.numel(), 1), layer_id, device=d, requires_grad=False),
                         # Cell Enumeration
-                        torch.arange(layer.out_features, device=d).repeat_interleave(layer.in_features).view(-1, 1),
+                        torch.arange(layer.out_features, device=d, requires_grad=False
+                                     ).repeat_interleave(layer.in_features).view(-1, 1),
                         # Weight Enumeration within the Cells
-                        torch.arange(layer.in_features, device=d).view(-1, 1).repeat(layer.out_features, 1),
-                        *(torch.full((x.numel(), 1), 0, device=d) for _ in range(self.input_size-4))
+                        torch.arange(layer.in_features, device=d, requires_grad=False
+                                     ).view(-1, 1).repeat(layer.out_features, 1),
+                        *(torch.full((x.numel(), 1), 0, device=d, requires_grad=False
+                                     ) for _ in range(self.input_size-4))
                     ), dim=1)
                 )
                 # Finalize
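To make the positional encoding above concrete: for a Linear layer with in_features=3 and out_features=2, each of the six weights gets one row of [weight-value slot, layer id, output-cell index, input-weight index] (plus zero padding up to the net's input size). A small standalone illustration; the layer sizes and layer_id are made up for the example and the padding columns are omitted.

import torch

layer = torch.nn.Linear(3, 2, bias=False)
x, layer_id, d = layer.weight, 1, 'cpu'
pos_enc = torch.cat(
    (torch.full((x.numel(), 1), 0, device=d),         # weight-value slot, filled in later
     torch.full((x.numel(), 1), layer_id, device=d),  # layer enumeration
     # output-cell index per weight: 0,0,0,1,1,1
     torch.arange(layer.out_features, device=d).repeat_interleave(layer.in_features).view(-1, 1),
     # input-weight index within each cell: 0,1,2,0,1,2
     torch.arange(layer.in_features, device=d).view(-1, 1).repeat(layer.out_features, 1),
     ), dim=1)
print(pos_enc.shape)  # torch.Size([6, 4]) -> one positional row per weight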
@ -138,7 +142,7 @@ class Net(nn.Module):

         # computations
         # create a mask where pos is 0 if it is to be replaced
-        mask = torch.ones_like(weight_matrix)
+        mask = torch.ones_like(weight_matrix, requires_grad=False)
         mask[:, 0] = 0

         self._weight_pos_enc_and_mask = weight_matrix.detach(), mask.detach()

@ -175,22 +179,20 @@ class Net(nn.Module):
     def self_train(self,
                    training_steps: int,
                    log_step_size: int = 0,
-                   learning_rate: float = 0.0004,
-                   save_history: bool = True
+                   save_history: bool = False,
+                   reduction: str = 'mean'
                    ) -> (Tensor, list):
         """ Training a network to predict its own weights in order to self-replicate. """

-        optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
-
         for training_step in range(training_steps):
             self.number_trained += 1
-            optimizer.zero_grad()
+            self.optimizer.zero_grad()
             input_data = self.input_weight_matrix()
             target_data = self.create_target_weights(input_data)
             output = self(input_data)
-            loss = F.mse_loss(output, target_data)
+            loss = F.mse_loss(output, target_data, reduction=reduction)
             loss.backward()
-            optimizer.step()
+            self.optimizer.step()

             if save_history:
                 # Saving the history of the weights after a certain amount of steps (aka log_step_size) for research.

@ -207,7 +209,6 @@ class Net(nn.Module):
             self.s_train_weights_history.append(weights.T.detach().numpy())
             self.loss_history.append(loss.item())

-
         # Saving weights only at the end of a soup/mixed exp. epoch.
         if save_history:
             if "soup" in self.name or "mixed" in self.name:

@ -216,7 +217,7 @@ class Net(nn.Module):
                 self.loss_history.append(loss.item())

         self.trained = True
-        return loss, self.loss_history
+        return loss.detach(), self.loss_history

     def self_application(self, SA_steps: int, log_step_size: Union[int, None] = None):
         """ Inputting the weights of a network to itself for a number of steps, without backpropagation. """
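With the new signature, a self-train call reuses the optimizer created in __init__ instead of building a fresh one per call, and it returns a detached loss. A minimal usage sketch; it assumes Net is importable from network and that the constructor arguments mirror the hunk above.

from network import Net

net = Net(i_size=5, h_size=4, o_size=1, lr=0.004)
# 100 self-replication steps; save_history=False keeps the weight/loss history empty.
loss, history = net.self_train(100, save_history=False, reduction='mean')
print(loss.item())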
@ -463,21 +464,33 @@ class MetaNet(nn.Module):
     def particles(self):
         return (cell for metalayer in self.all_layers for cell in metalayer.particles)

-    def combined_self_train(self, optimizer, n_st_steps, reduction='mean'):
+    def combined_self_train(self, n_st_steps, reduction='mean', per_particle=True):

         losses = []
-        for particle in self.particles:
-            for _ in range(n_st_steps):
-                optimizer.zero_grad()
-                # Intergrate optimizer and backward function
-                input_data = particle.input_weight_matrix()
-                target_data = particle.create_target_weights(input_data)
-                output = particle(input_data)
-                losses.append(F.mse_loss(output, target_data, reduction=reduction))
-                losses.backward()
-                optimizer.step()
+        if per_particle:
+            for particle in self.particles:
+                loss, _ = particle.self_train(n_st_steps, reduction=reduction)
+                losses.append(loss.detach())
+        else:
+            optim = torch.optim.SGD(self.parameters(), lr=0.004, momentum=0.9)
+            for _ in range(n_st_steps):
+                optim.zero_grad()
+                train_losses = []
+                for particle in self.particles:
+                    # Intergrate optimizer and backward function
+                    input_data = particle.input_weight_matrix()
+                    target_data = particle.create_target_weights(input_data)
+                    output = particle(input_data)
+                    loss = F.mse_loss(output, target_data, reduction=reduction)
+                    train_losses.append(loss)
+                train_losses = torch.hstack(train_losses).sum(dim=-1, keepdim=True)
+                train_losses.backward()
+                optim.step()
+                losses.append(train_losses.detach())
         losses = torch.hstack(losses).sum(dim=-1, keepdim=True)
-        return losses.detach()
+        return losses

     @property
     def hyperparams(self):