refactoring and running experiments
parent b6c8859081
commit 69c904e156
@@ -1,6 +0,0 @@
from .mixed_setting_exp import run_mixed_experiment
from .robustness_exp import run_robustness_experiment
from .self_application_exp import run_SA_experiment
from .self_train_exp import run_ST_experiment
from .soup_exp import run_soup_experiment
import functionalities_test
@@ -1,535 +0,0 @@
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import platform
|
||||
|
||||
import pandas as pd
|
||||
import torchmetrics
|
||||
import numpy as np
|
||||
import torch
|
||||
from matplotlib import pyplot as plt
|
||||
import seaborn as sns
|
||||
from torch import nn
|
||||
from torch.nn import Flatten
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from torchvision.datasets import MNIST
|
||||
from torchvision.transforms import ToTensor, Compose, Resize
|
||||
from tqdm import tqdm
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
if platform.node() == 'CarbonX':
|
||||
debug = True
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
print("@ Warning, Debugging Config@!!!!!! @")
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
else:
|
||||
debug = False
|
||||
try:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
if __package__ is None:
|
||||
DIR = Path(__file__).resolve().parent
|
||||
sys.path.insert(0, str(DIR.parent))
|
||||
__package__ = DIR.name
|
||||
else:
|
||||
DIR = None
|
||||
except NameError:
|
||||
DIR = None
|
||||
pass
|
||||
|
||||
from network import MetaNet, FixTypes as ft
|
||||
from sparse_net import SparseNetwork
|
||||
from functionalities_test import test_for_fixpoints
|
||||
|
||||
WORKER = 10 if not debug else 2
|
||||
debug = False
|
||||
BATCHSIZE = 500 if not debug else 50
|
||||
EPOCH = 50
|
||||
VALIDATION_FRQ = 3 if not debug else 1
|
||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
DATA_PATH = Path('data')
|
||||
DATA_PATH.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
if debug:
|
||||
torch.autograd.set_detect_anomaly(True)
|
||||
|
||||
|
||||
class ToFloat:
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, x):
|
||||
return x.to(torch.float32)
|
||||
|
||||
|
||||
class AddTaskDataset(Dataset):
|
||||
def __init__(self, length=int(5e5)):
|
||||
super().__init__()
|
||||
self.length = length
|
||||
self.prng = np.random.default_rng()
|
||||
|
||||
def __len__(self):
|
||||
return self.length
|
||||
|
||||
def __getitem__(self, _):
|
||||
ab = self.prng.normal(size=(2,)).astype(np.float32)
|
||||
return ab, ab.sum(axis=-1, keepdims=True)
|
||||
|
||||
|
||||
def set_checkpoint(model, out_path, epoch_n, final_model=False):
|
||||
epoch_n = str(epoch_n)
|
||||
if not final_model:
|
||||
ckpt_path = Path(out_path) / 'ckpt' / f'{epoch_n.zfill(4)}_model_ckpt.tp'
|
||||
else:
|
||||
ckpt_path = Path(out_path) / f'trained_model_ckpt_e{epoch_n}.tp'
|
||||
ckpt_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
torch.save(model, ckpt_path, pickle_protocol=pickle.HIGHEST_PROTOCOL)
|
||||
py_store_path = Path(out_path) / 'exp_py.txt'
|
||||
if not py_store_path.exists():
|
||||
shutil.copy(__file__, py_store_path)
|
||||
return ckpt_path
|
||||
|
||||
|
||||
def validate(checkpoint_path, ratio=0.1):
|
||||
checkpoint_path = Path(checkpoint_path)
|
||||
import torchmetrics
|
||||
|
||||
# initialize metric
|
||||
validmetric = torchmetrics.Accuracy()
|
||||
ut = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||
|
||||
try:
|
||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False)
|
||||
except RuntimeError:
|
||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False, download=True)
|
||||
valid_d = DataLoader(datas, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
||||
|
||||
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
||||
n_samples = int(len(valid_d) * ratio)
|
||||
|
||||
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
||||
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
||||
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
||||
y_valid = model(valid_batch_x)
|
||||
|
||||
# metric on current batch
|
||||
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
||||
pbar.set_postfix_str(f'Acc: {acc}')
|
||||
pbar.update()
|
||||
if idx == n_samples:
|
||||
break
|
||||
|
||||
# metric on all batches using custom accumulation
|
||||
acc = validmetric.compute()
|
||||
tqdm.write(f"Avg. accuracy on all data: {acc}")
|
||||
return acc
|
||||
|
||||
|
||||
def new_storage_df(identifier, weight_count):
|
||||
if identifier == 'train':
|
||||
return pd.DataFrame(columns=['Epoch', 'Batch', 'Metric', 'Score'])
|
||||
elif identifier == 'weights':
|
||||
return pd.DataFrame(columns=['Epoch', 'Weight', *(f'weight_{x}' for x in range(weight_count))])
|
||||
|
||||
|
||||
def checkpoint_and_validate(model, out_path, epoch_n, final_model=False):
|
||||
out_path = Path(out_path)
|
||||
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
||||
result = validate(ckpt_path)
|
||||
return result
|
||||
|
||||
|
||||
def plot_training_particle_types(path_to_dataframe):
|
||||
plt.clf()
|
||||
# load from Drive
|
||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||
# Set up figure
|
||||
fig, ax = plt.subplots() # initializes figure and plots
|
||||
data = df.loc[df['Metric'].isin(ft.all_types())]
|
||||
fix_types = data['Metric'].unique()
|
||||
data = data.pivot(index='Epoch', columns='Metric', values='Score').reset_index().fillna(0)
|
||||
_ = plt.stackplot(data['Epoch'], *[data[fixtype] for fixtype in fix_types], labels=fix_types.tolist())
|
||||
|
||||
ax.set(ylabel='Particle Count', xlabel='Epoch')
|
||||
ax.set_title('Particle Type Count')
|
||||
|
||||
fig.legend(loc="center right", title='Particle Type', bbox_to_anchor=(0.85, 0.5))
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_dataframe.parent / 'training_particle_type_lp.png'), dpi=300)
|
||||
|
||||
|
||||
def plot_training_result(path_to_dataframe):
|
||||
plt.clf()
|
||||
# load from Drive
|
||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||
|
||||
# Set up figure
|
||||
fig, ax1 = plt.subplots() # initializes figure and plots
|
||||
ax2 = ax1.twinx() # applies twinx to ax2, which is the second y-axis.
|
||||
|
||||
# plots the first set of data
|
||||
data = df[(df['Metric'] == 'Task Loss') | (df['Metric'] == 'Self Train Loss')].groupby(['Epoch', 'Metric']).mean()
|
||||
palette = sns.color_palette()[1:data.reset_index()['Metric'].unique().shape[0]+1]
|
||||
sns.lineplot(data=data.groupby(['Epoch', 'Metric']).mean(), x='Epoch', y='Score', hue='Metric',
|
||||
palette=palette, ax=ax1)
|
||||
|
||||
# plots the second set of data
|
||||
data = df[(df['Metric'] == 'Test Accuracy') | (df['Metric'] == 'Train Accuracy')]
|
||||
palette = sns.color_palette()[len(palette)+1:data.reset_index()['Metric'].unique().shape[0] + len(palette)+1]
|
||||
sns.lineplot(data=data, x='Epoch', y='Score', marker='o', hue='Metric', palette=palette)
|
||||
|
||||
ax1.set(yscale='log', ylabel='Losses')
|
||||
ax1.set_title('Training Lineplot')
|
||||
ax2.set(ylabel='Accuracy')
|
||||
|
||||
fig.legend(loc="center right", title='Metric', bbox_to_anchor=(0.85, 0.5))
|
||||
ax1.get_legend().remove()
|
||||
ax2.get_legend().remove()
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_dataframe.parent / 'training_lineplot.png'), dpi=300)
|
||||
|
||||
|
||||
def plot_network_connectivity_by_fixtype(path_to_trained_model):
|
||||
m = torch.load(path_to_trained_model, map_location=torch.device('cpu')).eval()
|
||||
# noinspection PyProtectedMember
|
||||
particles = list(m.particles)
|
||||
df = pd.DataFrame(columns=['type', 'layer', 'neuron', 'name'])
|
||||
|
||||
for prtcl in particles:
|
||||
l, c, w = [float(x) for x in re.sub("[^0-9|_]", "", prtcl.name).split('_')]
|
||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l-1, w, prtcl.name)
|
||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l, c, prtcl.name)
|
||||
for layer in list(df['layer'].unique()):
|
||||
# Rescale
|
||||
divisor = df.loc[(df['layer'] == layer), 'neuron'].max()
|
||||
df.loc[(df['layer'] == layer), 'neuron'] /= divisor
|
||||
|
||||
tqdm.write(f'Connectivity Data gathered')
|
||||
for n, fixtype in enumerate(ft.all_types()):
|
||||
if df[df['type'] == fixtype].shape[0] > 0:
|
||||
plt.clf()
|
||||
ax = sns.lineplot(y='neuron', x='layer', hue='name', data=df[df['type'] == fixtype],
|
||||
legend=False, estimator=None, lw=1)
|
||||
_ = sns.lineplot(y=[0, 1], x=[-1, df['layer'].max()], legend=False, estimator=None, lw=0)
|
||||
ax.set_title(fixtype)
|
||||
lines = ax.get_lines()
|
||||
for line in lines:
|
||||
line.set_color(sns.color_palette()[n])
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_trained_model.parent / f'net_connectivity_{fixtype}.png'), dpi=300)
|
||||
tqdm.write(f'Connectivity plotted: {fixtype} - n = {df[df["type"] == fixtype].shape[0] // 2}')
|
||||
else:
|
||||
tqdm.write(f'No Connectivity {fixtype}')
|
||||
|
||||
|
||||
def run_particle_dropout_test(model_path):
|
||||
diff_store_path = model_path.parent / 'diff_store.csv'
|
||||
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
||||
prtcl_dict = defaultdict(lambda: 0)
|
||||
_ = test_for_fixpoints(prtcl_dict, list(latest_model.particles))
|
||||
tqdm.write(str(dict(prtcl_dict)))
|
||||
diff_df = pd.DataFrame(columns=['Particle Type', 'Accuracy', 'Diff'])
|
||||
|
||||
acc_pre = validate(model_path, ratio=1).item()
|
||||
diff_df.loc[diff_df.shape[0]] = ('All Organism', acc_pre, 0)
|
||||
|
||||
for fixpoint_type in ft.all_types():
|
||||
new_model = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero(fixpoint_type)
|
||||
if [x for x in new_model.particles if x.is_fixpoint == fixpoint_type]:
|
||||
new_ckpt = set_checkpoint(new_model, model_path.parent, fixpoint_type, final_model=True)
|
||||
acc_post = validate(new_ckpt, ratio=1).item()
|
||||
acc_diff = abs(acc_post - acc_pre)
|
||||
tqdm.write(f'Zero_ident diff = {acc_diff}')
|
||||
diff_df.loc[diff_df.shape[0]] = (fixpoint_type, acc_post, acc_diff)
|
||||
|
||||
diff_df.to_csv(diff_store_path, mode='a', header=not diff_store_path.exists(), index=False)
|
||||
return diff_store_path
|
||||
|
||||
|
||||
def plot_dropout_stacked_barplot(mdl_path):
|
||||
diff_store_path = mdl_path.parent / 'diff_store.csv'
|
||||
diff_df = pd.read_csv(diff_store_path)
|
||||
particle_dict = defaultdict(lambda: 0)
|
||||
latest_model = torch.load(mdl_path, map_location=DEVICE).eval()
|
||||
_ = test_for_fixpoints(particle_dict, list(latest_model.particles))
|
||||
tqdm.write(str(dict(particle_dict)))
|
||||
plt.clf()
|
||||
fig, ax = plt.subplots(ncols=2)
|
||||
colors = sns.color_palette()[1:diff_df.shape[0]+1]
|
||||
_ = sns.barplot(data=diff_df, y='Accuracy', x='Particle Type', ax=ax[0], palette=colors)
|
||||
|
||||
ax[0].set_title('Accuracy after particle dropout')
|
||||
ax[0].set_xlabel('Particle Type')
|
||||
|
||||
ax[1].pie(particle_dict.values(), labels=particle_dict.keys(), colors=list(reversed(colors)), )
|
||||
ax[1].set_title('Particle Count')
|
||||
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(diff_store_path.parent / 'dropout_stacked_barplot.png'), dpi=300)
|
||||
|
||||
|
||||
def run_particle_dropout_and_plot(model_path):
|
||||
diff_store_path = run_particle_dropout_test(model_path)
|
||||
plot_dropout_stacked_barplot(diff_store_path)
|
||||
|
||||
|
||||
def flat_for_store(parameters):
|
||||
return (x.item() for y in parameters for x in y.detach().flatten())
|
||||
|
||||
|
||||
def train_self_replication(model, optimizer, st_stps) -> dict:
|
||||
self_train_loss = model.combined_self_train(optimizer, st_stps)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
stp_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
||||
return stp_log
|
||||
|
||||
|
||||
def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
|
||||
# Zero your gradients for every batch!
|
||||
optimizer.zero_grad()
|
||||
btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
|
||||
y_prd = model(btch_x)
|
||||
# loss = loss_fn(y, batch_y.unsqueeze(-1).to(torch.float32))
|
||||
loss = loss_func(y_prd, btch_y.to(torch.float))
|
||||
loss.backward()
|
||||
|
||||
# Adjust learning weights
|
||||
optimizer.step()
|
||||
|
||||
stp_log = dict(Metric='Task Loss', Score=loss.item())
|
||||
|
||||
return stp_log, y_prd
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
training = True
|
||||
train_to_id_first = True
|
||||
train_to_task_first = False
|
||||
seq_task_train = True
|
||||
force_st_for_epochs_n = 5
|
||||
n_st_per_batch = 2
|
||||
activation = None # nn.ReLU()
|
||||
|
||||
use_sparse_network = False
|
||||
|
||||
for weight_hidden_size in [4, 5, 6]:
|
||||
|
||||
tsk_threshold = 0.85
|
||||
weight_hidden_size = weight_hidden_size
|
||||
residual_skip = False
|
||||
n_seeds = 3
|
||||
depth = 3
|
||||
|
||||
assert not (train_to_task_first and train_to_id_first)
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
||||
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
||||
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first and tsk_threshold != 1 else ""}'
|
||||
sprs_str = '_sprs' if use_sparse_network else ''
|
||||
f_str = f'_f_{force_st_for_epochs_n}' if \
|
||||
force_st_for_epochs_n and seq_task_train and train_to_task_first else ""
|
||||
config_str = f'{res_str}{id_str}{tsk_str}{f_str}{sprs_str}'
|
||||
exp_path = Path('output') / f'mn_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'
|
||||
|
||||
if not training:
|
||||
# noinspection PyRedeclaration
|
||||
exp_path = Path('output') / 'mn_st_n_2_100_4'
|
||||
|
||||
for seed in range(n_seeds):
|
||||
seed_path = exp_path / str(seed)
|
||||
|
||||
model_save_path = seed_path / '0000_trained_model.zip'
|
||||
df_store_path = seed_path / 'train_store.csv'
|
||||
weight_store_path = seed_path / 'weight_store.csv'
|
||||
srnn_parameters = dict()
|
||||
|
||||
if training:
|
||||
# Check whether output files already exist at this location; abort if they do.
|
||||
for path in [model_save_path, df_store_path, weight_store_path]:
|
||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
||||
|
||||
utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||
try:
|
||||
dataset = MNIST(str(DATA_PATH), transform=utility_transforms)
|
||||
except RuntimeError:
|
||||
dataset = MNIST(str(DATA_PATH), transform=utility_transforms, download=True)
|
||||
d = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
||||
|
||||
interface = np.prod(dataset[0][0].shape)
|
||||
dense_metanet = MetaNet(interface, depth=depth, width=6, out=10, residual_skip=residual_skip,
|
||||
weight_hidden_size=weight_hidden_size, activation=activation).to(DEVICE)
|
||||
sparse_metanet = SparseNetwork(interface, depth=depth, width=6, out=10, residual_skip=residual_skip,
|
||||
weight_hidden_size=weight_hidden_size, activation=activation
|
||||
).to(DEVICE) if use_sparse_network else dense_metanet
|
||||
if use_sparse_network:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
|
||||
loss_fn = nn.CrossEntropyLoss()
|
||||
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.004, momentum=0.9)
|
||||
sparse_optimizer = torch.optim.SGD(
|
||||
sparse_metanet.parameters(), lr=0.001, momentum=0.9
|
||||
) if use_sparse_network else dense_optimizer
|
||||
|
||||
dense_weights_updated = False
|
||||
sparse_weights_updated = False
|
||||
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
||||
|
||||
init_tsk = train_to_task_first
|
||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
|
||||
is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
|
||||
sparse_metanet = sparse_metanet.train()
|
||||
dense_metanet = dense_metanet.train()
|
||||
|
||||
# Initialize metrics, even if they are not always needed:
|
||||
metric = torchmetrics.Accuracy()
|
||||
|
||||
# Define what to train in this epoch:
|
||||
do_tsk_train = train_to_task_first
|
||||
force_st = (force_st_for_epochs_n >= (EPOCH - epoch)) and force_st_for_epochs_n
|
||||
init_st = (train_to_id_first and not dense_metanet.count_fixpoints() > 200)
|
||||
do_st_train = init_st or is_self_train_epoch or force_st
|
||||
|
||||
for batch, (batch_x, batch_y) in tqdm(enumerate(d), total=len(d), desc='MetaNet Train - Batch'):
|
||||
|
||||
# Self Train
|
||||
if do_st_train:
|
||||
# Transfer weights
|
||||
if dense_weights_updated:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
dense_weights_updated = False
|
||||
st_steps = n_st_per_batch if not init_st else n_st_per_batch * 10
|
||||
step_log = train_self_replication(sparse_metanet, sparse_optimizer, st_steps)
|
||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
if use_sparse_network:
|
||||
sparse_weights_updated = True
|
||||
|
||||
# Task Train
|
||||
if not init_st:
|
||||
# Transfer weights
|
||||
if sparse_weights_updated:
|
||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
||||
sparse_weights_updated = False
|
||||
step_log, y_pred = train_task(dense_metanet, dense_optimizer, loss_fn, batch_x, batch_y)
|
||||
|
||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
if use_sparse_network:
|
||||
dense_weights_updated = True
|
||||
metric(y_pred.cpu(), batch_y.cpu())
|
||||
|
||||
if is_validation_epoch:
|
||||
if sparse_weights_updated:
|
||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
||||
sparse_weights_updated = False
|
||||
|
||||
dense_metanet = dense_metanet.eval()
|
||||
if do_tsk_train:
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Train Accuracy', Score=metric.compute().item())
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
|
||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, epoch).item()
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy)
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
if init_tsk or (train_to_task_first and seq_task_train):
|
||||
init_tsk = accuracy <= tsk_threshold
|
||||
if init_st or is_validation_epoch:
|
||||
if dense_weights_updated:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
dense_weights_updated = False
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
||||
counter_dict = dict(counter_dict)
|
||||
for key, value in counter_dict.items():
|
||||
step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
||||
if sum(x.is_fixpoint == ft.identity_func for x in dense_metanet.particles) > 200:
|
||||
train_to_id_first = False
|
||||
# Reset Diverged particles
|
||||
sparse_metanet.reset_diverged_particles()
|
||||
if use_sparse_network:
|
||||
sparse_weights_updated = True
|
||||
|
||||
# FLUSH to disk
|
||||
if is_validation_epoch:
|
||||
for particle in dense_metanet.particles:
|
||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
train_store.to_csv(df_store_path, mode='a',
|
||||
header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a',
|
||||
header=not weight_store_path.exists(), index=False)
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
||||
|
||||
###########################################################
|
||||
# EPOCHS ended
|
||||
dense_metanet = dense_metanet.eval()
|
||||
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
||||
for key, value in dict(counter_dict).items():
|
||||
step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, EPOCH, final_model=True)
|
||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy.item())
|
||||
for particle in dense_metanet.particles:
|
||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
||||
|
||||
plot_training_result(df_store_path)
|
||||
plot_training_particle_types(df_store_path)
|
||||
|
||||
try:
|
||||
_ = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
||||
except StopIteration:
|
||||
print('Model pattern did not trigger.')
|
||||
print(f'Search path was: {seed_path}:')
|
||||
print(f'Found models are: {list(seed_path.rglob("*.tp"))}')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
run_particle_dropout_and_plot(seed_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
try:
|
||||
plot_network_connectivity_by_fixtype(model_save_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
|
||||
if n_seeds >= 2:
|
||||
pass
|
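The script above alternates ordinary task training with "self-training", in which each particle network is optimised to reproduce its own weights, driving it towards an identity fixpoint. MetaNet.combined_self_train is not part of this diff, so the following minimal sketch uses a hypothetical ToyParticle with a simplified positional weight encoding; it illustrates the idea only, not the repository's actual implementation.

import torch
from torch import nn


class ToyParticle(nn.Module):
    # Hypothetical stand-in for a single particle: maps (position, current weight value)
    # pairs derived from its own parameters to new weight values.
    def __init__(self, hidden=8):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(2, hidden), nn.Tanh(), nn.Linear(hidden, 1))

    def weight_encoding(self):
        # One row per parameter scalar: (normalised position, detached current value).
        flat = torch.cat([p.detach().flatten() for p in self.parameters()])
        pos = torch.linspace(0.0, 1.0, steps=flat.numel())
        return torch.stack([pos, flat], dim=1), flat


def self_train_step(particle, optimizer, loss_fn=nn.MSELoss()):
    # One self-train step: regress the particle's output onto its own current weights.
    inputs, targets = particle.weight_encoding()
    optimizer.zero_grad()
    prediction = particle.net(inputs).squeeze(-1)
    loss = loss_fn(prediction, targets)
    loss.backward()
    optimizer.step()
    return loss.item()


particle = ToyParticle()
opt = torch.optim.SGD(particle.parameters(), lr=1e-3)
losses = [self_train_step(particle, opt) for _ in range(10)]
print(f'first/last self-train loss: {losses[0]:.4f} / {losses[-1]:.4f}')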
@@ -1,317 +0,0 @@
|
||||
import platform
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchmetrics
|
||||
from torch import nn
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from tqdm import tqdm
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
if platform.node() == 'CarbonX':
|
||||
debug = True
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
print("@ Warning, Debugging Config@!!!!!! @")
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
else:
|
||||
debug = False
|
||||
try:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
if __package__ is None:
|
||||
DIR = Path(__file__).resolve().parent
|
||||
sys.path.insert(0, str(DIR.parent))
|
||||
__package__ = DIR.name
|
||||
else:
|
||||
DIR = None
|
||||
except NameError:
|
||||
DIR = None
|
||||
pass
|
||||
|
||||
from network import MetaNet, FixTypes as ft
|
||||
from sparse_net import SparseNetwork
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from experiments.meta_task_exp import new_storage_df, train_self_replication, train_task, set_checkpoint, \
|
||||
flat_for_store, plot_training_result, plot_training_particle_types, run_particle_dropout_and_plot, \
|
||||
plot_network_connectivity_by_fixtype
|
||||
|
||||
WORKER = 10 if not debug else 2
|
||||
debug = False
|
||||
BATCHSIZE = 50 if not debug else 50
|
||||
EPOCH = 10
|
||||
VALIDATION_FRQ = 1 if not debug else 1
|
||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
|
||||
class AddTaskDataset(Dataset):
|
||||
def __init__(self, length=int(1e5)):
|
||||
super().__init__()
|
||||
self.length = length
|
||||
|
||||
def __len__(self):
|
||||
return self.length
|
||||
|
||||
def __getitem__(self, _):
|
||||
ab = torch.randn(size=(2,)).to(torch.float32)
|
||||
return ab, ab.sum(axis=-1, keepdims=True)
|
||||
|
||||
|
||||
def validate(checkpoint_path, valid_d, ratio=1, validmetric=torchmetrics.MeanAbsoluteError()):
|
||||
checkpoint_path = Path(checkpoint_path)
|
||||
import torchmetrics
|
||||
|
||||
# initialize metric
|
||||
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
||||
n_samples = int(len(valid_d) * ratio)
|
||||
|
||||
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
||||
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
||||
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
||||
y_valid = model(valid_batch_x)
|
||||
|
||||
# metric on current batch
|
||||
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
||||
pbar.set_postfix_str(f'Acc: {acc}')
|
||||
pbar.update()
|
||||
if idx == n_samples:
|
||||
break
|
||||
|
||||
# metric on all batches using custom accumulation
|
||||
acc = validmetric.compute()
|
||||
tqdm.write(f"Avg. Accuracy on all data: {acc}")
|
||||
return acc
|
||||
|
||||
|
||||
def checkpoint_and_validate(model, out_path, epoch_n, valid_d, final_model=False):
|
||||
out_path = Path(out_path)
|
||||
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
||||
result = validate(ckpt_path, valid_d)
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
training = True
|
||||
train_to_id_first = False
|
||||
train_to_task_first = False
|
||||
seq_task_train = True
|
||||
force_st_for_epochs_n = 5
|
||||
n_st_per_batch = 2
|
||||
activation = None # nn.ReLU()
|
||||
|
||||
use_sparse_network = False
|
||||
|
||||
for weight_hidden_size in [3, 4, 5, 6]:
|
||||
|
||||
tsk_threshold = 0.85
|
||||
weight_hidden_size = weight_hidden_size
|
||||
residual_skip = True
|
||||
n_seeds = 3
|
||||
depth = 3
|
||||
width = 3
|
||||
out = 1
|
||||
|
||||
data_path = Path('data')
|
||||
data_path.mkdir(exist_ok=True, parents=True)
|
||||
assert not (train_to_task_first and train_to_id_first)
|
||||
|
||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
||||
s_str = f'_n_{n_st_per_batch}' if n_st_per_batch > 1 else ""
|
||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
||||
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
||||
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first and tsk_threshold != 1 else ""}'
|
||||
sprs_str = '_sprs' if use_sparse_network else ''
|
||||
f_str = f'_f_{force_st_for_epochs_n}' if \
|
||||
force_st_for_epochs_n and seq_task_train and train_to_task_first else ""
|
||||
config_str = f'{s_str}{res_str}{id_str}{tsk_str}{f_str}{sprs_str}'
|
||||
exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'
|
||||
|
||||
if not training:
|
||||
# noinspection PyRedeclaration
|
||||
exp_path = Path('output') / 'mn_st_n_2_100_4'
|
||||
|
||||
for seed in range(n_seeds):
|
||||
seed_path = exp_path / str(seed)
|
||||
|
||||
model_path = seed_path / '0000_trained_model.zip'
|
||||
df_store_path = seed_path / 'train_store.csv'
|
||||
weight_store_path = seed_path / 'weight_store.csv'
|
||||
srnn_parameters = dict()
|
||||
|
||||
if training:
|
||||
# Check whether output files already exist at this location; abort if they do.
|
||||
for path in [model_path, df_store_path, weight_store_path]:
|
||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
||||
|
||||
train_data = AddTaskDataset()
|
||||
valid_data = AddTaskDataset()
|
||||
train_load = DataLoader(train_data, batch_size=BATCHSIZE, shuffle=True,
|
||||
drop_last=True, num_workers=WORKER)
|
||||
vali_load = DataLoader(valid_data, batch_size=BATCHSIZE, shuffle=False,
|
||||
drop_last=True, num_workers=WORKER)
|
||||
|
||||
interface = np.prod(train_data[0][0].shape)
|
||||
dense_metanet = MetaNet(interface, depth=depth, width=width, out=out,
|
||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
||||
activation=activation
|
||||
).to(DEVICE)
|
||||
sparse_metanet = SparseNetwork(interface, depth=depth, width=width, out=out,
|
||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
||||
activation=activation
|
||||
).to(DEVICE) if use_sparse_network else dense_metanet
|
||||
if use_sparse_network:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
|
||||
loss_fn = nn.MSELoss()
|
||||
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.00004, momentum=0.9)
|
||||
sparse_optimizer = torch.optim.SGD(
|
||||
sparse_metanet.parameters(), lr=0.00001, momentum=0.9
|
||||
) if use_sparse_network else dense_optimizer
|
||||
|
||||
dense_weights_updated = False
|
||||
sparse_weights_updated = False
|
||||
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
||||
|
||||
init_tsk = train_to_task_first
|
||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
|
||||
is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
|
||||
sparse_metanet = sparse_metanet.train()
|
||||
dense_metanet = dense_metanet.train()
|
||||
|
||||
# Initialize metrics, even if they are not always needed:
|
||||
metric = torchmetrics.MeanAbsoluteError()
|
||||
|
||||
# Define what to train in this epoch:
|
||||
do_tsk_train = train_to_task_first
|
||||
force_st = (force_st_for_epochs_n >= (EPOCH - epoch)) and force_st_for_epochs_n
|
||||
init_st = (train_to_id_first and not dense_metanet.count_fixpoints() > 200)
|
||||
do_st_train = init_st or is_self_train_epoch or force_st
|
||||
|
||||
for batch, (batch_x, batch_y) in tqdm(enumerate(train_load),
|
||||
total=len(train_load), desc='MetaNet Train - Batch'
|
||||
):
|
||||
|
||||
# Self Train
|
||||
if do_st_train:
|
||||
# Transfer weights
|
||||
if dense_weights_updated:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
dense_weights_updated = False
|
||||
st_steps = n_st_per_batch if not init_st else n_st_per_batch * 10
|
||||
step_log = train_self_replication(sparse_metanet, sparse_optimizer, st_steps)
|
||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
if use_sparse_network:
|
||||
sparse_weights_updated = True
|
||||
|
||||
# Task Train
|
||||
init_st = True  # NOTE: hard-codes self-train only; the task-train branch below never executes
|
||||
if not init_st:
|
||||
# Transfer weights
|
||||
if sparse_weights_updated:
|
||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
||||
sparse_weights_updated = False
|
||||
step_log, y_pred = train_task(dense_metanet, dense_optimizer, loss_fn, batch_x, batch_y)
|
||||
|
||||
step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
if use_sparse_network:
|
||||
dense_weights_updated = True
|
||||
metric(y_pred.cpu(), batch_y.cpu())
|
||||
|
||||
if is_validation_epoch:
|
||||
if sparse_weights_updated:
|
||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
||||
sparse_weights_updated = False
|
||||
|
||||
dense_metanet = dense_metanet.eval()
|
||||
if not init_st:
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Train Accuracy', Score=metric.compute().item())
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
|
||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, epoch, vali_load).item()
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy)
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
if init_tsk or (train_to_task_first and seq_task_train):
|
||||
init_tsk = accuracy <= tsk_threshold
|
||||
if init_st or is_validation_epoch:
|
||||
if dense_weights_updated:
|
||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||
dense_weights_updated = False
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
||||
counter_dict = dict(counter_dict)
|
||||
for key, value in counter_dict.items():
|
||||
step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
||||
if sum(x.is_fixpoint == ft.identity_func for x in dense_metanet.particles) > 200:
|
||||
train_to_id_first = False
|
||||
# Reset Diverged particles
|
||||
sparse_metanet.reset_diverged_particles()
|
||||
if use_sparse_network:
|
||||
sparse_weights_updated = True
|
||||
|
||||
# FLUSH to disk
|
||||
if is_validation_epoch:
|
||||
for particle in dense_metanet.particles:
|
||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', dense_metanet.particle_parameter_count)
|
||||
|
||||
###########################################################
|
||||
# EPOCHS ended
|
||||
dense_metanet = dense_metanet.eval()
|
||||
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(dense_metanet.particles))
|
||||
for key, value in dict(counter_dict).items():
|
||||
step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
accuracy = checkpoint_and_validate(dense_metanet, seed_path, EPOCH, vali_load, final_model=True)
|
||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy.item())
|
||||
for particle in dense_metanet.particles:
|
||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
||||
|
||||
plot_training_result(df_store_path)
|
||||
plot_training_particle_types(df_store_path)
|
||||
|
||||
try:
|
||||
model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
||||
except StopIteration:
|
||||
print('Model pattern did not trigger.')
|
||||
print(f'Search path was: {seed_path}:')
|
||||
print(f'Found models are: {list(seed_path.rglob("*.tp"))}')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
run_particle_dropout_and_plot(model_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
try:
|
||||
plot_network_connectivity_by_fixtype(model_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
|
||||
if n_seeds >= 2:
|
||||
pass
|
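For orientation, the add-task variant above swaps MNIST classification for a two-number addition regression (MSE loss, mean-absolute-error metric). Stripped of the MetaNet machinery, the task itself reduces to the following sketch; layer sizes and learning rate here are illustrative, not the script's settings.

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader


class AddTaskDataset(Dataset):
    # As in the script: the input is two random numbers, the target is their sum.
    def __init__(self, length=10_000):
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, _):
        ab = torch.randn(2)
        return ab, ab.sum(dim=-1, keepdim=True)


model = nn.Sequential(nn.Linear(2, 3), nn.ReLU(), nn.Linear(3, 1))
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()

loader = DataLoader(AddTaskDataset(), batch_size=50, shuffle=True, drop_last=True)
for batch_x, batch_y in loader:
    optimizer.zero_grad()
    loss = loss_fn(model(batch_x), batch_y)
    loss.backward()
    optimizer.step()
print(f'final batch loss: {loss.item():.4f}')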
experiments/meta_task_small_utility.py (new file, 77 lines)
@@ -0,0 +1,77 @@
from pathlib import Path

import torch
import torchmetrics

from torch.utils.data import Dataset
from tqdm import tqdm

from experiments.meta_task_utility import set_checkpoint

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class AddTaskDataset(Dataset):
    def __init__(self, length=int(1e3)):
        super().__init__()
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, _):
        ab = torch.randn(size=(2,)).to(torch.float32)
        return ab, ab.sum(axis=-1, keepdims=True)


def validate(checkpoint_path, valid_d, ratio=1, validmetric=torchmetrics.MeanAbsoluteError()):
    checkpoint_path = Path(checkpoint_path)

    # initialize metric
    model = torch.load(checkpoint_path, map_location=DEVICE).eval()
    n_samples = int(len(valid_d) * ratio)

    with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
        for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
            valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
            y_valid = model(valid_batch_x)

            # metric on current batch
            acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
            pbar.set_postfix_str(f'Acc: {acc}')
            pbar.update()
            if idx == n_samples:
                break

    # metric on all batches using custom accumulation
    acc = validmetric.compute()
    tqdm.write(f"Avg. Accuracy on all data: {acc}")
    return acc


def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
    # Zero your gradients for every batch!
    optimizer.zero_grad()
    btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
    y_prd = model(btch_x)

    loss = loss_func(y_prd, btch_y.to(torch.float))
    loss.backward()

    # Adjust learning weights
    optimizer.step()

    stp_log = dict(Metric='Task Loss', Score=loss.item())

    return stp_log, y_prd


def checkpoint_and_validate(model, out_path, epoch_n, valid_d, final_model=False):
    out_path = Path(out_path)
    ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
    result = validate(ckpt_path, valid_d)
    return result


if __name__ == '__main__':
    raise NotImplementedError('Get out of here')
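The new helper module is not runnable on its own (its __main__ guard raises). A plausible usage sketch is shown below; the MetaNet import, its constructor arguments, and the output path are assumptions carried over from the experiment scripts elsewhere in this commit, not guarantees made by this module.

from pathlib import Path

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader

from network import MetaNet  # assumed to be available, as in the experiment scripts
from experiments.meta_task_small_utility import AddTaskDataset, train_task, checkpoint_and_validate

train_data, valid_data = AddTaskDataset(), AddTaskDataset()
train_load = DataLoader(train_data, batch_size=50, shuffle=True, drop_last=True)
valid_load = DataLoader(valid_data, batch_size=50, shuffle=False, drop_last=True)

interface = int(np.prod(train_data[0][0].shape))  # 2 inputs for the addition task
model = MetaNet(interface, depth=3, width=3, out=1, weight_hidden_size=4)  # hypothetical settings
optimizer = torch.optim.SGD(model.parameters(), lr=4e-5, momentum=0.9)
loss_fn = nn.MSELoss()

out_path = Path('output') / 'small_utility_demo'  # hypothetical output location
for epoch in range(2):
    for batch_x, batch_y in train_load:
        step_log, _ = train_task(model, optimizer, loss_fn, batch_x, batch_y)
    mae = checkpoint_and_validate(model, out_path, epoch, valid_load)
    print(epoch, step_log['Score'], float(mae))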
experiments/meta_task_utility.py (new file, 319 lines)
@@ -0,0 +1,319 @@
|
||||
import pickle
|
||||
import re
|
||||
import shutil
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import platform
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch
|
||||
from matplotlib import pyplot as plt
|
||||
import seaborn as sns
|
||||
from torch.nn import Flatten
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from torchvision.datasets import MNIST
|
||||
from torchvision.transforms import ToTensor, Compose, Resize
|
||||
from tqdm import tqdm
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
if platform.node() == 'CarbonX':
|
||||
debug = True
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
print("@ Warning, Debugging Config@!!!!!! @")
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
else:
|
||||
debug = False
|
||||
try:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
if __package__ is None:
|
||||
DIR = Path(__file__).resolve().parent
|
||||
sys.path.insert(0, str(DIR.parent))
|
||||
__package__ = DIR.name
|
||||
else:
|
||||
DIR = None
|
||||
except NameError:
|
||||
DIR = None
|
||||
pass
|
||||
|
||||
from network import FixTypes as ft
|
||||
from functionalities_test import test_for_fixpoints
|
||||
|
||||
WORKER = 10 if not debug else 0
|
||||
debug = False
|
||||
BATCHSIZE = 500 if not debug else 50
|
||||
EPOCH = 50
|
||||
VALIDATION_FRQ = 3 if not debug else 1
|
||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
DATA_PATH = Path('data')
|
||||
DATA_PATH.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
if debug:
|
||||
torch.autograd.set_detect_anomaly(True)
|
||||
|
||||
|
||||
class ToFloat:
|
||||
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __call__(self, x):
|
||||
return x.to(torch.float32)
|
||||
|
||||
|
||||
class AddTaskDataset(Dataset):
|
||||
def __init__(self, length=int(5e5)):
|
||||
super().__init__()
|
||||
self.length = length
|
||||
self.prng = np.random.default_rng()
|
||||
|
||||
def __len__(self):
|
||||
return self.length
|
||||
|
||||
def __getitem__(self, _):
|
||||
ab = self.prng.normal(size=(2,)).astype(np.float32)
|
||||
return ab, ab.sum(axis=-1, keepdims=True)
|
||||
|
||||
|
||||
def set_checkpoint(model, out_path, epoch_n, final_model=False):
|
||||
epoch_n = str(epoch_n)
|
||||
if not final_model:
|
||||
ckpt_path = Path(out_path) / 'ckpt' / f'{epoch_n.zfill(4)}_model_ckpt.tp'
|
||||
else:
|
||||
ckpt_path = Path(out_path) / f'trained_model_ckpt_e{epoch_n}.tp'
|
||||
ckpt_path.parent.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
torch.save(model, ckpt_path, pickle_protocol=pickle.HIGHEST_PROTOCOL)
|
||||
py_store_path = Path(out_path) / 'exp_py.txt'
|
||||
if not py_store_path.exists():
|
||||
shutil.copy(__file__, py_store_path)
|
||||
return ckpt_path
|
||||
|
||||
|
||||
def validate(checkpoint_path, ratio=0.1):
|
||||
checkpoint_path = Path(checkpoint_path)
|
||||
import torchmetrics
|
||||
|
||||
# initialize metric
|
||||
validmetric = torchmetrics.Accuracy()
|
||||
ut = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||
|
||||
try:
|
||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False)
|
||||
except RuntimeError:
|
||||
datas = MNIST(str(DATA_PATH), transform=ut, train=False, download=True)
|
||||
valid_d = DataLoader(datas, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
||||
|
||||
model = torch.load(checkpoint_path, map_location=DEVICE).eval()
|
||||
n_samples = int(len(valid_d) * ratio)
|
||||
|
||||
with tqdm(total=n_samples, desc='Validation Run: ') as pbar:
|
||||
for idx, (valid_batch_x, valid_batch_y) in enumerate(valid_d):
|
||||
valid_batch_x, valid_batch_y = valid_batch_x.to(DEVICE), valid_batch_y.to(DEVICE)
|
||||
y_valid = model(valid_batch_x)
|
||||
|
||||
# metric on current batch
|
||||
acc = validmetric(y_valid.cpu(), valid_batch_y.cpu())
|
||||
pbar.set_postfix_str(f'Acc: {acc}')
|
||||
pbar.update()
|
||||
if idx == n_samples:
|
||||
break
|
||||
|
||||
# metric on all batches using custom accumulation
|
||||
acc = validmetric.compute()
|
||||
tqdm.write(f"Avg. accuracy on all data: {acc}")
|
||||
return acc
|
||||
|
||||
|
||||
def new_storage_df(identifier, weight_count):
|
||||
if identifier == 'train':
|
||||
return pd.DataFrame(columns=['Epoch', 'Batch', 'Metric', 'Score'])
|
||||
elif identifier == 'weights':
|
||||
return pd.DataFrame(columns=['Epoch', 'Weight', *(f'weight_{x}' for x in range(weight_count))])
|
||||
|
||||
|
||||
def checkpoint_and_validate(model, out_path, epoch_n, final_model=False):
|
||||
out_path = Path(out_path)
|
||||
ckpt_path = set_checkpoint(model, out_path, epoch_n, final_model=final_model)
|
||||
result = validate(ckpt_path)
|
||||
return result
|
||||
|
||||
|
||||
def plot_training_particle_types(path_to_dataframe):
|
||||
plt.clf()
|
||||
# load from Drive
|
||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||
# Set up figure
|
||||
fig, ax = plt.subplots() # initializes figure and plots
|
||||
data = df.loc[df['Metric'].isin(ft.all_types())]
|
||||
fix_types = data['Metric'].unique()
|
||||
data = data.pivot(index='Epoch', columns='Metric', values='Score').reset_index().fillna(0)
|
||||
_ = plt.stackplot(data['Epoch'], *[data[fixtype] for fixtype in fix_types], labels=fix_types.tolist())
|
||||
|
||||
ax.set(ylabel='Particle Count', xlabel='Epoch')
|
||||
ax.set_title('Particle Type Count')
|
||||
|
||||
fig.legend(loc="center right", title='Particle Type', bbox_to_anchor=(0.85, 0.5))
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_dataframe.parent / 'training_particle_type_lp.png'), dpi=300)
|
||||
|
||||
|
||||
def plot_training_result(path_to_dataframe):
|
||||
plt.clf()
|
||||
# load from Drive
|
||||
df = pd.read_csv(path_to_dataframe, index_col=False)
|
||||
|
||||
# Set up figure
|
||||
fig, ax1 = plt.subplots() # initializes figure and plots
|
||||
ax2 = ax1.twinx() # applies twinx to ax2, which is the second y-axis.
|
||||
|
||||
# plots the first set of data
|
||||
data = df[(df['Metric'] == 'Task Loss') | (df['Metric'] == 'Self Train Loss')].groupby(['Epoch', 'Metric']).mean()
|
||||
palette = sns.color_palette()[1:data.reset_index()['Metric'].unique().shape[0]+1]
|
||||
sns.lineplot(data=data.groupby(['Epoch', 'Metric']).mean(), x='Epoch', y='Score', hue='Metric',
|
||||
palette=palette, ax=ax1)
|
||||
|
||||
# plots the second set of data
|
||||
data = df[(df['Metric'] == 'Test Accuracy') | (df['Metric'] == 'Train Accuracy')]
|
||||
palette = sns.color_palette()[len(palette)+1:data.reset_index()['Metric'].unique().shape[0] + len(palette)+1]
|
||||
sns.lineplot(data=data, x='Epoch', y='Score', marker='o', hue='Metric', palette=palette)
|
||||
|
||||
ax1.set(yscale='log', ylabel='Losses')
|
||||
ax1.set_title('Training Lineplot')
|
||||
ax2.set(ylabel='Accuracy')
|
||||
|
||||
fig.legend(loc="center right", title='Metric', bbox_to_anchor=(0.85, 0.5))
|
||||
ax1.get_legend().remove()
|
||||
ax2.get_legend().remove()
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_dataframe.parent / 'training_lineplot.png'), dpi=300)
|
||||
|
||||
|
||||
def plot_network_connectivity_by_fixtype(path_to_trained_model):
|
||||
m = torch.load(path_to_trained_model, map_location=torch.device('cpu')).eval()
|
||||
# noinspection PyProtectedMember
|
||||
particles = list(m.particles)
|
||||
df = pd.DataFrame(columns=['type', 'layer', 'neuron', 'name'])
|
||||
|
||||
for prtcl in particles:
|
||||
l, c, w = [float(x) for x in re.sub("[^0-9|_]", "", prtcl.name).split('_')]
|
||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l-1, w, prtcl.name)
|
||||
df.loc[df.shape[0]] = (prtcl.is_fixpoint, l, c, prtcl.name)
|
||||
for layer in list(df['layer'].unique()):
|
||||
# Rescale
|
||||
divisor = df.loc[(df['layer'] == layer), 'neuron'].max()
|
||||
df.loc[(df['layer'] == layer), 'neuron'] /= divisor
|
||||
|
||||
tqdm.write(f'Connectivity Data gathered')
|
||||
for n, fixtype in enumerate(ft.all_types()):
|
||||
if df[df['type'] == fixtype].shape[0] > 0:
|
||||
plt.clf()
|
||||
ax = sns.lineplot(y='neuron', x='layer', hue='name', data=df[df['type'] == fixtype],
|
||||
legend=False, estimator=None, lw=1)
|
||||
_ = sns.lineplot(y=[0, 1], x=[-1, df['layer'].max()], legend=False, estimator=None, lw=0)
|
||||
ax.set_title(fixtype)
|
||||
lines = ax.get_lines()
|
||||
for line in lines:
|
||||
line.set_color(sns.color_palette()[n])
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(path_to_trained_model.parent / f'net_connectivity_{fixtype}.png'), dpi=300)
|
||||
tqdm.write(f'Connectivity plotted: {fixtype} - n = {df[df["type"] == fixtype].shape[0] // 2}')
|
||||
else:
|
||||
tqdm.write(f'No Connectivity {fixtype}')
|
||||
|
||||
|
||||
def run_particle_dropout_test(model_path):
|
||||
diff_store_path = model_path.parent / 'diff_store.csv'
|
||||
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
||||
prtcl_dict = defaultdict(lambda: 0)
|
||||
_ = test_for_fixpoints(prtcl_dict, list(latest_model.particles))
|
||||
tqdm.write(str(dict(prtcl_dict)))
|
||||
diff_df = pd.DataFrame(columns=['Particle Type', 'Accuracy', 'Diff'])
|
||||
|
||||
acc_pre = validate(model_path, ratio=1).item()
|
||||
diff_df.loc[diff_df.shape[0]] = ('All Organism', acc_pre, 0)
|
||||
|
||||
for fixpoint_type in ft.all_types():
|
||||
new_model = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero(fixpoint_type)
|
||||
if [x for x in new_model.particles if x.is_fixpoint == fixpoint_type]:
|
||||
new_ckpt = set_checkpoint(new_model, model_path.parent, fixpoint_type, final_model=True)
|
||||
acc_post = validate(new_ckpt, ratio=1).item()
|
||||
acc_diff = abs(acc_post - acc_pre)
|
||||
tqdm.write(f'Zero_ident diff = {acc_diff}')
|
||||
diff_df.loc[diff_df.shape[0]] = (fixpoint_type, acc_post, acc_diff)
|
||||
|
||||
diff_df.to_csv(diff_store_path, mode='a', header=not diff_store_path.exists(), index=False)
|
||||
return diff_store_path
|
||||
|
||||
|
||||
def plot_dropout_stacked_barplot(mdl_path, diff_store_path):
|
||||
|
||||
diff_df = pd.read_csv(diff_store_path)
|
||||
particle_dict = defaultdict(lambda: 0)
|
||||
latest_model = torch.load(mdl_path, map_location=DEVICE).eval()
|
||||
_ = test_for_fixpoints(particle_dict, list(latest_model.particles))
|
||||
tqdm.write(str(dict(particle_dict)))
|
||||
plt.clf()
|
||||
fig, ax = plt.subplots(ncols=2)
|
||||
colors = sns.color_palette()[1:diff_df.shape[0]+1]
|
||||
_ = sns.barplot(data=diff_df, y='Accuracy', x='Particle Type', ax=ax[0], palette=colors)
|
||||
|
||||
ax[0].set_title('Accuracy after particle dropout')
|
||||
ax[0].set_xlabel('Particle Type')
|
||||
|
||||
ax[1].pie(particle_dict.values(), labels=particle_dict.keys(), colors=list(reversed(colors)), )
|
||||
ax[1].set_title('Particle Count')
|
||||
|
||||
plt.tight_layout()
|
||||
if debug:
|
||||
plt.show()
|
||||
else:
|
||||
plt.savefig(Path(diff_store_path.parent / 'dropout_stacked_barplot.png'), dpi=300)
|
||||
|
||||
|
||||
def run_particle_dropout_and_plot(model_path):
|
||||
diff_store_path = run_particle_dropout_test(model_path)
|
||||
plot_dropout_stacked_barplot(model_path, diff_store_path)
|
||||
|
||||
|
||||
def flat_for_store(parameters):
|
||||
return (x.item() for y in parameters for x in y.detach().flatten())
|
||||
|
||||
|
||||
def train_self_replication(model, st_stps, **kwargs) -> dict:
|
||||
self_train_loss = model.combined_self_train(st_stps, **kwargs)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
stp_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
||||
return stp_log
|
||||
|
||||
|
||||
def train_task(model, optimizer, loss_func, btch_x, btch_y) -> (dict, torch.Tensor):
|
||||
# Zero your gradients for every batch!
|
||||
optimizer.zero_grad()
|
||||
btch_x, btch_y = btch_x.to(DEVICE), btch_y.to(DEVICE)
|
||||
y_prd = model(btch_x)
|
||||
|
||||
loss = loss_func(y_prd, btch_y.to(torch.long))
|
||||
loss.backward()
|
||||
|
||||
# Adjust learning weights
|
||||
optimizer.step()
|
||||
|
||||
stp_log = dict(Metric='Task Loss', Score=loss.item())
|
||||
|
||||
return stp_log, y_prd
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('This utility module is not meant to be run directly.')
|
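The training scripts log one row per event into the DataFrames created by new_storage_df() and periodically flush them to CSV in append mode, writing the header only on the first flush. In isolation, that bookkeeping pattern looks like this (the path and scores are illustrative):

from pathlib import Path

from experiments.meta_task_utility import new_storage_df

df_store_path = Path('output') / 'demo' / 'train_store.csv'  # hypothetical location
df_store_path.parent.mkdir(parents=True, exist_ok=True)

train_store = new_storage_df('train', None)
for epoch in range(2):
    for batch in range(3):
        # One row per logged metric; appending at index shape[0] preserves column order.
        train_store.loc[train_store.shape[0]] = dict(Epoch=epoch, Batch=batch,
                                                     Metric='Task Loss', Score=0.5)
    # Flush to disk and start a fresh in-memory frame; the header is written only once.
    train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
    train_store = new_storage_df('train', None)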
@@ -1,177 +0,0 @@
|
||||
import os.path
|
||||
import pickle
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_experiment, summary_fixpoint_percentage
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from network import Net
|
||||
from visualization import plot_loss, bar_chart_fixpoints, line_chart_fixpoints
|
||||
from visualization import plot_3d_self_train
|
||||
|
||||
|
||||
class MixedSettingExperiment:
|
||||
def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, train_nets,
|
||||
epochs, SA_steps, ST_steps_between_SA, log_step_size, directory_name):
|
||||
super().__init__()
|
||||
self.population_size = population_size
|
||||
|
||||
self.net_input_size = net_i_size
|
||||
self.net_hidden_size = net_h_size
|
||||
self.net_out_size = net_o_size
|
||||
self.net_learning_rate = learning_rate
|
||||
self.train_nets = train_nets
|
||||
self.epochs = epochs
|
||||
self.SA_steps = SA_steps
|
||||
self.ST_steps_between_SA = ST_steps_between_SA
|
||||
self.log_step_size = log_step_size
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
self.loss_history = []
|
||||
|
||||
self.fixpoint_counters_history = []
|
||||
|
||||
self.directory_name = directory_name
|
||||
os.mkdir(self.directory_name)
|
||||
|
||||
self.nets = []
|
||||
self.populate_environment()
|
||||
|
||||
self.fixpoint_percentage()
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
self.visualize_loss()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating mixed experiment %s" % i)
|
||||
|
||||
net_name = f"mixed_net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
self.nets.append(net)
|
||||
|
||||
loop_epochs = tqdm(range(self.epochs))
|
||||
for j in loop_epochs:
|
||||
loop_epochs.set_description("Running mixed experiment %s" % j)
|
||||
|
||||
for i in loop_population_size:
|
||||
net = self.nets[i]
|
||||
|
||||
if self.train_nets == "before_SA":
|
||||
for _ in range(self.ST_steps_between_SA):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
net.self_application(self.SA_steps, self.log_step_size)
|
||||
|
||||
elif self.train_nets == "after_SA":
|
||||
net.self_application(self.SA_steps, self.log_step_size)
|
||||
for _ in range(self.ST_steps_between_SA):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
print(
|
||||
f"\nLast weight matrix (epoch: {j}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
|
||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
# Round the result so that exact floating-point representation does not cause problems later
|
||||
fixpoints_percentage = round((self.fixpoint_counters["fix_zero"] + self.fixpoint_counters[
|
||||
"fix_sec"]) / self.population_size, 1)
|
||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
||||
|
||||
# Reset the fixpoint counters. The last iteration is not reset, because bar_chart_fixpoints() needs its values.
|
||||
if j < self.epochs:
|
||||
self.reset_fixpoint_counters()
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"Mixed {str(len(self.nets))}"
|
||||
|
||||
# The batch size is not relevant in the mixed setting: within an epoch, only the SA & ST steps
# use it. To keep the number of epochs shown in the 3D plot unaffected, a batch size of 1 is
# passed on via <irrelevant_batch_size>.
|
||||
irrelevant_batch_size = 1
|
||||
plot_3d_self_train(self.nets, exp_name, self.directory_name, irrelevant_batch_size, True)
|
||||
|
||||
def count_fixpoints(self):
|
||||
exp_details = f"SA steps: {self.SA_steps}; ST steps: {self.ST_steps_between_SA}"
|
||||
|
||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def fixpoint_percentage(self):
|
||||
line_chart_fixpoints(self.fixpoint_counters_history, self.epochs, self.ST_steps_between_SA,
|
||||
self.SA_steps, self.directory_name, self.population_size)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.nets)):
|
||||
net_loss_history = self.nets[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
|
||||
plot_loss(self.loss_history, self.directory_name)
|
||||
|
||||
def reset_fixpoint_counters(self):
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
|
||||
def run_mixed_experiment(population_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate, train_nets,
|
||||
epochs, SA_steps, ST_steps_between_SA, batch_size, name_hash, runs, run_name):
|
||||
experiments = {}
|
||||
fixpoints_percentages = []
|
||||
|
||||
check_folder("mixed")
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
directory_name = f"experiments/mixed/{run_name}_run_{i}_{str(population_size)}_nets_{SA_steps}_SA_{ST_steps_between_SA}_ST_{str(name_hash)}"
|
||||
|
||||
mixed_experiment = MixedSettingExperiment(
|
||||
population_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
train_nets,
|
||||
epochs,
|
||||
SA_steps,
|
||||
ST_steps_between_SA,
|
||||
batch_size,
|
||||
directory_name
|
||||
)
|
||||
pickle.dump(mixed_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
|
||||
experiments[i] = mixed_experiment
|
||||
|
||||
# Building history of fixpoint percentages for summary
|
||||
fixpoint_counters_history = mixed_experiment.fixpoint_counters_history
|
||||
if not fixpoints_percentages:
|
||||
fixpoints_percentages = mixed_experiment.fixpoint_counters_history
|
||||
else:
|
||||
# Element-wise sum of the fixpoint percentages across runs (via list comprehension)
|
||||
fixpoints_percentages = [fixpoints_percentages[i] + fixpoint_counters_history[i] for i in
|
||||
range(len(fixpoints_percentages))]
|
||||
|
||||
# Building a summary of all the runs
|
||||
directory_name = f"experiments/mixed/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{str(name_hash)}"
|
||||
os.mkdir(directory_name)
|
||||
|
||||
summary_pre_title = "mixed"
|
||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
|
||||
summary_pre_title)
|
||||
summary_fixpoint_percentage(runs, epochs, fixpoints_percentages, ST_steps_between_SA, SA_steps, directory_name,
|
||||
population_size)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('Test this here!!!')
|
@ -1,151 +0,0 @@
|
||||
import copy
|
||||
import os.path
|
||||
import pickle
|
||||
import random
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_experiment
|
||||
from functionalities_test import test_for_fixpoints, is_identity_function
|
||||
from network import Net
|
||||
from visualization import bar_chart_fixpoints, box_plot, write_file
|
||||
|
||||
|
||||
def add_noise(input_data, epsilon=pow(10, -5)):
|
||||
|
||||
output = copy.deepcopy(input_data)
|
||||
for k in range(len(input_data)):
|
||||
output[k][0] += random.random() * epsilon
|
||||
|
||||
return output
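# Hedged usage sketch (not part of the original module): add_noise perturbs only the first
# column of each row, which matches the [value, *encoding] layout returned by
# input_weight_matrix(); note that the added noise is always non-negative. For example,
# with a hypothetical 2-row matrix:
#   noisy = add_noise([[0.5, 1.0, 0.0, 0.0], [0.25, 2.0, 0.0, 0.0]], epsilon=1e-3)
#   -> a deep copy whose first entries are shifted upward by at most 1e-3 each.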
|
||||
|
||||
|
||||
class RobustnessExperiment:
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
ST_steps, directory_name) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
|
||||
self.net_learning_rate = net_learning_rate
|
||||
|
||||
self.ST_steps = ST_steps
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
self.id_functions = []
|
||||
|
||||
self.directory_name = directory_name
|
||||
os.mkdir(self.directory_name)
|
||||
|
||||
self.nets = []
|
||||
# Create population:
|
||||
self.populate_environment()
|
||||
print("Nets:\n", self.nets)
|
||||
|
||||
self.count_fixpoints()
|
||||
[print(net.is_fixpoint) for net in self.nets]
|
||||
self.test_robustness()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating robustness experiment %s" % i)
|
||||
|
||||
net_name = f"net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
self.nets.append(net)
|
||||
|
||||
def test_robustness(self):
|
||||
# test_for_fixpoints(self.fixpoint_counters, self.nets, self.id_functions)
|
||||
|
||||
zero_epsilon = pow(10, -5)
|
||||
data = [[0 for _ in range(10)] for _ in range(len(self.id_functions))]
|
||||
|
||||
for i in range(len(self.id_functions)):
|
||||
for j in range(10):
|
||||
original_net = self.id_functions[i]
|
||||
|
||||
# Create a clone of the network: instantiate a completely new network and copy the
# original weights into it (rather than copying the object itself).
|
||||
original_net_clone = Net(original_net.input_size, original_net.hidden_size, original_net.out_size,
|
||||
original_net.name)
|
||||
# Extra safety for the value of the weights
|
||||
original_net_clone.load_state_dict(copy.deepcopy(original_net.state_dict()))
|
||||
|
||||
noisy_weights = add_noise(original_net_clone.input_weight_matrix(), epsilon=pow(10, -j))
|
||||
original_net_clone.apply_weights(noisy_weights)
|
||||
|
||||
# Testing if the new net is still an identity function after applying noise
|
||||
still_id_func = is_identity_function(original_net_clone, zero_epsilon)
|
||||
|
||||
# If the net is still an identity function after the initial noise injection, keep self-applying it
# (up to 1000 steps) and count how long it remains an identity function.
|
||||
while still_id_func and data[i][j] <= 1000:
|
||||
data[i][j] += 1
|
||||
|
||||
original_net_clone = original_net_clone.self_application(1, self.log_step_size)
|
||||
|
||||
still_id_func = is_identity_function(original_net_clone, zero_epsilon)
|
||||
|
||||
print(f"Data {data}")
|
||||
|
||||
if all(all(val == 0 for val in row) for row in data):
print("There is no network resisting the robustness test.")
|
||||
text = f"For this population of \n {self.population_size} networks \n there is no" \
|
||||
f" network resisting the robustness test."
|
||||
write_file(text, self.directory_name)
|
||||
else:
|
||||
box_plot(data, self.directory_name, self.population_size)
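# Reading of <data> (illustrative, based on the loop above): data[i][j] is the number of
# consecutive self-application steps for which identity-network i remains an identity
# function after one noise injection of magnitude 10**-j, capped at roughly 1000 steps;
# the box plot summarises these survival counts per noise level.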
|
||||
|
||||
def count_fixpoints(self):
|
||||
exp_details = f"ST steps: {self.ST_steps}"
|
||||
|
||||
self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
|
||||
def run_robustness_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size,
|
||||
net_learning_rate, epochs, runs, run_name, name_hash):
|
||||
experiments = {}
|
||||
|
||||
check_folder("robustness")
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
ST_directory_name = f"experiments/robustness/{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
|
||||
robustness_experiment = RobustnessExperiment(
|
||||
population_size,
|
||||
batch_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
epochs,
|
||||
ST_directory_name
|
||||
)
|
||||
pickle.dump(robustness_experiment, open(f"{ST_directory_name}/full_experiment_pickle.p", "wb"))
|
||||
experiments[i] = robustness_experiment
|
||||
|
||||
# Building a summary of all the runs
|
||||
directory_name = f"experiments/robustness/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{str(name_hash)}"
|
||||
os.mkdir(directory_name)
|
||||
|
||||
summary_pre_title = "robustness"
|
||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
|
||||
summary_pre_title)
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('Test this here!!!')
|
@ -1,120 +0,0 @@
|
||||
import os.path
|
||||
import pickle
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_experiment
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from network import Net
|
||||
from visualization import bar_chart_fixpoints
|
||||
from visualization import plot_3d_self_application
|
||||
|
||||
|
||||
class SelfApplicationExperiment:
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size,
|
||||
net_learning_rate, application_steps, train_nets, directory_name, training_steps
|
||||
) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.SA_steps = application_steps
|
||||
|
||||
self.train_nets = train_nets
|
||||
self.ST_steps = training_steps
|
||||
|
||||
self.directory_name = directory_name
|
||||
os.mkdir(self.directory_name)
|
||||
|
||||
""" Creating the nets & making the SA steps & (maybe) also training the networks. """
|
||||
self.nets = []
|
||||
# Create population:
|
||||
self.populate_environment()
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating SA experiment %s" % i)
|
||||
|
||||
net_name = f"SA_net_{str(i)}"
|
||||
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name
|
||||
)
|
||||
for _ in range(self.SA_steps):
|
||||
input_data = net.input_weight_matrix()
|
||||
target_data = net.create_target_weights(input_data)
|
||||
|
||||
if self.train_nets == "before_SA":
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
net.self_application(self.SA_steps, self.log_step_size)
|
||||
elif self.train_nets == "after_SA":
|
||||
net.self_application(self.SA_steps, self.log_step_size)
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
else:
|
||||
net.self_application(self.SA_steps, self.log_step_size)
|
||||
|
||||
self.nets.append(net)
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"SA_{str(len(self.nets))}_nets_3d_weights_PCA"
|
||||
plot_3d_self_application(self.nets, exp_name, self.directory_name, self.log_step_size)
|
||||
|
||||
def count_fixpoints(self):
|
||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
exp_details = f"{self.SA_steps} SA steps"
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
|
||||
def run_SA_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size,
|
||||
net_learning_rate, runs, run_name, name_hash, application_steps, train_nets, training_steps):
|
||||
experiments = {}
|
||||
|
||||
check_folder("self_application")
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
directory_name = f"experiments/self_application/{run_name}_run_{i}_{str(population_size)}_nets_{application_steps}_SA_{str(name_hash)}"
|
||||
|
||||
SA_experiment = SelfApplicationExperiment(
|
||||
population_size,
|
||||
batch_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
application_steps,
|
||||
train_nets,
|
||||
directory_name,
|
||||
training_steps
|
||||
)
|
||||
pickle.dump(SA_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
|
||||
experiments[i] = SA_experiment
|
||||
|
||||
# Building a summary of all the runs
|
||||
directory_name = f"experiments/self_application/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{application_steps}_SA_{str(name_hash)}"
|
||||
os.mkdir(directory_name)
|
||||
|
||||
summary_pre_title = "SA"
|
||||
summary_fixpoint_experiment(runs, population_size, application_steps, experiments, net_learning_rate,
|
||||
directory_name,
|
||||
summary_pre_title)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('Test this here!!!')
|
@ -1,116 +0,0 @@
|
||||
import os.path
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_experiment
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from network import Net
|
||||
from visualization import plot_loss, bar_chart_fixpoints
|
||||
from visualization import plot_3d_self_train
|
||||
|
||||
|
||||
|
||||
class SelfTrainExperiment:
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, directory_name) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.epochs = epochs
|
||||
|
||||
self.loss_history = []
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
self.directory_name = directory_name
|
||||
os.mkdir(self.directory_name)
|
||||
|
||||
self.nets = []
|
||||
# Create population:
|
||||
self.populate_environment()
|
||||
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
self.visualize_loss()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating ST experiment %s" % i)
|
||||
|
||||
net_name = f"ST_net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
|
||||
for _ in range(self.epochs):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
print(f"\nLast weight matrix (epoch: {self.epochs}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
|
||||
self.nets.append(net)
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
|
||||
return plot_3d_self_train(self.nets, exp_name, self.directory_name, self.log_step_size)
|
||||
|
||||
def count_fixpoints(self):
|
||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
exp_details = f"Self-train for {self.epochs} epochs"
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.nets)):
|
||||
net_loss_history = self.nets[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
|
||||
plot_loss(self.loss_history, self.directory_name)
|
||||
|
||||
|
||||
def run_ST_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, runs, run_name, name_hash):
|
||||
experiments = {}
|
||||
logging_directory = Path('output') / 'self_training'
|
||||
logging_directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
experiment_name = f"{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
this_exp_directory = logging_directory / experiment_name
|
||||
ST_experiment = SelfTrainExperiment(
|
||||
population_size,
|
||||
batch_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
epochs,
|
||||
this_exp_directory
|
||||
)
|
||||
with (this_exp_directory / 'full_experiment_pickle.p').open('wb') as f:
|
||||
pickle.dump(ST_experiment, f)
|
||||
experiments[i] = ST_experiment
|
||||
|
||||
# Building a summary of all the runs
|
||||
summary_name = f"/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
summary_directory_name = logging_directory / summary_name
|
||||
summary_directory_name.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
summary_pre_title = "ST"
|
||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, summary_directory_name,
|
||||
summary_pre_title)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('Test this here!!!')
|
@ -1,114 +0,0 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_experiment
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from network import SecondaryNet
|
||||
from visualization import plot_loss, bar_chart_fixpoints
|
||||
from visualization import plot_3d_self_train
|
||||
|
||||
|
||||
class SelfTrainExperimentSecondary:
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, directory: Path) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.epochs = epochs
|
||||
|
||||
self.loss_history = []
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
self.directory_name = Path(directory)
|
||||
self.directory_name.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.nets = []
|
||||
# Create population:
|
||||
self.populate_environment()
|
||||
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
self.visualize_loss()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating ST experiment %s" % i)
|
||||
|
||||
net_name = f"ST_net_{str(i)}"
|
||||
net = SecondaryNet(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
|
||||
for _ in range(self.epochs):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
print(f"\nLast weight matrix (epoch: {self.epochs}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
|
||||
self.nets.append(net)
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
|
||||
return plot_3d_self_train(self.nets, exp_name, self.directory_name, self.log_step_size)
|
||||
|
||||
def count_fixpoints(self):
|
||||
test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
exp_details = f"Self-train for {self.epochs} epochs"
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.nets)):
|
||||
net_loss_history = self.nets[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
|
||||
plot_loss(self.loss_history, self.directory_name)
|
||||
|
||||
|
||||
def run_ST_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, runs, run_name, name_hash):
|
||||
experiments = {}
|
||||
logging_directory = Path('output') / 'self_training'
|
||||
logging_directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
experiment_name = f"{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
this_exp_directory = logging_directory / experiment_name
|
||||
ST_experiment = SelfTrainExperimentSecondary(
|
||||
population_size,
|
||||
batch_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
epochs,
|
||||
this_exp_directory
|
||||
)
|
||||
with (this_exp_directory / 'full_experiment_pickle.p').open('wb') as f:
|
||||
pickle.dump(ST_experiment, f)
|
||||
experiments[i] = ST_experiment
|
||||
|
||||
# Building a summary of all the runs
|
||||
summary_name = f"/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
summary_directory_name = logging_directory / summary_name
|
||||
summary_directory_name.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
summary_pre_title = "ST"
|
||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, summary_directory_name,
|
||||
summary_pre_title)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
raise NotImplementedError('Test this here!!!')
|
@ -1,190 +0,0 @@
|
||||
import random
|
||||
import os.path
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.helpers import check_folder, summary_fixpoint_percentage, summary_fixpoint_experiment
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from network import Net
|
||||
from visualization import plot_loss, bar_chart_fixpoints, plot_3d_soup, line_chart_fixpoints
|
||||
|
||||
|
||||
class SoupExperiment:
|
||||
def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, attack_chance,
|
||||
train_nets, ST_steps, epochs, log_step_size, directory: Union[str, Path]):
|
||||
super().__init__()
|
||||
self.population_size = population_size
|
||||
|
||||
self.net_input_size = net_i_size
|
||||
self.net_hidden_size = net_h_size
|
||||
self.net_out_size = net_o_size
|
||||
self.net_learning_rate = learning_rate
|
||||
self.attack_chance = attack_chance
|
||||
self.train_nets = train_nets
|
||||
# self.SA_steps = SA_steps
|
||||
self.ST_steps = ST_steps
|
||||
self.epochs = epochs
|
||||
self.log_step_size = log_step_size
|
||||
|
||||
self.loss_history = []
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
# <self.fixpoint_counters_history> is used for keeping track of the amount of fixpoints in %
|
||||
self.fixpoint_counters_history = []
|
||||
|
||||
self.directory = Path(directory)
|
||||
self.directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.population = []
|
||||
self.populate_environment()
|
||||
|
||||
self.evolve()
|
||||
self.fixpoint_percentage()
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
self.visualize_loss()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating soup experiment %s" % i)
|
||||
|
||||
net_name = f"soup_network_{i}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
self.population.append(net)
|
||||
|
||||
def population_self_train(self):
|
||||
# Self-training each network in the population
|
||||
for j in range(self.population_size):
|
||||
net = self.population[j]
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
def population_attack(self):
|
||||
# A network attacking another network with a given percentage
|
||||
if random.randint(1, 100) <= self.attack_chance:
|
||||
random_net1, random_net2 = random.sample(range(self.population_size), 2)
|
||||
random_net1 = self.population[random_net1]
|
||||
random_net2 = self.population[random_net2]
|
||||
print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
|
||||
random_net1.attack(random_net2)
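# Illustrative note (sketch): with attack_chance given in percent, random.randint(1, 100) <=
# self.attack_chance triggers an attack in roughly attack_chance percent of the epochs; two
# distinct nets are sampled and the first attacks the second via Net.attack (defined in
# network.py, not shown here).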
|
||||
|
||||
def evolve(self):
|
||||
""" Evolving consists of attacking & self-training. """
|
||||
|
||||
loop_epochs = tqdm(range(self.epochs))
|
||||
for i in loop_epochs:
|
||||
loop_epochs.set_description("Evolving soup %s" % i)
|
||||
|
||||
# A network attacking another network with a given percentage
|
||||
self.population_attack()
|
||||
|
||||
# Self-training each network in the population
|
||||
self.population_self_train()
|
||||
|
||||
# Testing for fixpoints after each batch of ST steps to see relevant data
|
||||
if i % self.ST_steps == 0:
|
||||
test_for_fixpoints(self.fixpoint_counters, self.population)
|
||||
fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
|
||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
||||
|
||||
# Resetting the fixpoint counter. Last iteration not to be reset -
|
||||
# it is important for the bar_chart_fixpoints().
|
||||
if i < self.epochs:
|
||||
self.reset_fixpoint_counters()
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"soup_{self.population_size}_nets_{self.ST_steps}_training_{self.epochs}_epochs"
|
||||
return plot_3d_soup(self.population, exp_name, self.directory)
|
||||
|
||||
def count_fixpoints(self):
|
||||
test_for_fixpoints(self.fixpoint_counters, self.population)
|
||||
exp_details = f"Evolution steps: {self.epochs} epochs"
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def fixpoint_percentage(self):
|
||||
runs = self.epochs / self.ST_steps
|
||||
SA_steps = None
|
||||
line_chart_fixpoints(self.fixpoint_counters_history, runs, self.ST_steps, SA_steps, self.directory,
|
||||
self.population_size)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.population)):
|
||||
net_loss_history = self.population[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
|
||||
plot_loss(self.loss_history, self.directory)
|
||||
|
||||
def reset_fixpoint_counters(self):
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
|
||||
def run_soup_experiment(population_size, attack_chance, net_input_size, net_hidden_size, net_out_size,
|
||||
net_learning_rate, epochs, batch_size, runs, run_name, name_hash, ST_steps, train_nets):
|
||||
experiments = {}
|
||||
fixpoints_percentages = []
|
||||
|
||||
check_folder("soup")
|
||||
|
||||
# Running the experiments
|
||||
for i in range(runs):
|
||||
# FIXME: Make this a pathlib.Path() Operation
|
||||
directory_name = f"experiments/soup/{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
|
||||
soup_experiment = SoupExperiment(
|
||||
population_size,
|
||||
net_input_size,
|
||||
net_hidden_size,
|
||||
net_out_size,
|
||||
net_learning_rate,
|
||||
attack_chance,
|
||||
train_nets,
|
||||
ST_steps,
|
||||
epochs,
|
||||
batch_size,
|
||||
directory_name
|
||||
)
|
||||
pickle.dump(soup_experiment, open(f"{directory_name}/full_experiment_pickle.p", "wb"))
|
||||
experiments[i] = soup_experiment
|
||||
|
||||
# Building history of fixpoint percentages for summary
|
||||
fixpoint_counters_history = soup_experiment.fixpoint_counters_history
|
||||
if not fixpoints_percentages:
|
||||
fixpoints_percentages = soup_experiment.fixpoint_counters_history
|
||||
else:
|
||||
# Element-wise sum of the fixpoint percentages across runs (via list comprehension)
|
||||
fixpoints_percentages = [fixpoints_percentages[i] + fixpoint_counters_history[i] for i in
|
||||
range(len(fixpoints_percentages))]
|
||||
|
||||
# Creating a folder for the summary of the current runs
|
||||
# FIXME: Make this a pathlib.Path() Operation
|
||||
directory_name = f"experiments/soup/summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
|
||||
os.mkdir(directory_name)
|
||||
|
||||
# Building a summary of all the runs
|
||||
summary_pre_title = "soup"
|
||||
summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, directory_name,
|
||||
summary_pre_title)
|
||||
SA_steps = None
|
||||
summary_fixpoint_percentage(runs, epochs, fixpoints_percentages, ST_steps, SA_steps, directory_name,
|
||||
population_size)
|
||||
|
@ -1,50 +0,0 @@
|
||||
import random
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.soup_exp import SoupExperiment
|
||||
from functionalities_test import test_for_fixpoints
|
||||
|
||||
|
||||
class MeltingSoupExperiment(SoupExperiment):
|
||||
|
||||
def __init__(self, melt_chance, *args, keep_population_size=True, **kwargs):
|
||||
# Set the melt parameters before calling the parent constructor, because
# SoupExperiment.__init__ already runs evolve(), which needs them.
self.keep_population_size = keep_population_size
self.melt_chance = melt_chance
super(MeltingSoupExperiment, self).__init__(*args, **kwargs)
|
||||
|
||||
def population_melt(self):
|
||||
# A network melting with another network by a given percentage
|
||||
if random.randint(1, 100) <= self.melt_chance:
|
||||
random_net1_idx, random_net2_idx, destroy_idx = random.sample(range(self.population_size), 3)
|
||||
random_net1 = self.population[random_net1_idx]
|
||||
random_net2 = self.population[random_net2_idx]
|
||||
print(f"\n Melt: {random_net1.name} -> {random_net2.name}")
|
||||
melted_network = random_net1.melt(random_net2)
|
||||
if self.keep_population_size:
|
||||
del self.population[destroy_idx]
|
||||
self.population.append(melted_network)
|
||||
|
||||
def evolve(self):
|
||||
""" Evolving consists of attacking, melting & self-training. """
|
||||
|
||||
loop_epochs = tqdm(range(self.epochs))
|
||||
for i in loop_epochs:
|
||||
loop_epochs.set_description("Evolving soup %s" % i)
|
||||
|
||||
self.population_attack()
|
||||
|
||||
self.population_melt()
|
||||
|
||||
self.population_self_train()
|
||||
|
||||
# Testing for fixpoints after each batch of ST steps to see relevant data
|
||||
if i % self.ST_steps == 0:
|
||||
test_for_fixpoints(self.fixpoint_counters, self.population)
|
||||
fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
|
||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
||||
|
||||
# Resetting the fixpoint counter. Last iteration not to be reset -
|
||||
# it is important for the bar_chart_fixpoints().
|
||||
if i < self.epochs:
|
||||
self.reset_fixpoint_counters()
|
@ -1,203 +0,0 @@
|
||||
import copy
|
||||
import itertools
|
||||
from pathlib import Path
|
||||
import random
|
||||
import pickle
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from functionalities_test import is_identity_function, test_status
|
||||
from journal_basins import SpawnExperiment, mean_invariate_manhattan_distance
|
||||
from network import Net
|
||||
|
||||
from sklearn.metrics import mean_absolute_error as MAE
|
||||
from sklearn.metrics import mean_squared_error as MSE
|
||||
|
||||
|
||||
class SpawnLinspaceExperiment(SpawnExperiment):
|
||||
|
||||
def spawn_and_continue(self, number_clones: int = None):
|
||||
number_clones = number_clones or self.nr_clones
|
||||
|
||||
df = pd.DataFrame(
|
||||
columns=['clone', 'parent', 'parent2',
|
||||
'MAE_pre', 'MAE_post',
|
||||
'MSE_pre', 'MSE_post',
|
||||
'MIM_pre', 'MIM_post',
|
||||
'noise', 'status_pst'])
|
||||
|
||||
# For every initial net {i} after populating (that is fixpoint after first epoch);
|
||||
# parent = self.parents[0]
|
||||
# parent_clone = clone = Net(parent.input_size, parent.hidden_size, parent.out_size,
|
||||
# name=f"{parent.name}_clone_{0}", start_time=self.ST_steps)
|
||||
# parent_clone.apply_weights(torch.as_tensor(parent.create_target_weights(parent.input_weight_matrix())))
|
||||
# parent_clone = parent_clone.apply_noise(self.noise)
|
||||
# self.parents.append(parent_clone)
|
||||
pairwise_net_list = list(itertools.combinations(self.parents, 2))
|
||||
for net1, net2 in pairwise_net_list:
|
||||
# We set the parents' start_time to just before this epoch ended, so plotting is zoomed in.
# Comment this out to see the full trajectory (but the clones will be very hard to see).
|
||||
# Make one target to compare distances to clones later when they have trained.
|
||||
net1.start_time = self.ST_steps - 150
|
||||
net1_input_data = net1.input_weight_matrix().detach()
|
||||
net1_target_data = net1.create_target_weights(net1_input_data).detach()
|
||||
|
||||
net2.start_time = self.ST_steps - 150
|
||||
net2_input_data = net2.input_weight_matrix().detach()
|
||||
net2_target_data = net2.create_target_weights(net2_input_data).detach()
|
||||
|
||||
if is_identity_function(net1) and is_identity_function(net2):
|
||||
# if True:
|
||||
# Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
|
||||
# To plot clones starting after first epoch (z=ST_steps), set that as start_time!
|
||||
# To make sure PCA will plot the same trajectory up until this point, we clone the
|
||||
# parent-net's weight history as well.
|
||||
|
||||
in_between_weights = np.linspace(net1_target_data, net2_target_data, number_clones, endpoint=False)
|
||||
# in_between_weights = np.logspace(net1_target_data, net2_target_data, number_clones, endpoint=False)
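# Illustrative note: np.linspace broadcasts over array end points, so this yields
# <number_clones> weight vectors evenly spaced on the straight line between net1's and
# net2's target weights, e.g.
#   np.linspace([0., 0.], [1., 2.], 4, endpoint=False)
#   -> [[0., 0.], [0.25, 0.5], [0.5, 1.], [0.75, 1.5]]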
|
||||
|
||||
for j, in_between_weight in enumerate(in_between_weights):
|
||||
clone = Net(net1.input_size, net1.hidden_size, net1.out_size,
|
||||
name=f"{net1.name}_{net2.name}_clone_{str(j)}", start_time=self.ST_steps + 100)
|
||||
clone.apply_weights(torch.as_tensor(in_between_weight))
|
||||
|
||||
clone.s_train_weights_history = copy.deepcopy(net1.s_train_weights_history)
|
||||
clone.number_trained = copy.deepcopy(net1.number_trained)
|
||||
|
||||
# Pre Training distances (after noise application of course)
|
||||
clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix()).detach()
|
||||
MAE_pre = MAE(net1_target_data, clone_pre_weights)
|
||||
MSE_pre = MSE(net1_target_data, clone_pre_weights)
|
||||
MIM_pre = mean_invariate_manhattan_distance(net1_target_data, clone_pre_weights)
|
||||
|
||||
try:
|
||||
# Then finish training each clone {j} (for remaining epoch-1 * ST_steps) ..
|
||||
for _ in range(self.epochs - 1):
|
||||
for _ in range(self.ST_steps):
|
||||
clone.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
if any([torch.isnan(x).any() for x in clone.parameters()]):
|
||||
raise ValueError
|
||||
except ValueError:
|
||||
print("Ran into nan in 'in beetween weights' array.")
|
||||
df.loc[len(df)] = [j, net1.name, net2.name,
|
||||
MAE_pre, 0,
|
||||
MSE_pre, 0,
|
||||
MIM_pre, 0,
|
||||
self.noise, clone.is_fixpoint]
|
||||
continue
|
||||
|
||||
# Post Training distances for comparison
|
||||
clone_post_weights = clone.create_target_weights(clone.input_weight_matrix()).detach()
|
||||
MAE_post = MAE(net1_target_data, clone_post_weights)
|
||||
MSE_post = MSE(net1_target_data, clone_post_weights)
|
||||
MIM_post = mean_invariate_manhattan_distance(net1_target_data, clone_post_weights)
|
||||
|
||||
# .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
|
||||
test_status(clone)
|
||||
if is_identity_function(clone):
|
||||
print(f"Clone {j} (between {net1.name} and {net2.name}) is fixpoint."
|
||||
f"\nMSE({net1.name},{j}): {MSE_post}"
|
||||
f"\nMAE({net1.name},{j}): {MAE_post}"
|
||||
f"\nMIM({net1.name},{j}): {MIM_post}\n")
|
||||
self.nets.append(clone)
|
||||
|
||||
df.loc[len(df)] = [j, net1.name, net2.name,
|
||||
MAE_pre, MAE_post,
|
||||
MSE_pre, MSE_post,
|
||||
MIM_pre, MIM_post,
|
||||
self.noise, clone.is_fixpoint]
|
||||
|
||||
for net1, net2 in pairwise_net_list:
|
||||
try:
|
||||
value = 'MAE'
|
||||
c_selector = [f'{value}_pre', f'{value}_post']
|
||||
values = df.loc[(df['parent'] == net1.name) & (df['parent2'] == net2.name)][c_selector]
|
||||
this_min, this_max = values.values.min(), values.values.max()
|
||||
df.loc[(df['parent'] == net1.name) &
|
||||
(df['parent2'] == net2.name), c_selector] = (values - this_min) / (this_max - this_min)
|
||||
except ValueError:
|
||||
pass
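# Illustrative note (sketch): the block above min-max normalises MAE_pre / MAE_post per
# parent pair to [0, 1], e.g. raw values 0.2, 0.5, 0.8 -> 0.0, 0.5, 1.0, so pairs with very
# different absolute distances become comparable in later plots; the ValueError guard skips
# pairs for which no clone rows were logged.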
|
||||
|
||||
for parent in self.parents:
|
||||
for _ in range(self.epochs - 1):
|
||||
for _ in range(self.ST_steps):
|
||||
parent.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
self.df = df
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
|
||||
# Define number of runs & name:
|
||||
ST_runs = 1
|
||||
ST_runs_name = "test-27"
|
||||
ST_steps = 2000
|
||||
ST_epochs = 2
|
||||
ST_log_step_size = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
nr_clones = 25
|
||||
ST_population_size = 10
|
||||
ST_net_hidden_size = 2
|
||||
ST_net_learning_rate = 0.04
|
||||
ST_name_hash = random.getrandbits(32)
|
||||
|
||||
print(f"Running the Spawn experiment:")
|
||||
exp = SpawnLinspaceExperiment(
|
||||
population_size=ST_population_size,
|
||||
log_step_size=ST_log_step_size,
|
||||
net_input_size=NET_INPUT_SIZE,
|
||||
net_hidden_size=ST_net_hidden_size,
|
||||
net_out_size=NET_OUT_SIZE,
|
||||
net_learning_rate=ST_net_learning_rate,
|
||||
epochs=ST_epochs,
|
||||
st_steps=ST_steps,
|
||||
nr_clones=nr_clones,
|
||||
noise=1e-8,
|
||||
directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'linage'
|
||||
)
|
||||
df = exp.df
|
||||
|
||||
directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}' / 'linage'
|
||||
with (directory / f"experiment_pickle_{ST_name_hash}.p").open('wb') as f:
|
||||
pickle.dump(exp, f)
|
||||
print(f"\nSaved experiment to {directory}.")
|
||||
|
||||
# Boxplot with counts of nr_fixpoints, nr_other, nr_etc. on y-axis
|
||||
# sns.countplot(data=df, x="noise", hue="status_post")
|
||||
# plt.savefig(f"output/spawn_basin/{ST_name_hash}/fixpoint_status_countplot.png")
|
||||
|
||||
# Catplot (either kind="point" or "box") that shows before-after training distances to parent
|
||||
# mlt = df[["MIM_pre", "MIM_post", "noise"]].melt("noise", var_name="time", value_name='Average Distance')
|
||||
# sns.catplot(data=mlt, x="time", y="Average Distance", col="noise", kind="point", col_wrap=5, sharey=False)
|
||||
# plt.savefig(f"output/spawn_basin/{ST_name_hash}/clone_distance_catplot.png")
|
||||
|
||||
# Pointplot with pre and after parent Distances
|
||||
import seaborn as sns
|
||||
from matplotlib import pyplot as plt, ticker
|
||||
|
||||
# ptplt = sns.pointplot(data=exp.df, x='MAE_pre', y='MAE_post', join=False)
|
||||
ptplt = sns.scatterplot(x=exp.df['MAE_pre'], y=exp.df['MAE_post'])
|
||||
# ptplt.set(xscale='log', yscale='log')
|
||||
x0, x1 = ptplt.axes.get_xlim()
|
||||
y0, y1 = ptplt.axes.get_ylim()
|
||||
lims = [max(x0, y0), min(x1, y1)]
|
||||
# This is the x=y line using transforms
|
||||
ptplt.plot(lims, lims, 'w', linestyle='dashdot', transform=ptplt.axes.transData)
|
||||
ptplt.plot([0, 1], [0, 1], ':k', transform=ptplt.axes.transAxes)
|
||||
ptplt.set(xlabel='Mean Absolute Distance before Self-Training',
|
||||
ylabel='Mean Absolute Distance after Self-Training')
|
||||
# ptplt.axes.xaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: round(float(x), 2)))
|
||||
# ptplt.xticks(rotation=45)
|
||||
#for ind, label in enumerate(ptplt.get_xticklabels()):
|
||||
# if ind % 10 == 0: # every 10th label is kept
|
||||
# label.set_visible(True)
|
||||
# else:
|
||||
# label.set_visible(False)
|
||||
|
||||
filepath = exp.directory / 'mim_dist_plot.pdf'
|
||||
plt.tight_layout()
|
||||
plt.savefig(filepath, dpi=600, format='pdf', bbox_inches='tight')
|
@ -1,315 +0,0 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
import pickle
|
||||
from tqdm import tqdm
|
||||
import random
|
||||
import copy
|
||||
from functionalities_test import is_identity_function, test_status
|
||||
from network import Net
|
||||
from visualization import plot_3d_self_train, plot_loss
|
||||
import numpy as np
|
||||
from tabulate import tabulate
|
||||
from sklearn.metrics import mean_absolute_error as MAE
|
||||
from sklearn.metrics import mean_squared_error as MSE
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from matplotlib import pyplot as plt
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
def prng():
|
||||
return random.random()
|
||||
|
||||
def l1(tup):
|
||||
a, b = tup
|
||||
return abs(a - b)
|
||||
|
||||
|
||||
def mean_invariate_manhattan_distance(x, y):
|
||||
# One of those one-liners that might be smart or really dumb. The goal is to take the pairwise
# distances of the ascending (sorted) values, i.e. mean(abs(min1_x - min1_y), abs(min2_x - min2_y), ...).
# The idea is that weight sets containing the same values, just at different positions,
# then get a distance of 0.
|
||||
try:
|
||||
return np.mean(list(map(l1, zip(sorted(x.detach().numpy()), sorted(y.detach().numpy())))))
|
||||
except AttributeError:
|
||||
return np.mean(list(map(l1, zip(sorted(x.numpy()), sorted(y.numpy())))))
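# Illustrative example (sketch): because both vectors are sorted before the pairwise L1
# distances are taken, permutations of the same values yield a distance of 0:
#   x = torch.tensor([0.1, 0.5, -0.2]); y = torch.tensor([-0.2, 0.1, 0.5])
#   mean_invariate_manhattan_distance(x, y)  # -> 0.0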
|
||||
|
||||
|
||||
def distance_matrix(nets, distance="MIM", print_it=True):
|
||||
matrix = [[0 for _ in range(len(nets))] for _ in range(len(nets))]
|
||||
for net in range(len(nets)):
|
||||
weights = nets[net].input_weight_matrix()[:, 0]
|
||||
for other_net in range(len(nets)):
|
||||
other_weights = nets[other_net].input_weight_matrix()[:, 0]
|
||||
if distance in ["MSE"]:
|
||||
matrix[net][other_net] = MSE(weights, other_weights)
|
||||
elif distance in ["MAE"]:
|
||||
matrix[net][other_net] = MAE(weights, other_weights)
|
||||
elif distance in ["MIM"]:
|
||||
matrix[net][other_net] = mean_invariate_manhattan_distance(weights, other_weights)
|
||||
|
||||
if print_it:
|
||||
print(f"\nDistance matrix (all to all) [{distance}]:")
|
||||
headers = [i.name for i in nets]
|
||||
print(tabulate(matrix, showindex=headers, headers=headers, tablefmt='orgtbl'))
|
||||
return matrix
|
||||
|
||||
|
||||
def distance_from_parent(nets, distance="MIM", print_it=True):
|
||||
list_of_matrices = []
|
||||
parents = list(filter(lambda x: "clone" not in x.name and is_identity_function(x), nets))
|
||||
distance_range = range(10)
|
||||
for parent in parents:
|
||||
parent_weights = parent.create_target_weights(parent.input_weight_matrix())
|
||||
clones = list(filter(lambda y: parent.name in y.name and parent.name != y.name, nets))
|
||||
matrix = [[0 for _ in distance_range] for _ in range(len(clones))]
|
||||
|
||||
for dist in distance_range:
|
||||
for idx, clone in enumerate(clones):
|
||||
clone_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
if distance in ["MSE"]:
|
||||
matrix[idx][dist] = MSE(parent_weights, clone_weights) < pow(10, -dist)
|
||||
elif distance in ["MAE"]:
|
||||
matrix[idx][dist] = MAE(parent_weights, clone_weights) < pow(10, -dist)
|
||||
elif distance in ["MIM"]:
|
||||
matrix[idx][dist] = mean_invariate_manhattan_distance(parent_weights, clone_weights) < pow(10,
|
||||
-dist)
|
||||
|
||||
if print_it:
|
||||
print(f"\nDistances from parent {parent.name} [{distance}]:")
|
||||
col_headers = [str(f"10e-{d}") for d in distance_range]
|
||||
row_headers = [str(f"clone_{i}") for i in range(len(clones))]
|
||||
print(tabulate(matrix, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
|
||||
list_of_matrices.append(matrix)
|
||||
|
||||
return list_of_matrices
|
||||
|
||||
|
||||
class SpawnExperiment:
|
||||
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, st_steps, nr_clones, noise, directory) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.epochs = epochs
|
||||
self.ST_steps = st_steps
|
||||
self.loss_history = []
|
||||
self.nets = []
|
||||
self.nr_clones = nr_clones
|
||||
self.noise = noise or 10e-5
|
||||
print("\nNOISE:", self.noise)
|
||||
|
||||
self.parents = []
|
||||
|
||||
self.directory = Path(directory)
|
||||
self.directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.populate_environment()
|
||||
self.spawn_and_continue()
|
||||
self.weights_evolution_3d_experiment()
|
||||
# self.visualize_loss()
|
||||
self.distance_matrix = distance_matrix(self.nets, print_it=False)
|
||||
self.parent_clone_distances = distance_from_parent(self.nets, print_it=False)
|
||||
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating experiment %s" % i)
|
||||
|
||||
net_name = f"ST_net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
self.nets.append(net)
|
||||
self.parents.append(net)
|
||||
|
||||
def spawn_and_continue(self, number_clones: int = None):
|
||||
number_clones = number_clones or self.nr_clones
|
||||
|
||||
df = pd.DataFrame(
|
||||
columns=['name', 'MAE_pre', 'MAE_post', 'MSE_pre', 'MSE_post', 'MIM_pre', 'MIM_post', 'noise',
|
||||
'status_post'])
|
||||
|
||||
# For every initial net {i} after populating (that is fixpoint after first epoch);
|
||||
for i in range(self.population_size):
|
||||
net = self.nets[i]
|
||||
# We set the parent's start_time to just before this epoch ended, so plotting is zoomed in.
# Comment this out to see the full trajectory (but the clones will be very hard to see).
|
||||
# Make one target to compare distances to clones later when they have trained.
|
||||
net.start_time = self.ST_steps - 350
|
||||
net_input_data = net.input_weight_matrix()
|
||||
net_target_data = net.create_target_weights(net_input_data)
|
||||
|
||||
if is_identity_function(net):
|
||||
print(f"\nNet {i} is fixpoint")
|
||||
|
||||
# Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
|
||||
# To plot clones starting after first epoch (z=ST_steps), set that as start_time!
|
||||
# To make sure PCA will plot the same trajectory up until this point, we clone the
|
||||
# parent-net's weight history as well.
|
||||
for j in range(number_clones):
|
||||
clone = Net(net.input_size, net.hidden_size, net.out_size,
|
||||
f"ST_net_{str(i)}_clone_{str(j)}", start_time=self.ST_steps)
|
||||
clone.load_state_dict(copy.deepcopy(net.state_dict()))
|
||||
rand_noise = prng() * self.noise
|
||||
clone = clone.apply_noise(rand_noise)
|
||||
clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history)
|
||||
clone.number_trained = copy.deepcopy(net.number_trained)
|
||||
|
||||
# Pre Training distances (after noise application of course)
|
||||
clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
MAE_pre = MAE(net_target_data, clone_pre_weights)
|
||||
MSE_pre = MSE(net_target_data, clone_pre_weights)
|
||||
MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights)
|
||||
|
||||
# Then finish training each clone {j} (for remaining epoch-1 * ST_steps) ..
|
||||
for _ in range(self.epochs - 1):
|
||||
for _ in range(self.ST_steps):
|
||||
clone.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
# Post Training distances for comparison
|
||||
clone_post_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
MAE_post = MAE(net_target_data, clone_post_weights)
|
||||
MSE_post = MSE(net_target_data, clone_post_weights)
|
||||
MIM_post = mean_invariate_manhattan_distance(net_target_data, clone_post_weights)
|
||||
|
||||
# .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
|
||||
test_status(clone)
|
||||
if is_identity_function(clone):
|
||||
print(f"Clone {j} (of net_{i}) is fixpoint."
|
||||
f"\nMSE({i},{j}): {MSE_post}"
|
||||
f"\nMAE({i},{j}): {MAE_post}"
|
||||
f"\nMIM({i},{j}): {MIM_post}\n")
|
||||
self.nets.append(clone)
|
||||
|
||||
df.loc[clone.name] = [clone.name, MAE_pre, MAE_post, MSE_pre, MSE_post, MIM_pre, MIM_post, self.noise, clone.is_fixpoint]
|
||||
|
||||
# Finally, take parent net {i} and finish its training for comparison with the clones' development.
|
||||
for _ in range(self.epochs - 1):
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
net_weights_after = net.create_target_weights(net.input_weight_matrix())
|
||||
print(f"Parent net's distance to original position."
|
||||
f"\nMSE(OG,new): {MAE(net_target_data, net_weights_after)}"
|
||||
f"\nMAE(OG,new): {MSE(net_target_data, net_weights_after)}"
|
||||
f"\nMIM(OG,new): {mean_invariate_manhattan_distance(net_target_data, net_weights_after)}\n")
|
||||
|
||||
self.df = df
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
|
||||
return plot_3d_self_train(self.nets, exp_name, self.directory, self.log_step_size, plot_pca_together=True)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.nets)):
|
||||
net_loss_history = self.nets[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
plot_loss(self.loss_history, self.directory)
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
|
||||
# Define number of runs & name:
|
||||
ST_runs = 1
|
||||
ST_runs_name = "test-27"
|
||||
ST_steps = 2500
|
||||
ST_epochs = 2
|
||||
ST_log_step_size = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
nr_clones = 10
|
||||
ST_population_size = 1
|
||||
ST_net_hidden_size = 2
|
||||
ST_net_learning_rate = 0.04
|
||||
ST_name_hash = random.getrandbits(32)
|
||||
|
||||
print(f"Running the Spawn experiment:")
|
||||
exp_list = []
|
||||
for noise_factor in range(2, 3):
|
||||
exp = SpawnExperiment(
|
||||
population_size=ST_population_size,
|
||||
log_step_size=ST_log_step_size,
|
||||
net_input_size=NET_INPUT_SIZE,
|
||||
net_hidden_size=ST_net_hidden_size,
|
||||
net_out_size=NET_OUT_SIZE,
|
||||
net_learning_rate=ST_net_learning_rate,
|
||||
epochs=ST_epochs,
|
||||
st_steps=ST_steps,
|
||||
nr_clones=nr_clones,
|
||||
noise=pow(10, -noise_factor),
|
||||
directory=Path('output') / 'spawn_basin' / f'{ST_name_hash}' / f'10e-{noise_factor}'
|
||||
)
|
||||
exp_list.append(exp)
|
||||
|
||||
directory = Path('output') / 'spawn_basin' / f'{ST_name_hash}'
|
||||
pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
|
||||
print(f"\nSaved experiment to {directory}.")
|
||||
|
||||
# Concat all dataframes, and add columns depending on where clone weights end up after training (rel. to parent)
|
||||
df = pd.concat([exp.df for exp in exp_list])
|
||||
df = df.dropna().reset_index()
|
||||
df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"])/df.loc[i]["noise"] for i in range(len(df))]
|
||||
df["class"] = [ "approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))]
|
||||
|
||||
# Countplot of all fixpoint clone after training per class.
|
||||
ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=11.7/5.27, legend=False)
|
||||
ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15)
|
||||
ax.set_xticklabels(labels=('$\mathregular{10^{-10}}$', '$\mathregular{10^{-9}}$', '$\mathregular{10^{-8}}$', '$\mathregular{10^{-7}}$', '$\mathregular{10^{-6}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-4}}$', '$\mathregular{10^{-3}}$', '$\mathregular{10^{-2}}$', '$\mathregular{10^{-1}}$'), fontsize=15)
|
||||
plt.legend(bbox_to_anchor=(0.01, 0.85), loc=2, borderaxespad=0.)
|
||||
plt.legend(fontsize='large')
|
||||
plt.savefig(f"{directory}/clone_status_after_countplot_{ST_name_hash}.png")
|
||||
plt.clf()
|
||||
|
||||
# Catplot of the before-after comparison of the clones' weights. Colors the links depending on class (approaching, distancing, stationary (i.e., MAE=0)). Blue, orange and green are based on the countplot above and should be safe for colorblindness (see https://gist.github.com/mwaskom/b35f6ebc2d4b340b4f64a4e28e778486).
|
||||
mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State", value_name="Distance")
|
||||
P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))]
|
||||
P = sns.color_palette(P, as_cmap=False)
|
||||
ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P, col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
|
||||
ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
|
||||
ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
|
||||
ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
|
||||
# plt.ticklabel_format(style='sci', axis='x')
|
||||
plt.savefig(f"{directory}/before_after_distance_catplot_{ST_name_hash}.png")
|
||||
plt.clf()
|
||||
|
||||
# Catplot of the child_nets' L1 prediction "progress" compared to their parents. Computes one round of accuracy first. If a net is a parent (not a clone), its weights are first reset to the timestep of cloning (taken from the weight history), i.e. 5k steps (end) -> 2.5k steps of training. Careful: the len(history)/2 shortcut is specific to this experiment and might only work here!
|
||||
df_acc = pd.DataFrame(columns=["name", "noise", "l1_acc", "Network Type"])
|
||||
for i in range(len(exp_list)):
|
||||
noise = exp_list[i].noise
|
||||
print(f"\nNoise: {noise}")
|
||||
for network in exp_list[i].nets:
|
||||
is_parent = "clone" not in network.name
|
||||
if is_parent:
|
||||
network.apply_weights(torch.tensor(network.s_train_weights_history[int(len(network.s_train_weights_history)/2)][0]))
|
||||
input_data = network.input_weight_matrix()
|
||||
target_data = network.create_target_weights(input_data)
|
||||
predicted_values = network(input_data)
|
||||
mse_loss = F.mse_loss(target_data, predicted_values).item()
|
||||
l1_loss = F.l1_loss(target_data, predicted_values).item()
|
||||
|
||||
df_acc.loc[len(df_acc)+1] = [network.name, noise, l1_loss, "parents" if is_parent else "child_nets"]
|
||||
print("MSE:", mse_loss, "\t", "L1: ", l1_loss, "\t", network.name)
|
||||
|
||||
# Note: If there are outliers, showfliers=False is necessary or the plot zooms way too far out. If parent and child_nets accuracy are too far apart, this plot might not work (it only shows either the parents or part of the child_nets).
|
||||
ax = sns.catplot(data=df_acc, y="l1_acc", x="noise", hue="Network Type", kind="box", legend=False, showfliers=False, height=5.27, aspect=11.7/5.27, sharey=False)
|
||||
ax.map(plt.axhline, y=10**-6, ls='--')
|
||||
ax.map(plt.axhline, y=10**-7, ls='--')
|
||||
ax.set_axis_labels("Noise levels", "L1 Prediction Loss After Training", fontsize=15)
|
||||
ax.set_xticklabels(labels=('$\mathregular{10^{-10}}$', '$\mathregular{10^{-9}}$', '$\mathregular{10^{-8}}$', '$\mathregular{10^{-7}}$', '$\mathregular{10^{-6}}$', '$\mathregular{10^{-5}}$', '$\mathregular{10^{-4}}$', '$\mathregular{10^{-3}}$', '$\mathregular{10^{-2}}$', '$\mathregular{10^{-1}}$'), fontsize=15)
|
||||
plt.legend(bbox_to_anchor=(0.01, 0.85), loc=2, borderaxespad=0.)
|
||||
plt.legend(fontsize='large')
|
||||
plt.savefig(f"{directory}/parent_vs_children_accuracy_{ST_name_hash}.png")
|
||||
plt.clf()
|
@ -1,246 +0,0 @@
|
||||
import pickle
|
||||
|
||||
import pandas as pd
|
||||
import torch
|
||||
import random
|
||||
import copy
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from matplotlib.ticker import ScalarFormatter
|
||||
from tqdm import tqdm
|
||||
from tabulate import tabulate
|
||||
|
||||
from functionalities_test import is_identity_function, is_zero_fixpoint, test_for_fixpoints, is_divergent
|
||||
from network import Net
|
||||
from torch.nn import functional as F
|
||||
from visualization import plot_loss, bar_chart_fixpoints
|
||||
import seaborn as sns
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
def prng():
|
||||
return random.random()
|
||||
|
||||
|
||||
def generate_perfekt_synthetic_fixpoint_weights():
|
||||
return torch.tensor([[1.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0],
|
||||
[1.0], [0.0], [0.0], [0.0],
|
||||
[1.0], [0.0]
|
||||
], dtype=torch.float32)
|
||||
|
||||
|
||||
PALETTE = 10 * (
|
||||
"#377eb8",
|
||||
"#4daf4a",
|
||||
"#984ea3",
|
||||
"#e41a1c",
|
||||
"#ff7f00",
|
||||
"#a65628",
|
||||
"#f781bf",
|
||||
"#888888",
|
||||
"#a6cee3",
|
||||
"#b2df8a",
|
||||
"#cab2d6",
|
||||
"#fb9a99",
|
||||
"#fdbf6f",
|
||||
)
|
||||
|
||||
|
||||
class RobustnessComparisonExperiment:
|
||||
|
||||
@staticmethod
|
||||
def apply_noise(network, noise: int):
|
||||
# Changing the weights of a network to values + noise
|
||||
for layer_id, layer_name in enumerate(network.state_dict()):
|
||||
for line_id, line_values in enumerate(network.state_dict()[layer_name]):
|
||||
for weight_id, weight_value in enumerate(network.state_dict()[layer_name][line_id]):
|
||||
# network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
|
||||
if prng() < 0.5:
|
||||
network.state_dict()[layer_name][line_id][weight_id] = weight_value + noise
|
||||
else:
|
||||
network.state_dict()[layer_name][line_id][weight_id] = weight_value - noise
|
||||
|
||||
return network
|
||||
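# --- Editor's addition (hedged sketch, not part of the original experiment code) ---
# The element-wise loops in apply_noise above are easy to read but slow for larger layers.
# A vectorized variant in the same spirit could draw a random +/- sign per weight and shift
# all weights in one tensor operation; the helper name below is hypothetical.
def apply_noise_vectorized(network, noise: float):
    with torch.no_grad():
        for param in network.parameters():
            signs = torch.randint(0, 2, param.shape, device=param.device) * 2 - 1  # random +/-1 per weight
            param.add_(signs * noise)  # weight +/- noise, chosen independently per weight
    return network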
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, st_steps, synthetic, directory) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.epochs = epochs
|
||||
self.ST_steps = st_steps
|
||||
self.loss_history = []
|
||||
self.is_synthetic = synthetic
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
self.directory = Path(directory)
|
||||
self.directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.id_functions = []
|
||||
self.nets = self.populate_environment()
|
||||
self.count_fixpoints()
|
||||
self.time_to_vergence, self.time_as_fixpoint = self.test_robustness(
|
||||
seeds=population_size if self.is_synthetic else 1)
|
||||
|
||||
def populate_environment(self):
|
||||
nets = []
|
||||
if self.is_synthetic:
|
||||
''' Either use perfect / hand-constructed fixpoint ... '''
|
||||
net_name = f"net_{str(0)}_synthetic"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
net.apply_weights(generate_perfekt_synthetic_fixpoint_weights())
|
||||
nets.append(net)
|
||||
|
||||
else:
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating experiment %s" % i)
|
||||
|
||||
''' .. or use natural approach to train fixpoints from random initialisation. '''
|
||||
net_name = f"net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
for _ in range(self.epochs):
|
||||
net.self_train(self.ST_steps, self.log_step_size, self.net_learning_rate)
|
||||
nets.append(net)
|
||||
return nets
|
||||
|
||||
def test_robustness(self, print_it=True, noise_levels=10, seeds=10):
|
||||
assert (len(self.id_functions) == 1 and seeds > 1) or (len(self.id_functions) > 1 and seeds == 1)
|
||||
time_to_vergence = [[0 for _ in range(noise_levels)] for _ in
|
||||
range(seeds if self.is_synthetic else len(self.id_functions))]
|
||||
time_as_fixpoint = [[0 for _ in range(noise_levels)] for _ in
|
||||
range(seeds if self.is_synthetic else len(self.id_functions))]
|
||||
row_headers = []
|
||||
|
||||
# This checks whether to use the synthetic setting with multiple seeds
|
||||
# or the multi-network setting with a single seed
|
||||
|
||||
df = pd.DataFrame(columns=['setting', 'Noise Level', 'Self Train Steps', 'absolute_loss',
|
||||
'Time to convergence', 'Time as fixpoint'])
|
||||
with tqdm(total=max(len(self.id_functions), seeds)) as pbar:
|
||||
for i, fixpoint in enumerate(self.id_functions): # 1 / n
|
||||
row_headers.append(fixpoint.name)
|
||||
for seed in range(seeds): # n / 1
|
||||
setting = seed if self.is_synthetic else i
|
||||
|
||||
for noise_level in range(noise_levels):
|
||||
steps = 0
|
||||
clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
|
||||
f"{fixpoint.name}_clone_noise_1e-{noise_level}")
|
||||
clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))
|
||||
clone = clone.apply_noise(pow(10, -noise_level))
|
||||
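# Editor's note: the loop below self-applies the noisy clone until it collapses to the zero
# fixpoint or diverges. 'Time to convergence' counts every self-application step taken until
# then, while 'Time as fixpoint' only counts the steps on which the clone still passed the
# identity-function test.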
|
||||
while not is_zero_fixpoint(clone) and not is_divergent(clone):
|
||||
# -> before
|
||||
clone_weight_pre_application = clone.input_weight_matrix()
|
||||
target_data_pre_application = clone.create_target_weights(clone_weight_pre_application)
|
||||
|
||||
clone.self_application(1, self.log_step_size)
|
||||
time_to_vergence[setting][noise_level] += 1
|
||||
# -> after
|
||||
clone_weight_post_application = clone.input_weight_matrix()
|
||||
target_data_post_application = clone.create_target_weights(clone_weight_post_application)
|
||||
|
||||
absolute_loss = F.l1_loss(target_data_pre_application, target_data_post_application).item()
|
||||
|
||||
if is_identity_function(clone):
|
||||
time_as_fixpoint[setting][noise_level] += 1
|
||||
# When this raises a Type Error, we found a second order fixpoint!
|
||||
steps += 1
|
||||
|
||||
df.loc[df.shape[0]] = [setting, f'$\mathregular{{10^{{-{noise_level}}}}}$',
|
||||
steps, absolute_loss,
|
||||
time_to_vergence[setting][noise_level],
|
||||
time_as_fixpoint[setting][noise_level]]
|
||||
pbar.update(1)
|
||||
|
||||
# Get the measurements at the highest time_to_vergence
|
||||
df_sorted = df.sort_values('Self Train Steps', ascending=False).drop_duplicates(['setting', 'Noise Level'])
|
||||
df_melted = df_sorted.reset_index().melt(id_vars=['setting', 'Noise Level', 'Self Train Steps'],
|
||||
value_vars=['Time to convergence', 'Time as fixpoint'],
|
||||
var_name="Measurement",
|
||||
value_name="Steps").sort_values('Noise Level')
|
||||
# Plotting
|
||||
# plt.rcParams.update({
|
||||
# "text.usetex": True,
|
||||
# "font.family": "sans-serif",
|
||||
# "font.size": 12,
|
||||
# "font.weight": 'bold',
|
||||
# "font.sans-serif": ["Helvetica"]})
|
||||
sns.set(style='whitegrid', font_scale=2)
|
||||
bf = sns.boxplot(data=df_melted, y='Steps', x='Noise Level', hue='Measurement', palette=PALETTE)
|
||||
synthetic = 'synthetic' if self.is_synthetic else 'natural'
|
||||
plt.tight_layout()
|
||||
|
||||
# sns.set(rc={'figure.figsize': (10, 50)})
|
||||
# bx = sns.catplot(data=df[df['absolute_loss'] < 1], y='absolute_loss', x='application_step', kind='box',
|
||||
# col='noise_level', col_wrap=3, showfliers=False)
|
||||
|
||||
filename = f"absolute_loss_perapplication_boxplot_grid_{'synthetic' if self.is_synthetic else 'wild'}.png"
|
||||
filepath = self.directory / filename
|
||||
plt.savefig(str(filepath))
|
||||
|
||||
if print_it:
|
||||
col_headers = [str(f"1e-{d}") for d in range(noise_levels)]
|
||||
|
||||
print(f"\nAppplications steps until divergence / zero: ")
|
||||
# print(tabulate(time_to_vergence, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
|
||||
print(f"\nTime as fixpoint: ")
|
||||
# print(tabulate(time_as_fixpoint, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
return time_as_fixpoint, time_to_vergence
|
||||
|
||||
def count_fixpoints(self):
|
||||
exp_details = f"ST steps: {self.ST_steps}"
|
||||
self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.nets)
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.nets)):
|
||||
net_loss_history = self.nets[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
plot_loss(self.loss_history, self.directory)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
|
||||
ST_steps = 1000
|
||||
ST_epochs = 5
|
||||
ST_log_step_size = 10
|
||||
ST_population_size = 10
|
||||
ST_net_hidden_size = 2
|
||||
ST_net_learning_rate = 0.004
|
||||
ST_name_hash = random.getrandbits(32)
|
||||
ST_synthetic = False
|
||||
|
||||
print(f"Running the robustness comparison experiment:")
|
||||
exp = RobustnessComparisonExperiment(
|
||||
population_size=ST_population_size,
|
||||
log_step_size=ST_log_step_size,
|
||||
net_input_size=NET_INPUT_SIZE,
|
||||
net_hidden_size=ST_net_hidden_size,
|
||||
net_out_size=NET_OUT_SIZE,
|
||||
net_learning_rate=ST_net_learning_rate,
|
||||
epochs=ST_epochs,
|
||||
st_steps=ST_steps,
|
||||
synthetic=ST_synthetic,
|
||||
directory=Path('output') / 'journal_robustness' / f'{ST_name_hash}'
|
||||
)
|
||||
|
||||
directory = Path('output') / 'journal_robustness' / f'{ST_name_hash}'
|
||||
pickle.dump(exp, open(f"{directory}/experiment_pickle_{ST_name_hash}.p", "wb"))
|
||||
print(f"\nSaved experiment to {directory}.")
|
@ -1,341 +0,0 @@
|
||||
import pickle
|
||||
|
||||
import random
|
||||
import copy
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import torch
|
||||
from matplotlib import pyplot as plt
|
||||
from sklearn.metrics import mean_absolute_error as MAE
|
||||
from sklearn.metrics import mean_squared_error as MSE
|
||||
from tabulate import tabulate
|
||||
from tqdm import tqdm
|
||||
|
||||
from functionalities_test import is_identity_function, test_status, is_zero_fixpoint, is_divergent, \
|
||||
is_secondary_fixpoint
|
||||
from journal_basins import mean_invariate_manhattan_distance
|
||||
from network import Net
|
||||
from visualization import plot_loss, plot_3d_soup
|
||||
|
||||
|
||||
def l1(tup):
|
||||
a, b = tup
|
||||
return abs(a - b)
|
||||
|
||||
|
||||
def distance_matrix(nets, distance="MIM", print_it=True):
|
||||
matrix = [[0 for _ in range(len(nets))] for _ in range(len(nets))]
|
||||
for net in range(len(nets)):
|
||||
weights = nets[net].input_weight_matrix()[:, 0]
|
||||
for other_net in range(len(nets)):
|
||||
other_weights = nets[other_net].input_weight_matrix()[:, 0]
|
||||
if distance in ["MSE"]:
|
||||
matrix[net][other_net] = MSE(weights, other_weights)
|
||||
elif distance in ["MAE"]:
|
||||
matrix[net][other_net] = MAE(weights, other_weights)
|
||||
elif distance in ["MIM"]:
|
||||
matrix[net][other_net] = mean_invariate_manhattan_distance(weights, other_weights)
|
||||
|
||||
if print_it:
|
||||
print(f"\nDistance matrix (all to all) [{distance}]:")
|
||||
headers = [i.name for i in nets]
|
||||
print(tabulate(matrix, showindex=headers, headers=headers, tablefmt='orgtbl'))
|
||||
return matrix
|
||||
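# --- Editor's addition (hedged usage sketch) ---
# The all-to-all matrix above is a plain nested list; wrapping it in a labelled DataFrame
# makes it easier to inspect or plot. The helper name is hypothetical; pd and sns are the
# imports already used in this file.
def distance_heatmap(nets, distance="MIM"):
    names = [net.name for net in nets]
    dist_df = pd.DataFrame(distance_matrix(nets, distance=distance, print_it=False),
                           index=names, columns=names)
    sns.heatmap(dist_df, annot=True, fmt='.2e')
    return dist_df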
|
||||
|
||||
def distance_from_parent(nets, distance="MIM", print_it=True):
|
||||
list_of_matrices = []
|
||||
parents = list(filter(lambda x: "clone" not in x.name and is_identity_function(x), nets))
|
||||
distance_range = range(10)
|
||||
for parent in parents:
|
||||
parent_weights = parent.create_target_weights(parent.input_weight_matrix())
|
||||
clones = list(filter(lambda y: parent.name in y.name and parent.name != y.name, nets))
|
||||
matrix = [[0 for _ in distance_range] for _ in range(len(clones))]
|
||||
|
||||
for dist in distance_range:
|
||||
for idx, clone in enumerate(clones):
|
||||
clone_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
if distance in ["MSE"]:
|
||||
matrix[idx][dist] = MSE(parent_weights, clone_weights) < pow(10, -dist)
|
||||
elif distance in ["MAE"]:
|
||||
matrix[idx][dist] = MAE(parent_weights, clone_weights) < pow(10, -dist)
|
||||
elif distance in ["MIM"]:
|
||||
matrix[idx][dist] = mean_invariate_manhattan_distance(parent_weights, clone_weights) < pow(10,
|
||||
-dist)
|
||||
|
||||
if print_it:
|
||||
print(f"\nDistances from parent {parent.name} [{distance}]:")
|
||||
col_headers = [str(f"10e-{d}") for d in distance_range]
|
||||
row_headers = [str(f"clone_{i}") for i in range(len(clones))]
|
||||
print(tabulate(matrix, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
|
||||
list_of_matrices.append(matrix)
|
||||
|
||||
return list_of_matrices
|
||||
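# --- Editor's addition (hedged usage sketch) ---
# Each matrix returned above is boolean: matrix[clone][dist] is True iff the clone lies
# closer to its parent than 10**-dist. Because the thresholds are nested, summing a row
# gives how many decades a clone satisfies, i.e. the tightest threshold it reaches.
def tightest_decade_per_clone(matrix):
    # returns, per clone, the largest d with distance < 10**-d (or -1 if none is met)
    return [sum(row) - 1 for row in matrix]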
|
||||
|
||||
class SoupSpawnExperiment:
|
||||
|
||||
|
||||
def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
|
||||
epochs, st_steps, attack_chance, nr_clones, noise, directory) -> None:
|
||||
self.population_size = population_size
|
||||
self.log_step_size = log_step_size
|
||||
self.net_input_size = net_input_size
|
||||
self.net_hidden_size = net_hidden_size
|
||||
self.net_out_size = net_out_size
|
||||
self.net_learning_rate = net_learning_rate
|
||||
self.epochs = epochs
|
||||
self.ST_steps = st_steps
|
||||
self.attack_chance = attack_chance
|
||||
self.loss_history = []
|
||||
self.nr_clones = nr_clones
|
||||
self.noise = noise or 10e-5
|
||||
print("\nNOISE:", self.noise)
|
||||
|
||||
self.directory = Path(directory)
|
||||
self.directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Populating environment & evolving entities
|
||||
self.parents = []
|
||||
self.clones = []
|
||||
self.parents_with_clones = []
|
||||
self.parents_clones_id_functions = []
|
||||
|
||||
self.populate_environment()
|
||||
|
||||
self.spawn_and_continue()
|
||||
# self.weights_evolution_3d_experiment(self.parents, "only_parents")
|
||||
self.weights_evolution_3d_experiment(self.clones, "only_clones")
|
||||
self.weights_evolution_3d_experiment(self.parents_with_clones, "parents_with_clones")
|
||||
# self.weights_evolution_3d_experiment(self.parents_clones_id_functions, "id_f_with_parents")
|
||||
|
||||
# self.visualize_loss()
|
||||
self.distance_matrix = distance_matrix(self.parents_clones_id_functions, print_it=False)
|
||||
self.parent_clone_distances = distance_from_parent(self.parents_clones_id_functions, print_it=False)
|
||||
|
||||
# self.save()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating experiment %s" % i)
|
||||
|
||||
net_name = f"parent_net_{str(i)}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
self.parents.append(net)
|
||||
self.parents_with_clones.append(net)
|
||||
|
||||
if is_identity_function(net):
|
||||
self.parents_clones_id_functions.append(net)
|
||||
print(f"\nNet {net.name} is identity function")
|
||||
|
||||
if is_divergent(net):
|
||||
print(f"\nNet {net.name} is divergent")
|
||||
|
||||
if is_zero_fixpoint(net):
|
||||
print(f"\nNet {net.name} is zero fixpoint")
|
||||
|
||||
if is_secondary_fixpoint(net):
|
||||
print(f"\nNet {net.name} is secondary fixpoint")
|
||||
|
||||
def evolve(self, population):
|
||||
print(f"Clone soup has a population of {len(population)} networks")
|
||||
|
||||
loop_epochs = tqdm(range(self.epochs - 1))
|
||||
for i in loop_epochs:
|
||||
loop_epochs.set_description("\nEvolving clone soup %s" % i)
|
||||
|
||||
# A network attacking another network with a given percentage
|
||||
if random.randint(1, 100) <= self.attack_chance:
|
||||
random_net1, random_net2 = random.sample(range(len(population)), 2)
|
||||
random_net1 = population[random_net1]
|
||||
random_net2 = population[random_net2]
|
||||
print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
|
||||
random_net1.attack(random_net2)
|
||||
|
||||
# Self-training each network in the population
|
||||
for j in range(len(population)):
|
||||
net = population[j]
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
def spawn_and_continue(self, number_clones: int = None):
|
||||
number_clones = number_clones or self.nr_clones
|
||||
|
||||
df = pd.DataFrame(
|
||||
columns=['name', 'parent', 'MAE_pre', 'MAE_post', 'MSE_pre', 'MSE_post', 'MIM_pre', 'MIM_post', 'noise',
|
||||
'status_post'])
|
||||
|
||||
# MAE_pre, MSE_pre, MIM_pre = 0, 0, 0
|
||||
|
||||
# For every initial net {i} after populating (that is fixpoint after first epoch);
|
||||
for i in range(len(self.parents)):
|
||||
net = self.parents[i]
|
||||
# We set parent start_time to just before this epoch ended, so plotting is zoomed in. Comment out to
|
||||
# see the full trajectory (but the clones will be very hard to see).
|
||||
# Make one target to compare distances to clones later when they have trained.
|
||||
net.start_time = self.ST_steps - 150
|
||||
net_input_data = net.input_weight_matrix()
|
||||
net_target_data = net.create_target_weights(net_input_data)
|
||||
|
||||
# print(f"\nNet {i} is fixpoint")
|
||||
|
||||
# Clone the fixpoint x times and add (+-)self.noise to weight-sets randomly;
|
||||
# To plot clones starting after first epoch (z=ST_steps), set that as start_time!
|
||||
# To make sure PCA will plot the same trajectory up until this point, we clone the
|
||||
# parent-net's weight history as well.
|
||||
for j in range(number_clones):
|
||||
clone = Net(net.input_size, net.hidden_size, net.out_size,
|
||||
f"net_{str(i)}_clone_{str(j)}", start_time=self.ST_steps)
|
||||
clone.load_state_dict(copy.deepcopy(net.state_dict()))
|
||||
clone = clone.apply_noise(self.noise)
|
||||
clone.s_train_weights_history = copy.deepcopy(net.s_train_weights_history)
|
||||
clone.number_trained = copy.deepcopy(net.number_trained)
|
||||
|
||||
# Pre Training distances (after noise application of course)
|
||||
clone_pre_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
MAE_pre = MAE(net_target_data, clone_pre_weights)
|
||||
MSE_pre = MSE(net_target_data, clone_pre_weights)
|
||||
MIM_pre = mean_invariate_manhattan_distance(net_target_data, clone_pre_weights)
|
||||
|
||||
df.loc[len(df)] = [clone.name, net.name, MAE_pre, 0, MSE_pre, 0, MIM_pre, 0, self.noise, ""]
|
||||
|
||||
net.child_nets.append(clone)
|
||||
self.clones.append(clone)
|
||||
self.parents_with_clones.append(clone)
|
||||
|
||||
self.evolve(self.clones)
|
||||
# evolve also with the parents together
|
||||
# self.evolve(self.parents_with_clones)
|
||||
|
||||
for i in range(len(self.parents)):
|
||||
net = self.parents[i]
|
||||
net_input_data = net.input_weight_matrix()
|
||||
net_target_data = net.create_target_weights(net_input_data)
|
||||
|
||||
for j in range(len(net.child_nets)):
|
||||
clone = net.child_nets[j]
|
||||
|
||||
# Post Training distances for comparison
|
||||
clone_post_weights = clone.create_target_weights(clone.input_weight_matrix())
|
||||
MAE_post = MAE(net_target_data, clone_post_weights)
|
||||
MSE_post = MSE(net_target_data, clone_post_weights)
|
||||
MIM_post = mean_invariate_manhattan_distance(net_target_data, clone_post_weights)
|
||||
|
||||
# .. log to data-frame and add to nets for 3d plotting if they are fixpoints themselves.
|
||||
test_status(clone)
|
||||
if is_identity_function(clone):
|
||||
print(f"Clone {j} (of net_{i}) is fixpoint."
|
||||
f"\nMSE({i},{j}): {MSE_post}"
|
||||
f"\nMAE({i},{j}): {MAE_post}"
|
||||
f"\nMIM({i},{j}): {MIM_post}\n")
|
||||
self.parents_clones_id_functions.append(clone)
|
||||
|
||||
# df.loc[df.name == clone.name, ["MAE_post", "MSE_post", "MIM_post"]] = [MAE_pre, MSE_pre, MIM_pre]
|
||||
|
||||
df.loc[df.name == clone.name, ["MAE_post", "MSE_post", "MIM_post", "status_post"]] = [MAE_post,
|
||||
MSE_post,
|
||||
MIM_post,
|
||||
clone.is_fixpoint]
|
||||
|
||||
# Finally, take parent net {i} and finish its training for comparison to the clones' development.
|
||||
for _ in range(self.epochs - 1):
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
net_weights_after = net.create_target_weights(net.input_weight_matrix())
|
||||
print(f"Parent net's distance to original position."
|
||||
f"\nMSE(OG,new): {MAE(net_target_data, net_weights_after)}"
|
||||
f"\nMAE(OG,new): {MSE(net_target_data, net_weights_after)}"
|
||||
f"\nMIM(OG,new): {mean_invariate_manhattan_distance(net_target_data, net_weights_after)}\n")
|
||||
|
||||
self.df = df
|
||||
|
||||
def weights_evolution_3d_experiment(self, nets_population, suffix):
|
||||
exp_name = f"soup_basins_{str(len(nets_population))}_nets_3d_weights_PCA_{suffix}"
|
||||
return plot_3d_soup(nets_population, exp_name, self.directory)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.parents)):
|
||||
net_loss_history = self.parents[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
plot_loss(self.loss_history, self.directory)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
|
||||
# Define number of runs & name:
|
||||
ST_runs = 3
|
||||
ST_runs_name = "test-27"
|
||||
soup_ST_steps = 1500
|
||||
soup_epochs = 2
|
||||
soup_log_step_size = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
nr_clones = 5
|
||||
soup_population_size = 3
|
||||
soup_net_hidden_size = 2
|
||||
soup_net_learning_rate = 0.04
|
||||
soup_attack_chance = 10
|
||||
soup_name_hash = random.getrandbits(32)
|
||||
|
||||
print(f"Running the Soup-Spawn experiment:")
|
||||
exp_list = []
|
||||
for noise_factor in range(2, 5):
|
||||
exp = SoupSpawnExperiment(
|
||||
population_size=soup_population_size,
|
||||
log_step_size=soup_log_step_size,
|
||||
net_input_size=NET_INPUT_SIZE,
|
||||
net_hidden_size=soup_net_hidden_size,
|
||||
net_out_size=NET_OUT_SIZE,
|
||||
net_learning_rate=soup_net_learning_rate,
|
||||
epochs=soup_epochs,
|
||||
st_steps=soup_ST_steps,
|
||||
attack_chance=soup_attack_chance,
|
||||
nr_clones=nr_clones,
|
||||
noise=pow(10, -noise_factor),
|
||||
directory=Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}' / f'10e-{noise_factor}'
|
||||
)
|
||||
exp_list.append(exp)
|
||||
|
||||
directory = Path('output') / 'soup_spawn_basin' / f'{soup_name_hash}'
|
||||
pickle.dump(exp_list, open(f"{directory}/experiment_pickle_{soup_name_hash}.p", "wb"))
|
||||
print(f"\nSaved experiment to {directory}.")
|
||||
|
||||
# Concat all dataframes, and add columns depending on where clone weights end up after training (rel. to parent)
|
||||
df = pd.concat([exp.df for exp in exp_list])
|
||||
df = df.dropna().reset_index()
|
||||
df["relative_distance"] = [ (df.loc[i]["MAE_pre"] - df.loc[i]["MAE_post"]) for i in range(len(df))]
|
||||
df["class"] = ["approaching" if df.loc[i]["relative_distance"] > 0 else "distancing" if df.loc[i]["relative_distance"] < 0 else "stationary" for i in range(len(df))]
|
||||
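# Editor's note (hedged): the two per-row comprehensions above could equivalently be written
# with vectorized pandas/numpy operations, e.g.
#   df["relative_distance"] = df["MAE_pre"] - df["MAE_post"]
#   df["class"] = np.select([df["relative_distance"] > 0, df["relative_distance"] < 0],
#                           ["approaching", "distancing"], default="stationary")
# The behaviour is unchanged; this is only an illustration.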
|
||||
# Countplot of all fixpoint clones after training, per class. Uncomment and manually adjust xticklabels if the x-axis gets too crowded.
|
||||
ax = sns.catplot(kind="count", data=df, x="noise", hue="class", height=5.27, aspect=12.7 / 5.27)
|
||||
ax.set_axis_labels("Noise Levels", "Clone Fixpoints After Training Count ", fontsize=15)
|
||||
# ax.set_xticklabels(labels=('10e-10', '10e-9', '10e-8', '10e-7', '10e-6', '10e-5', '10e-4', '10e-3', '10e-2', '10e-1'), fontsize=15)
|
||||
plt.savefig(f"{directory}/clone_status_after_countplot_{soup_name_hash}.png")
|
||||
plt.clf()
|
||||
|
||||
# Catplot (either kind="point" or "box") that shows before-after training distances to parent
|
||||
mlt = df.melt(id_vars=["name", "noise", "class"], value_vars=["MAE_pre", "MAE_post"], var_name="State",
|
||||
value_name="Distance")
|
||||
P = ["blue" if mlt.loc[i]["class"] == "approaching" else "orange" if mlt.loc[i]["class"] == "distancing" else "green" for i in range(len(mlt))]
|
||||
# P = sns.color_palette(P, as_cmap=False)
|
||||
ax = sns.catplot(data=mlt, x="State", y="Distance", col="noise", hue="name", kind="point", palette=P,
|
||||
col_wrap=min(5, len(exp_list)), sharey=False, legend=False)
|
||||
ax.map(sns.boxplot, "State", "Distance", "noise", linewidth=0.8, order=["MAE_pre", "MAE_post"], whis=[0, 100])
|
||||
ax.set_axis_labels("", "Manhattan Distance To Parent Weights", fontsize=15)
|
||||
ax.set_xticklabels(labels=('after noise application', 'after training'), fontsize=15)
|
||||
plt.savefig(f"{directory}/before_after_distance_catplot_{soup_name_hash}.png")
|
||||
plt.clf()
|
@ -1,252 +0,0 @@
|
||||
import copy
|
||||
import random
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from matplotlib.ticker import ScalarFormatter
|
||||
from tqdm import tqdm
|
||||
from matplotlib import pyplot as plt
|
||||
from torch.nn import functional as F
|
||||
from tabulate import tabulate
|
||||
|
||||
from functionalities_test import test_for_fixpoints, is_zero_fixpoint, is_divergent, is_identity_function
|
||||
from network import Net
|
||||
from visualization import plot_loss, bar_chart_fixpoints, plot_3d_soup, line_chart_fixpoints
|
||||
|
||||
|
||||
def prng():
|
||||
return random.random()
|
||||
|
||||
|
||||
class SoupRobustnessExperiment:
|
||||
|
||||
def __init__(self, population_size, net_i_size, net_h_size, net_o_size, learning_rate, attack_chance,
|
||||
train_nets, ST_steps, epochs, log_step_size, directory: Union[str, Path]):
|
||||
super().__init__()
|
||||
self.population_size = population_size
|
||||
|
||||
self.net_input_size = net_i_size
|
||||
self.net_hidden_size = net_h_size
|
||||
self.net_out_size = net_o_size
|
||||
self.net_learning_rate = learning_rate
|
||||
self.attack_chance = attack_chance
|
||||
self.train_nets = train_nets
|
||||
# self.SA_steps = SA_steps
|
||||
self.ST_steps = ST_steps
|
||||
self.epochs = epochs
|
||||
self.log_step_size = log_step_size
|
||||
|
||||
self.loss_history = []
|
||||
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
# <self.fixpoint_counters_history> is used for keeping track of the amount of fixpoints in %
|
||||
self.fixpoint_counters_history = []
|
||||
self.id_functions = []
|
||||
|
||||
self.directory = Path(directory)
|
||||
self.directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.population = []
|
||||
self.populate_environment()
|
||||
|
||||
self.evolve()
|
||||
self.fixpoint_percentage()
|
||||
self.weights_evolution_3d_experiment()
|
||||
self.count_fixpoints()
|
||||
self.visualize_loss()
|
||||
|
||||
self.time_to_vergence, self.time_as_fixpoint = self.test_robustness()
|
||||
|
||||
def populate_environment(self):
|
||||
loop_population_size = tqdm(range(self.population_size))
|
||||
for i in loop_population_size:
|
||||
loop_population_size.set_description("Populating soup experiment %s" % i)
|
||||
|
||||
net_name = f"soup_network_{i}"
|
||||
net = Net(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)
|
||||
self.population.append(net)
|
||||
|
||||
def evolve(self):
|
||||
""" Evolving consists of attacking & self-training. """
|
||||
|
||||
loop_epochs = tqdm(range(self.epochs))
|
||||
for i in loop_epochs:
|
||||
loop_epochs.set_description("Evolving soup %s" % i)
|
||||
|
||||
# A network attacking another network with a given percentage
|
||||
if random.randint(1, 100) <= self.attack_chance:
|
||||
random_net1, random_net2 = random.sample(range(self.population_size), 2)
|
||||
random_net1 = self.population[random_net1]
|
||||
random_net2 = self.population[random_net2]
|
||||
print(f"\n Attack: {random_net1.name} -> {random_net2.name}")
|
||||
random_net1.attack(random_net2)
|
||||
|
||||
# Self-training each network in the population
|
||||
for j in range(self.population_size):
|
||||
net = self.population[j]
|
||||
|
||||
for _ in range(self.ST_steps):
|
||||
net.self_train(1, self.log_step_size, self.net_learning_rate)
|
||||
|
||||
# Testing for fixpoints after each batch of ST steps to see relevant data
|
||||
if i % self.ST_steps == 0:
|
||||
test_for_fixpoints(self.fixpoint_counters, self.population)
|
||||
fixpoints_percentage = round(self.fixpoint_counters["identity_func"] / self.population_size, 1)
|
||||
self.fixpoint_counters_history.append(fixpoints_percentage)
|
||||
|
||||
# Reset the fixpoint counters. The last iteration is not reset -
|
||||
# its counts are needed by bar_chart_fixpoints().
|
||||
if i < self.epochs:
|
||||
self.reset_fixpoint_counters()
|
||||
|
||||
def test_robustness(self, print_it=True, noise_levels=10, seeds=10):
|
||||
# assert (len(self.id_functions) == 1 and seeds > 1) or (len(self.id_functions) > 1 and seeds == 1)
|
||||
is_synthetic = True if len(self.id_functions) > 1 and seeds == 1 else False
|
||||
avg_time_to_vergence = [[0 for _ in range(noise_levels)] for _ in
|
||||
range(seeds if is_synthetic else len(self.id_functions))]
|
||||
avg_time_as_fixpoint = [[0 for _ in range(noise_levels)] for _ in
|
||||
range(seeds if is_synthetic else len(self.id_functions))]
|
||||
row_headers = []
|
||||
data_pos = 0
|
||||
# This checks whether to use the synthetic setting with multiple seeds
|
||||
# or the multi-network setting with a single seed
|
||||
|
||||
df = pd.DataFrame(columns=['seed', 'noise_level', 'application_step', 'absolute_loss'])
|
||||
for i, fixpoint in enumerate(self.id_functions): # 1 / n
|
||||
row_headers.append(fixpoint.name)
|
||||
for seed in range(seeds): # n / 1
|
||||
for noise_level in range(noise_levels):
|
||||
self_application_steps = 1
|
||||
clone = Net(fixpoint.input_size, fixpoint.hidden_size, fixpoint.out_size,
|
||||
f"{fixpoint.name}_clone_noise10e-{noise_level}")
|
||||
clone.load_state_dict(copy.deepcopy(fixpoint.state_dict()))
|
||||
clone = clone.apply_noise(pow(10, -noise_level))
|
||||
|
||||
while not is_zero_fixpoint(clone) and not is_divergent(clone):
|
||||
if is_identity_function(clone):
|
||||
avg_time_as_fixpoint[i][noise_level] += 1
|
||||
|
||||
# -> before
|
||||
clone_weight_pre_application = clone.input_weight_matrix()
|
||||
target_data_pre_application = clone.create_target_weights(clone_weight_pre_application)
|
||||
|
||||
clone.self_application(1, self.log_step_size)
|
||||
avg_time_to_vergence[i][noise_level] += 1
|
||||
# -> after
|
||||
clone_weight_post_application = clone.input_weight_matrix()
|
||||
target_data_post_application = clone.create_target_weights(clone_weight_post_application)
|
||||
|
||||
absolute_loss = F.l1_loss(target_data_pre_application, target_data_post_application).item()
|
||||
|
||||
setting = i if is_synthetic else seed
|
||||
|
||||
df.loc[data_pos] = [setting, noise_level, self_application_steps, absolute_loss]
|
||||
data_pos += 1
|
||||
self_application_steps += 1
|
||||
|
||||
# Clean up: drop infinite and NaN loss values before plotting
|
||||
df = df.replace([np.inf, -np.inf], np.nan)
|
||||
df = df.dropna()
|
||||
# sns.set(rc={'figure.figsize': (10, 50)})
|
||||
sns.set_theme(style="ticks")
|
||||
bx = sns.catplot(data=df[df['absolute_loss'] < 1], y='absolute_loss', x='application_step', kind='box',
|
||||
col='noise_level', col_wrap=3, showfliers=False)
|
||||
|
||||
directory = Path('output') / 'robustness'
|
||||
filename = f"absolute_loss_perapplication_boxplot_grid.png"
|
||||
filepath = directory / filename
|
||||
|
||||
plt.savefig(str(filepath))
|
||||
|
||||
if print_it:
|
||||
col_headers = [str(f"10-{d}") for d in range(noise_levels)]
|
||||
|
||||
print(f"\nAppplications steps until divergence / zero: ")
|
||||
print(tabulate(avg_time_to_vergence, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
|
||||
print(f"\nTime as fixpoint: ")
|
||||
print(tabulate(avg_time_as_fixpoint, showindex=row_headers, headers=col_headers, tablefmt='orgtbl'))
|
||||
|
||||
return avg_time_as_fixpoint, avg_time_to_vergence
|
||||
|
||||
def weights_evolution_3d_experiment(self):
|
||||
exp_name = f"soup_{self.population_size}_nets_{self.ST_steps}_training_{self.epochs}_epochs"
|
||||
return plot_3d_soup(self.population, exp_name, self.directory)
|
||||
|
||||
def count_fixpoints(self):
|
||||
self.id_functions = test_for_fixpoints(self.fixpoint_counters, self.population)
|
||||
exp_details = f"Evolution steps: {self.epochs} epochs"
|
||||
bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory, self.net_learning_rate,
|
||||
exp_details)
|
||||
|
||||
def fixpoint_percentage(self):
|
||||
runs = self.epochs / self.ST_steps
|
||||
SA_steps = None
|
||||
line_chart_fixpoints(self.fixpoint_counters_history, runs, self.ST_steps, SA_steps, self.directory,
|
||||
self.population_size)
|
||||
|
||||
def visualize_loss(self):
|
||||
for i in range(len(self.population)):
|
||||
net_loss_history = self.population[i].loss_history
|
||||
self.loss_history.append(net_loss_history)
|
||||
|
||||
plot_loss(self.loss_history, self.directory)
|
||||
|
||||
def reset_fixpoint_counters(self):
|
||||
self.fixpoint_counters = {
|
||||
"identity_func": 0,
|
||||
"divergent": 0,
|
||||
"fix_zero": 0,
|
||||
"fix_weak": 0,
|
||||
"fix_sec": 0,
|
||||
"other_func": 0
|
||||
}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
|
||||
soup_epochs = 100
|
||||
soup_log_step_size = 5
|
||||
soup_ST_steps = 20
|
||||
# soup_SA_steps = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
soup_population_size = 4
|
||||
soup_net_hidden_size = 2
|
||||
soup_net_learning_rate = 0.04
|
||||
|
||||
# soup_attack_chance in %
|
||||
soup_attack_chance = 10
|
||||
|
||||
# not used yet: soup_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
||||
soup_train_nets = "no"
|
||||
soup_name_hash = random.getrandbits(32)
|
||||
soup_synthetic = True
|
||||
|
||||
print(f"Running the robustness comparison experiment:")
|
||||
SoupRobustnessExperiment(
|
||||
population_size=soup_population_size,
|
||||
net_i_size=NET_INPUT_SIZE,
|
||||
net_h_size=soup_net_hidden_size,
|
||||
net_o_size=NET_OUT_SIZE,
|
||||
learning_rate=soup_net_learning_rate,
|
||||
attack_chance=soup_attack_chance,
|
||||
train_nets=soup_train_nets,
|
||||
ST_steps=soup_ST_steps,
|
||||
epochs=soup_epochs,
|
||||
log_step_size=soup_log_step_size,
|
||||
directory=Path('output') / 'robustness' / f'{soup_name_hash}'
|
||||
)
|
main.py (150 changed lines)
@ -1,150 +0,0 @@
|
||||
from experiments import *
|
||||
import random
|
||||
|
||||
|
||||
# TODO maybe add also SA to the soup
|
||||
|
||||
def run_experiments(run_ST, run_SA, run_soup, run_mixed, run_robustness):
|
||||
if run_ST:
|
||||
print(f"Running the ST experiment:")
|
||||
run_ST_experiment(ST_population_size, ST_log_step_size, NET_INPUT_SIZE, ST_net_hidden_size, NET_OUT_SIZE,
|
||||
ST_net_learning_rate,
|
||||
ST_epochs, ST_runs, ST_runs_name, ST_name_hash)
|
||||
if run_SA:
|
||||
print(f"\n Running the SA experiment:")
|
||||
run_SA_experiment(SA_population_size, SA_log_step_size, NET_INPUT_SIZE, SA_net_hidden_size, NET_OUT_SIZE,
|
||||
SA_net_learning_rate, SA_runs, SA_runs_name, SA_name_hash,
|
||||
SA_steps, SA_train_nets, SA_ST_steps)
|
||||
if run_soup:
|
||||
print(f"\n Running the soup experiment:")
|
||||
run_soup_experiment(soup_population_size, soup_attack_chance, NET_INPUT_SIZE, soup_net_hidden_size,
|
||||
NET_OUT_SIZE, soup_net_learning_rate, soup_epochs, soup_log_step_size, soup_runs,
|
||||
soup_runs_name, soup_name_hash, soup_ST_steps, soup_train_nets)
|
||||
if run_mixed:
|
||||
print(f"\n Running the mixed experiment:")
|
||||
run_mixed_experiment(mixed_population_size, NET_INPUT_SIZE, mixed_net_hidden_size, NET_OUT_SIZE,
|
||||
mixed_net_learning_rate, mixed_train_nets, mixed_epochs, mixed_SA_steps,
|
||||
mixed_ST_steps_between_SA, mixed_log_step_size, mixed_name_hash, mixed_total_runs,
|
||||
mixed_runs_name)
|
||||
if run_robustness:
|
||||
print(f"Running the robustness experiment:")
|
||||
run_robustness_experiment(rob_population_size, rob_log_step_size, NET_INPUT_SIZE, rob_net_hidden_size,
|
||||
NET_OUT_SIZE, rob_net_learning_rate, rob_ST_steps, rob_runs, rob_runs_name,
|
||||
rob_name_hash)
|
||||
|
||||
if not run_ST and not run_SA and not run_soup and not run_mixed and not run_robustness:
|
||||
print(f"No experiments to be run.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Constants:
|
||||
NET_INPUT_SIZE = 4
|
||||
NET_OUT_SIZE = 1
|
||||
run_ST_experiment_bool = False
|
||||
run_SA_experiment_bool = False
|
||||
run_soup_experiment_bool = False
|
||||
run_mixed_experiment_bool = False
|
||||
run_robustness_bool = True
|
||||
|
||||
""" ------------------------------------- Self-training (ST) experiment ------------------------------------- """
|
||||
|
||||
# Define number of runs & name:
|
||||
ST_runs = 1
|
||||
ST_runs_name = "test-27"
|
||||
ST_epochs = 1000
|
||||
ST_log_step_size = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
ST_population_size = 1
|
||||
ST_net_hidden_size = 2
|
||||
|
||||
ST_net_learning_rate = 0.04
|
||||
|
||||
ST_name_hash = random.getrandbits(32)
|
||||
|
||||
""" ----------------------------------- Self-application (SA) experiment ----------------------------------- """
|
||||
# Define number of runs, name, etc.:
|
||||
SA_runs_name = "test-17"
|
||||
SA_runs = 2
|
||||
SA_steps = 100
|
||||
SA_app_batch_size = 5
|
||||
SA_train_batch_size = 5
|
||||
SA_log_step_size = 5
|
||||
|
||||
# Define number of networks & their architecture
|
||||
SA_population_size = 10
|
||||
SA_net_hidden_size = 2
|
||||
|
||||
SA_net_learning_rate = 0.04
|
||||
|
||||
# SA_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
||||
SA_train_nets = "no"
|
||||
SA_ST_steps = 300
|
||||
|
||||
SA_name_hash = random.getrandbits(32)
|
||||
|
||||
""" -------------------------------------------- Soup experiment -------------------------------------------- """
|
||||
# Define number of runs, name, etc.:
|
||||
soup_runs = 1
|
||||
soup_runs_name = "test-16"
|
||||
soup_epochs = 100
|
||||
soup_log_step_size = 5
|
||||
soup_ST_steps = 20
|
||||
# soup_SA_steps = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
soup_population_size = 5
|
||||
soup_net_hidden_size = 2
|
||||
soup_net_learning_rate = 0.04
|
||||
|
||||
# soup_attack_chance in %
|
||||
soup_attack_chance = 10
|
||||
|
||||
# not used yet: soup_train_nets has 3 possible values "no", "before_SA", "after_SA".
|
||||
soup_train_nets = "no"
|
||||
|
||||
soup_name_hash = random.getrandbits(32)
|
||||
|
||||
""" ------------------------------------------- Mixed experiment -------------------------------------------- """
|
||||
|
||||
# Define number of runs, name, etc.:
|
||||
mixed_runs_name = "test-17"
|
||||
mixed_total_runs = 2
|
||||
|
||||
# Define number of networks & their architecture
|
||||
mixed_population_size = 5
|
||||
mixed_net_hidden_size = 2
|
||||
|
||||
mixed_epochs = 10
|
||||
# Set the <batch_size> to the same value as <ST_steps_between_SA> to see the weights plotted
|
||||
# ONLY after each epoch, and not after a certain amount of steps.
|
||||
mixed_log_step_size = 5
|
||||
mixed_ST_steps_between_SA = 50
|
||||
mixed_SA_steps = 4
|
||||
|
||||
mixed_net_learning_rate = 0.04
|
||||
|
||||
# mixed_train_nets has 2 possible values "before_SA", "after_SA".
|
||||
mixed_train_nets = "after_SA"
|
||||
|
||||
mixed_name_hash = random.getrandbits(32)
|
||||
|
||||
""" ----------------------------------------- Robustness experiment ----------------------------------------- """
|
||||
# Define number of runs & name:
|
||||
rob_runs = 1
|
||||
rob_runs_name = "test-07"
|
||||
rob_ST_steps = 1500
|
||||
rob_log_step_size = 10
|
||||
|
||||
# Define number of networks & their architecture
|
||||
rob_population_size = 1
|
||||
rob_net_hidden_size = 2
|
||||
|
||||
rob_net_learning_rate = 0.04
|
||||
|
||||
rob_name_hash = random.getrandbits(32)
|
||||
|
||||
""" ---------------------------------------- Running the experiment ----------------------------------------- """
|
||||
|
||||
run_experiments(run_ST_experiment_bool, run_SA_experiment_bool, run_soup_experiment_bool, run_mixed_experiment_bool,
|
||||
run_robustness_bool)
|
meta_task_exp.py (new file, 218 lines)
@ -0,0 +1,218 @@
|
||||
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import platform
|
||||
|
||||
|
||||
import torchmetrics
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from torch import nn
|
||||
from torch.nn import Flatten
|
||||
from torch.utils.data import DataLoader
|
||||
from torchvision.datasets import MNIST
|
||||
from torchvision.transforms import ToTensor, Compose, Resize
|
||||
from tqdm import tqdm
|
||||
|
||||
# noinspection DuplicatedCode
|
||||
from experiments.meta_task_utility import ToFloat, new_storage_df, train_task, checkpoint_and_validate, flat_for_store, \
|
||||
plot_training_result, plot_training_particle_types, plot_network_connectivity_by_fixtype, \
|
||||
run_particle_dropout_and_plot
|
||||
|
||||
if platform.node() == 'CarbonX':
|
||||
debug = True
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
print("@ Warning, Debugging Config@!!!!!! @")
|
||||
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
|
||||
else:
|
||||
debug = False
|
||||
|
||||
from network import MetaNet
|
||||
from functionalities_test import test_for_fixpoints
|
||||
|
||||
WORKER = 10 if not debug else 2
|
||||
debug = False
|
||||
BATCHSIZE = 2000 if not debug else 50
|
||||
EPOCH = 50
|
||||
VALIDATION_FRQ = 3 if not debug else 1
|
||||
SELF_TRAIN_FRQ = 1 if not debug else 1
|
||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
DATA_PATH = Path('data')
|
||||
DATA_PATH.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
if debug:
|
||||
torch.autograd.set_detect_anomaly(True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
training = True
|
||||
n_st = 300
|
||||
activation = None # nn.ReLU()
|
||||
|
||||
for weight_hidden_size in [4, 5]:
|
||||
|
||||
weight_hidden_size = weight_hidden_size
|
||||
residual_skip = True
|
||||
n_seeds = 3
|
||||
depth = 3
|
||||
width = 3
|
||||
out = 10
|
||||
|
||||
data_path = Path('data')
|
||||
data_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
||||
st_str = f'_nst_{n_st}'
|
||||
|
||||
config_str = f'{res_str}{ac_str}{st_str}'
|
||||
exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}'
|
||||
|
||||
if not training:
|
||||
# noinspection PyRedeclaration
|
||||
exp_path = Path('output') / 'add_st_50_5'
|
||||
|
||||
for seed in range(n_seeds):
|
||||
seed_path = exp_path / str(seed)
|
||||
|
||||
df_store_path = seed_path / 'train_store.csv'
|
||||
weight_store_path = seed_path / 'weight_store.csv'
|
||||
srnn_parameters = dict()
|
||||
|
||||
if training:
|
||||
# Abort if result files already exist at the project location.
|
||||
for path in [df_store_path, weight_store_path]:
|
||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
||||
|
||||
utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||
try:
|
||||
train_dataset = MNIST(str(DATA_PATH), transform=utility_transforms)
|
||||
except RuntimeError:
|
||||
train_dataset = MNIST(str(DATA_PATH), transform=utility_transforms, download=True)
|
||||
train_loader = DataLoader(train_dataset, batch_size=BATCHSIZE, shuffle=True,
|
||||
drop_last=True, num_workers=WORKER)
|
||||
|
||||
interface = np.prod(train_dataset[0][0].shape)
|
||||
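# Editor's note: with Resize((15, 15)) and Flatten(start_dim=0) in the transform above, each
# MNIST sample becomes a flat vector of 15 * 15 = 225 values, so interface evaluates to 225.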
metanet = MetaNet(interface, depth=depth, width=width, out=out,
|
||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
||||
activation=activation
|
||||
).to(DEVICE)
|
||||
|
||||
loss_fn = nn.CrossEntropyLoss()
|
||||
optimizer = torch.optim.SGD(metanet.parameters(), lr=0.004, momentum=0.9)
|
||||
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', metanet.particle_parameter_count)
|
||||
|
||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
|
||||
is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
|
||||
metanet = metanet.train()
|
||||
|
||||
# Init metrics, even if we do not need all of them:
|
||||
metric = torchmetrics.Accuracy()
|
||||
n_st_per_batch = n_st // len(train_loader)
|
||||
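# Editor's note: integer division spreads the self-train budget evenly across the batches,
# e.g. n_st=300 over 30 batches gives 10 self-train steps per batch; any remainder
# (n_st % len(train_loader)) is dropped, and n_st < len(train_loader) would yield 0 steps.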
|
||||
for batch, (batch_x, batch_y) in tqdm(enumerate(train_loader),
|
||||
total=len(train_loader), desc='MetaNet Train - Batch'
|
||||
):
|
||||
# Self Train
|
||||
self_train_loss = metanet.combined_self_train(n_st_per_batch,
|
||||
reduction='mean', per_particle=False)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
st_step_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
||||
st_step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = st_step_log
|
||||
|
||||
# Task Train
|
||||
tsk_step_log, y_pred = train_task(metanet, optimizer, loss_fn, batch_x, batch_y)
|
||||
tsk_step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = tsk_step_log
|
||||
metric(y_pred.cpu(), batch_y.cpu())
|
||||
|
||||
if is_validation_epoch:
|
||||
metanet = metanet.eval()
|
||||
try:
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Train Accuracy', Score=metric.compute().item())
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
accuracy = checkpoint_and_validate(metanet, seed_path, epoch).item()
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy)
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
|
||||
if is_validation_epoch:
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(metanet.particles))
|
||||
counter_dict = dict(counter_dict)
|
||||
for key, value in counter_dict.items():
|
||||
val_step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = val_step_log
|
||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
||||
|
||||
# FLUSH to disk
|
||||
if is_validation_epoch:
|
||||
for particle in metanet.particles:
|
||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
train_store.to_csv(df_store_path, mode='a',
|
||||
header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a',
|
||||
header=not weight_store_path.exists(), index=False)
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', metanet.particle_parameter_count)
|
||||
|
||||
###########################################################
|
||||
# EPOCHS ended
|
||||
metanet = metanet.eval()
|
||||
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(metanet.particles))
|
||||
for key, value in dict(counter_dict).items():
|
||||
step_log = dict(Epoch=int(EPOCH)+1, Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
accuracy = checkpoint_and_validate(metanet, seed_path, EPOCH, final_model=True)
|
||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy.item())
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
for particle in metanet.particles:
|
||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
|
||||
# FLUSH to disk
|
||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
||||
|
||||
plot_training_result(df_store_path)
|
||||
plot_training_particle_types(df_store_path)
|
||||
|
||||
try:
|
||||
model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
||||
except StopIteration:
|
||||
print('Model pattern did not trigger.')
|
||||
print(f'Search path was: {seed_path}:')
|
||||
print(f'Found Models are: {list(seed_path.rglob("*.tp"))}')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
# noinspection PyUnboundLocalVariable
|
||||
run_particle_dropout_and_plot(model_path)
|
||||
except (ValueError, NameError) as e:
|
||||
print(e)
|
||||
try:
|
||||
plot_network_connectivity_by_fixtype(model_path)
|
||||
except (ValueError, NameError)as e:
|
||||
print(e)
|
||||
|
||||
if n_seeds >= 2:
|
||||
pass
|
meta_task_exp_small.py (new file, 188 lines)
@ -0,0 +1,188 @@
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchmetrics
|
||||
from torch import nn
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
|
||||
from experiments.meta_task_small_utility import AddTaskDataset, checkpoint_and_validate, train_task
|
||||
from network import MetaNet
|
||||
from functionalities_test import test_for_fixpoints
|
||||
from experiments.meta_task_utility import new_storage_df, flat_for_store, plot_training_result, \
|
||||
plot_training_particle_types, run_particle_dropout_and_plot, plot_network_connectivity_by_fixtype
|
||||
|
||||
WORKER = 0
|
||||
BATCHSIZE = 50
|
||||
EPOCH = 30
|
||||
VALIDATION_FRQ = 3
|
||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
training = True
|
||||
n_st = 500
|
||||
activation = None # nn.ReLU()
|
||||
|
||||
for weight_hidden_size in [3,4,5]:
|
||||
|
||||
tsk_threshold = 0.85
|
||||
weight_hidden_size = weight_hidden_size
|
||||
residual_skip = True
|
||||
n_seeds = 3
|
||||
depth = 3
|
||||
width = 3
|
||||
out = 1
|
||||
|
||||
data_path = Path('data')
|
||||
data_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# noinspection PyUnresolvedReferences
|
||||
ac_str = f'_{activation.__class__.__name__}' if activation is not None else ''
|
||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
||||
|
||||
config_str = f'{res_str}'
|
||||
exp_path = Path('output') / f'add_st_{EPOCH}_{weight_hidden_size}{config_str}{ac_str}'
|
||||
|
||||
if not training:
|
||||
# noinspection PyRedeclaration
|
||||
exp_path = Path('output') / 'mn_st_n_2_100_4'
|
||||
|
||||
for seed in range(n_seeds):
|
||||
seed_path = exp_path / str(seed)
|
||||
|
||||
model_path = seed_path / '0000_trained_model.zip'
|
||||
df_store_path = seed_path / 'train_store.csv'
|
||||
weight_store_path = seed_path / 'weight_store.csv'
|
||||
srnn_parameters = dict()
|
||||
|
||||
if training:
|
||||
# Abort if result files already exist at the project location.
|
||||
for path in [model_path, df_store_path, weight_store_path]:
|
||||
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
||||
|
||||
train_data = AddTaskDataset()
|
||||
valid_data = AddTaskDataset()
|
||||
train_load = DataLoader(train_data, batch_size=BATCHSIZE, shuffle=True,
|
||||
drop_last=True, num_workers=WORKER)
|
||||
vali_load = DataLoader(valid_data, batch_size=BATCHSIZE, shuffle=False,
|
||||
drop_last=True, num_workers=WORKER)
|
||||
|
||||
interface = np.prod(train_data[0][0].shape)
|
||||
metanet = MetaNet(interface, depth=depth, width=width, out=out,
|
||||
residual_skip=residual_skip, weight_hidden_size=weight_hidden_size,
|
||||
activation=activation
|
||||
).to(DEVICE)
|
||||
|
||||
loss_fn = nn.MSELoss()
|
||||
optimizer = torch.optim.SGD(metanet.parameters(), lr=0.004, momentum=0.9)
|
||||
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', metanet.particle_parameter_count)
|
||||
|
||||
for epoch in tqdm(range(EPOCH), desc=f'Train - Epochs'):
|
||||
is_validation_epoch = epoch % VALIDATION_FRQ == 0
|
||||
metanet = metanet.train()
|
||||
|
||||
# Init metrics, even if we do not need all of them:
|
||||
metric = torchmetrics.MeanAbsoluteError()
|
||||
n_st_per_batch = n_st // len(train_load)
|
||||
|
||||
for batch, (batch_x, batch_y) in tqdm(enumerate(train_load),
|
||||
total=len(train_load), desc='MetaNet Train - Batch'
|
||||
):
|
||||
# Self Train
|
||||
self_train_loss = metanet.combined_self_train(n_st_per_batch,
|
||||
reduction='mean', per_particle=False)
|
||||
# noinspection PyUnboundLocalVariable
|
||||
st_step_log = dict(Metric='Self Train Loss', Score=self_train_loss.item())
|
||||
st_step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = st_step_log
|
||||
|
||||
# Task Train
|
||||
tsk_step_log, y_pred = train_task(metanet, optimizer, loss_fn, batch_x, batch_y)
|
||||
tsk_step_log.update(dict(Epoch=epoch, Batch=batch))
|
||||
train_store.loc[train_store.shape[0]] = tsk_step_log
|
||||
metric(y_pred.cpu(), batch_y.cpu())
|
||||
|
||||
if is_validation_epoch:
|
||||
metanet = metanet.eval()
|
||||
if metric.total.item():
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Train Accuracy', Score=metric.compute().item())
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
|
||||
accuracy = checkpoint_and_validate(metanet, seed_path, epoch, vali_load).item()
|
||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy)
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
|
||||
if is_validation_epoch:
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(metanet.particles))
|
||||
counter_dict = dict(counter_dict)
|
||||
for key, value in counter_dict.items():
|
||||
val_step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = val_step_log
|
||||
tqdm.write(f'Fixpoint Tester Results: {counter_dict}')
|
||||
|
||||
# FLUSH to disk
|
||||
if is_validation_epoch:
|
||||
for particle in metanet.particles:
|
||||
weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
train_store.to_csv(df_store_path, mode='a',
|
||||
header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a',
|
||||
header=not weight_store_path.exists(), index=False)
|
||||
train_store = new_storage_df('train', None)
|
||||
weight_store = new_storage_df('weights', metanet.particle_parameter_count)
|
||||
|
||||
###########################################################
|
||||
# EPOCHS ended
|
||||
metanet = metanet.eval()
|
||||
|
||||
counter_dict = defaultdict(lambda: 0)
|
||||
# This returns ID-functions
|
||||
_ = test_for_fixpoints(counter_dict, list(metanet.particles))
|
||||
for key, value in dict(counter_dict).items():
|
||||
step_log = dict(Epoch=int(EPOCH), Batch=BATCHSIZE, Metric=key, Score=value)
|
||||
train_store.loc[train_store.shape[0]] = step_log
|
||||
accuracy = checkpoint_and_validate(metanet, seed_path, EPOCH, vali_load, final_model=True)
|
||||
validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
|
||||
Metric='Test Accuracy', Score=accuracy.item())
|
||||
for particle in metanet.particles:
|
||||
weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
|
||||
weight_store.loc[weight_store.shape[0]] = weight_log
|
||||
|
||||
train_store.loc[train_store.shape[0]] = validation_log
|
||||
train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
|
||||
weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
|
||||
|
||||
plot_training_result(df_store_path)
|
||||
plot_training_particle_types(df_store_path)
|
||||
|
||||
try:
|
||||
model_path = next(seed_path.glob(f'*e{EPOCH}.tp'))
|
||||
except StopIteration:
|
||||
print('Model pattern did not trigger.')
|
||||
print(f'Search path was: {seed_path}:')
|
||||
print(f'Found Models are: {list(seed_path.rglob("*.tp"))}')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
run_particle_dropout_and_plot(model_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
try:
|
||||
plot_network_connectivity_by_fixtype(model_path)
|
||||
except ValueError as e:
|
||||
print(e)
|
||||
|
||||
if n_seeds >= 2:
|
||||
pass
|
@ -53,8 +53,9 @@ class MultiplyByXTaskDataset(Dataset):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
net = Net(5, 4, 1)
|
||||
net = Net(5, 4, 1, lr=0.004)
|
||||
multiplication_target = 0.03
|
||||
st_steps = 0
|
||||
|
||||
loss_fn = nn.MSELoss()
|
||||
optimizer = torch.optim.SGD(net.parameters(), lr=0.004, momentum=0.9)
|
||||
@ -68,31 +69,16 @@ if __name__ == '__main__':
mean_self_tain_loss = []

for batch, (batch_x, batch_y) in tenumerate(dataloader):
self_train_loss, _ = net.self_train(2, save_history=False, learning_rate=0.004)

for _ in range(2):
optimizer.zero_grad()
input_data = net.input_weight_matrix()
target_data = net.create_target_weights(input_data)
output = net(input_data)
self_train_loss = loss_fn(output, target_data)
self_train_loss.backward()
optimizer.step()
self_train_loss, _ = net.self_train(1000 // 20, save_history=False)
is_fixpoint = functionalities_test.is_identity_function(net)
if not is_fixpoint:
st_steps += 2

optimizer.zero_grad()
batch_x_emb = torch.zeros(batch_x.shape[0], 5)
batch_x_emb[:, -1] = batch_x.squeeze()
y = net(batch_x_emb)
loss = loss_fn(y, batch_y)

loss.backward()
optimizer.step()
if is_fixpoint:
tqdm.write(f'is fixpoint after st : {is_fixpoint}')
tqdm.write(f'is fixpoint after st : {is_fixpoint}, first reached after st_steps: {st_steps}')
tqdm.write(f'is fixpoint after tsk: {functionalities_test.is_identity_function(net)}')

mean_batch_loss.append(loss.detach())
#mean_batch_loss.append(loss.detach())
mean_self_tain_loss.append(self_train_loss.detach())

train_frame.loc[train_frame.shape[0]] = dict(Epoch=epoch, Batch=batch,
67 network.py
@ -75,7 +75,7 @@ class Net(nn.Module):
i += size
return self

def __init__(self, i_size: int, h_size: int, o_size: int, name=None, start_time=1) -> None:
def __init__(self, i_size: int, h_size: int, o_size: int, name=None, start_time=1, lr=0.004) -> None:
super().__init__()
self.start_time = start_time
@ -104,6 +104,7 @@ class Net(nn.Module):
self._weight_pos_enc_and_mask = None
self.apply(xavier_init)
self.optimizer = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9)

@property
def _weight_pos_enc(self):
@ -117,14 +118,17 @@ class Net(nn.Module):
torch.cat(
(
# Those are the weights
torch.full((x.numel(), 1), 0, device=d),
torch.full((x.numel(), 1), 0, device=d, requires_grad=False),
# Layer enumeration
torch.full((x.numel(), 1), layer_id, device=d),
torch.full((x.numel(), 1), layer_id, device=d, requires_grad=False),
# Cell Enumeration
torch.arange(layer.out_features, device=d).repeat_interleave(layer.in_features).view(-1, 1),
torch.arange(layer.out_features, device=d, requires_grad=False
).repeat_interleave(layer.in_features).view(-1, 1),
# Weight Enumeration within the Cells
torch.arange(layer.in_features, device=d).view(-1, 1).repeat(layer.out_features, 1),
*(torch.full((x.numel(), 1), 0, device=d) for _ in range(self.input_size-4))
torch.arange(layer.in_features, device=d, requires_grad=False
).view(-1, 1).repeat(layer.out_features, 1),
*(torch.full((x.numel(), 1), 0, device=d, requires_grad=False
) for _ in range(self.input_size-4))
), dim=1)
)
# Finalize
@ -138,7 +142,7 @@ class Net(nn.Module):
# computations
# create a mask where pos is 0 if it is to be replaced
mask = torch.ones_like(weight_matrix)
mask = torch.ones_like(weight_matrix, requires_grad=False)
mask[:, 0] = 0

self._weight_pos_enc_and_mask = weight_matrix.detach(), mask.detach()
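The change above only marks the positional-encoding columns and the replacement mask as constants that never receive gradients. As a rough standalone illustration (toy sizes and local variable names, not the repo's API), the enumeration columns for a Linear layer with in_features=2 and out_features=3 would look like this:

import torch

layer_in, layer_out, layer_id = 2, 3, 1
n = layer_in * layer_out              # one row per weight of the layer
pos_enc = torch.cat((
    torch.full((n, 1), 0),            # slot for the weight value itself (filled in later)
    torch.full((n, 1), layer_id),     # layer enumeration
    torch.arange(layer_out).repeat_interleave(layer_in).view(-1, 1),  # cell (neuron) index
    torch.arange(layer_in).view(-1, 1).repeat(layer_out, 1),          # weight index within the cell
), dim=1)
# pos_enc[0] == tensor([0, 1, 0, 0]); these integer constants carry no gradients.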
@ -175,22 +179,20 @@ class Net(nn.Module):
def self_train(self,
training_steps: int,
log_step_size: int = 0,
learning_rate: float = 0.0004,
save_history: bool = True
save_history: bool = False,
reduction: str = 'mean'
) -> (Tensor, list):
""" Training a network to predict its own weights in order to self-replicate. """

optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)

for training_step in range(training_steps):
self.number_trained += 1
optimizer.zero_grad()
self.optimizer.zero_grad()
input_data = self.input_weight_matrix()
target_data = self.create_target_weights(input_data)
output = self(input_data)
loss = F.mse_loss(output, target_data)
loss = F.mse_loss(output, target_data, reduction=reduction)
loss.backward()
optimizer.step()
self.optimizer.step()

if save_history:
# Saving the history of the weights after a certain amount of steps (aka log_step_size) for research.
@ -207,7 +209,6 @@ class Net(nn.Module):
self.s_train_weights_history.append(weights.T.detach().numpy())
self.loss_history.append(loss.item())

# Saving weights only at the end of a soup/mixed exp. epoch.
if save_history:
if "soup" in self.name or "mixed" in self.name:
@ -216,7 +217,7 @@ class Net(nn.Module):
self.loss_history.append(loss.item())

self.trained = True
return loss, self.loss_history
return loss.detach(), self.loss_history

def self_application(self, SA_steps: int, log_step_size: Union[int, None] = None):
""" Inputting the weights of a network to itself for a number of steps, without backpropagation. """
@ -463,21 +464,33 @@ class MetaNet(nn.Module):
def particles(self):
return (cell for metalayer in self.all_layers for cell in metalayer.particles)

def combined_self_train(self, optimizer, n_st_steps, reduction='mean'):
def combined_self_train(self, n_st_steps, reduction='mean', per_particle=True):

losses = []
for particle in self.particles:

if per_particle:
for particle in self.particles:
loss, _ = particle.self_train(n_st_steps, reduction=reduction)
losses.append(loss.detach())
else:
optim = torch.optim.SGD(self.parameters(), lr=0.004, momentum=0.9)
for _ in range(n_st_steps):
optimizer.zero_grad()
# Integrate optimizer and backward function
input_data = particle.input_weight_matrix()
target_data = particle.create_target_weights(input_data)
output = particle(input_data)
losses.append(F.mse_loss(output, target_data, reduction=reduction))
losses.backward()
optimizer.step()
optim.zero_grad()
train_losses = []
for particle in self.particles:
# Integrate optimizer and backward function
input_data = particle.input_weight_matrix()
target_data = particle.create_target_weights(input_data)
output = particle(input_data)
loss = F.mse_loss(output, target_data, reduction=reduction)

train_losses.append(loss)
train_losses = torch.hstack(train_losses).sum(dim=-1, keepdim=True)
train_losses.backward()
optim.step()
losses.append(train_losses.detach())
losses = torch.hstack(losses).sum(dim=-1, keepdim=True)
return losses.detach()
return losses
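The refactored `combined_self_train` no longer needs an optimizer argument: with `per_particle=True` each particle runs its own `self_train` on its internal optimizer, otherwise one shared SGD takes a step on the summed MSE over all particles for every self-train step. A minimal usage sketch (not part of the diff), assuming an already constructed `MetaNet` instance called `metanet`:

st_loss = metanet.combined_self_train(n_st_steps=5)                      # default: per-particle self-training
st_loss = metanet.combined_self_train(n_st_steps=5, per_particle=False)  # one joint step over all particles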

@property
def hyperparams(self):