Second order function
This commit is contained in:
114
experiments/self_train_secondary_exp.py
Normal file
114
experiments/self_train_secondary_exp.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
import pickle
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from experiments.helpers import check_folder, summary_fixpoint_experiment
|
||||||
|
from functionalities_test import test_for_fixpoints
|
||||||
|
from network import SecondaryNet
|
||||||
|
from visualization import plot_loss, bar_chart_fixpoints
|
||||||
|
from visualization import plot_3d_self_train
|
||||||
|
|
||||||
|
|
||||||
|
class SelfTrainExperimentSecondary:
    """Self-train experiment driven by `SecondaryNet` instances.

    On construction it builds a population of nets, self-trains each one for
    `epochs` epochs, then renders the 3D weight-trajectory plot, the fixpoint
    bar chart, and the loss curves into `directory`.
    """

    def __init__(self, population_size, log_step_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                 epochs, directory: Path) -> None:
        self.population_size = population_size
        self.log_step_size = log_step_size
        self.net_input_size = net_input_size
        self.net_hidden_size = net_hidden_size
        self.net_out_size = net_out_size

        self.net_learning_rate = net_learning_rate
        self.epochs = epochs

        # One loss-history list per net, filled by visualize_loss().
        self.loss_history = []

        # Tally of fixpoint classifications, filled by count_fixpoints().
        self.fixpoint_counters = {
            "identity_func": 0,
            "divergent": 0,
            "fix_zero": 0,
            "fix_weak": 0,
            "fix_sec": 0,
            "other_func": 0,
        }

        self.directory_name = Path(directory)
        self.directory_name.mkdir(parents=True, exist_ok=True)

        self.nets = []
        # Create population:
        self.populate_environment()

        self.weights_evolution_3d_experiment()
        self.count_fixpoints()
        self.visualize_loss()

    def populate_environment(self):
        """Create and self-train `population_size` SecondaryNets."""
        progress = tqdm(range(self.population_size))
        for idx in progress:
            progress.set_description("Populating ST experiment %s" % idx)

            net_name = f"ST_net_{str(idx)}"
            net = SecondaryNet(self.net_input_size, self.net_hidden_size, self.net_out_size, net_name)

            # One self-train call per epoch so the net logs per-epoch state.
            for _ in range(self.epochs):
                net.self_train(1, self.log_step_size, self.net_learning_rate)

            print(f"\nLast weight matrix (epoch: {self.epochs}):\n{net.input_weight_matrix()}\nLossHistory: {net.loss_history[-10:]}")
            self.nets.append(net)

    def weights_evolution_3d_experiment(self):
        """Plot the PCA-projected 3D weight trajectories of all nets."""
        exp_name = f"ST_{str(len(self.nets))}_nets_3d_weights_PCA"
        return plot_3d_self_train(self.nets, exp_name, self.directory_name, self.log_step_size)

    def count_fixpoints(self):
        """Classify every net's fixpoint behavior and chart the counts."""
        test_for_fixpoints(self.fixpoint_counters, self.nets)
        exp_details = f"Self-train for {self.epochs} epochs"
        bar_chart_fixpoints(self.fixpoint_counters, self.population_size, self.directory_name, self.net_learning_rate,
                            exp_details)

    def visualize_loss(self):
        """Collect each net's loss history and plot them together."""
        self.loss_history.extend(net.loss_history for net in self.nets)

        plot_loss(self.loss_history, self.directory_name)
|
||||||
|
|
||||||
|
|
||||||
|
def run_ST_experiment(population_size, batch_size, net_input_size, net_hidden_size, net_out_size, net_learning_rate,
                      epochs, runs, run_name, name_hash):
    """Run `runs` independent SelfTrainExperimentSecondary experiments.

    Each run is pickled into its own sub-directory under
    output/self_training/, and a cross-run summary is written at the end.

    :param batch_size: forwarded as the experiment's log step size.
    :param name_hash: disambiguator appended to directory names.
    """
    experiments = {}
    logging_directory = Path('output') / 'self_training'
    logging_directory.mkdir(parents=True, exist_ok=True)

    # Running the experiments
    for i in range(runs):
        experiment_name = f"{run_name}_run_{i}_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
        this_exp_directory = logging_directory / experiment_name
        # The experiment trains, plots, and creates its directory in __init__.
        ST_experiment = SelfTrainExperimentSecondary(
            population_size,
            batch_size,
            net_input_size,
            net_hidden_size,
            net_out_size,
            net_learning_rate,
            epochs,
            this_exp_directory
        )
        with (this_exp_directory / 'full_experiment_pickle.p').open('wb') as f:
            pickle.dump(ST_experiment, f)
        experiments[i] = ST_experiment

    # Building a summary of all the runs
    # BUG FIX: the name previously started with '/', which makes pathlib's
    # `logging_directory / summary_name` discard the base directory and
    # create the summary at the filesystem root.
    summary_name = f"summary_{run_name}_{runs}_runs_{str(population_size)}_nets_{epochs}_epochs_{str(name_hash)}"
    summary_directory_name = logging_directory / summary_name
    summary_directory_name.mkdir(parents=True, exist_ok=True)

    summary_pre_title = "ST"
    summary_fixpoint_experiment(runs, population_size, epochs, experiments, net_learning_rate, summary_directory_name,
                                summary_pre_title)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Intentionally not runnable as a script yet; call run_ST_experiment(...)
    # from a driver with concrete hyperparameters instead.
    raise NotImplementedError('Test this here!!!')
|
60
network.py
60
network.py
@@ -3,6 +3,7 @@ import copy
|
|||||||
import random
|
import random
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
@@ -114,7 +115,6 @@ class Net(nn.Module):
|
|||||||
""" Training a network to predict its own weights in order to self-replicate. """
|
""" Training a network to predict its own weights in order to self-replicate. """
|
||||||
|
|
||||||
optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
|
optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
|
||||||
self.trained = True
|
|
||||||
|
|
||||||
for training_step in range(training_steps):
|
for training_step in range(training_steps):
|
||||||
self.number_trained += 1
|
self.number_trained += 1
|
||||||
@@ -145,9 +145,10 @@ class Net(nn.Module):
|
|||||||
self.s_train_weights_history.append(weights.T.detach().numpy())
|
self.s_train_weights_history.append(weights.T.detach().numpy())
|
||||||
self.loss_history.append(loss.detach().numpy().item())
|
self.loss_history.append(loss.detach().numpy().item())
|
||||||
|
|
||||||
|
self.trained = True
|
||||||
return weights.detach().numpy(), loss, self.loss_history
|
return weights.detach().numpy(), loss, self.loss_history
|
||||||
|
|
||||||
def self_application(self, SA_steps: int, log_step_size: Union[int, None] = None):
|
def self_application(self, SA_steps: int, log_step_size: Union[int, None] = None):
|
||||||
""" Inputting the weights of a network to itself for a number of steps, without backpropagation. """
|
""" Inputting the weights of a network to itself for a number of steps, without backpropagation. """
|
||||||
|
|
||||||
for i in range(SA_steps):
|
for i in range(SA_steps):
|
||||||
@@ -190,3 +191,58 @@ class Net(nn.Module):
|
|||||||
self.state_dict()[layer_name][line_id][weight_id] = weight_value - noise_size * prng()
|
self.state_dict()[layer_name][line_id][weight_id] = weight_value - noise_size * prng()
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
||||||
|
class SecondaryNet(Net):
    """A `Net` whose self-training passes its own output through itself twice
    ("second order"): the first forward pass's output is written into the
    first input column, and the net is applied again to that modified input.
    """

    def self_train(self, training_steps: int, log_step_size: int, learning_rate: float) -> "tuple[pd.DataFrame, bool]":
        """ Training a network to predict its own weights in order to self-replicate. """
        # NOTE: log_step_size is accepted for interface compatibility with
        # Net.self_train but is not used here.
        optimizer = optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
        columns = ['step', 'loss', 'first_to_target_loss', 'second_to_target_loss', 'second_to_first_loss']
        # Collect rows in a plain list and build the DataFrame once at the
        # end: per-row `df.loc[...]` appends are quadratic in training_steps.
        records = []
        is_diverged = False
        for training_step in range(training_steps):
            self.number_trained += 1
            optimizer.zero_grad()
            input_data = self.input_weight_matrix()
            target_data = self.create_target_weights(input_data)

            # First pass, then feed the output back in as the first column.
            intermediate_output = self(input_data)
            second_input = copy.deepcopy(input_data)
            second_input[:, 0] = intermediate_output.squeeze()

            # Second pass on the self-modified input.
            output = self(second_input)
            second_to_target_loss = F.mse_loss(output, target_data)
            # First pass is trained toward the NEGATED target on purpose.
            first_to_target_loss = F.mse_loss(intermediate_output, target_data * -1)
            second_to_first_loss = F.mse_loss(intermediate_output, output)
            # Abort on nan OR inf in any of the three losses (message kept
            # as-is; it also fires for inf).
            if any(torch.isnan(x) or torch.isinf(x)
                   for x in [second_to_first_loss, first_to_target_loss, second_to_target_loss]):
                print('is nan')
                is_diverged = True
                break

            # second_to_first_loss is logged only, not part of the objective.
            loss = second_to_target_loss + first_to_target_loss
            records.append([len(records), loss.detach().numpy().item(),
                            first_to_target_loss.detach().numpy().item(),
                            second_to_target_loss.detach().numpy().item(),
                            second_to_first_loss.detach().numpy().item()])
            loss.backward()
            optimizer.step()

        df = pd.DataFrame(records, columns=columns)
        self.trained = True
        return df, is_diverged
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Demo: retry until a SecondaryNet self-trains without diverging,
    # then plot all loss columns over the training steps.
    from matplotlib import pyplot as plt
    import seaborn as sns

    is_div = True
    while is_div:
        net = SecondaryNet(4, 2, 1, "SecondaryNet")
        data_df, is_div = net.self_train(20000, 25, 1e-4)

    # data_df = data_df[::-1] # Reverse
    loss_columns = data_df.drop(columns='step')
    fig = sns.lineplot(data=loss_columns)
    # fig.set(yscale='log')
    print(data_df.iloc[-1])
    print(data_df.iloc[0])
    plt.show()
    print("done")
|
||||||
|
Reference in New Issue
Block a user