network test
This commit is contained in:
@ -279,9 +279,12 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
self_train = True
|
self_train = True
|
||||||
training = True
|
training = True
|
||||||
train_to_id_first = False
|
train_to_id_first = True
|
||||||
train_to_task_first = False
|
train_to_task_first = False
|
||||||
train_to_task_first_sequential = True
|
train_to_task_first_sequential = False
|
||||||
|
force_st_for_n_from_last_epochs = 5
|
||||||
|
|
||||||
|
use_sparse_network = False
|
||||||
|
|
||||||
tsk_threshold = 0.855
|
tsk_threshold = 0.855
|
||||||
self_train_alpha = 1
|
self_train_alpha = 1
|
||||||
@ -299,8 +302,11 @@ if __name__ == '__main__':
|
|||||||
res_str = f'{"" if residual_skip else "_no_res"}'
|
res_str = f'{"" if residual_skip else "_no_res"}'
|
||||||
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
# dr_str = f'{f"_dr_{dropout}" if dropout != 0 else ""}'
|
||||||
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
id_str = f'{f"_StToId" if train_to_id_first else ""}'
|
||||||
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first else ""}'
|
tsk_str = f'{f"_Tsk_{tsk_threshold}" if train_to_task_first and tsk_threshold != 1 else ""}'
|
||||||
exp_path = Path('output') / f'mn_{st_str}_{EPOCH}_{weight_hidden_size}{a_str}{res_str}{id_str}{tsk_str}'
|
f_str = f'_f_{force_st_for_n_from_last_epochs}' if \
|
||||||
|
force_st_for_n_from_last_epochs and train_to_task_first_sequential and train_to_task_first \
|
||||||
|
else ""
|
||||||
|
exp_path = Path('output') / f'mn_{st_str}_{EPOCH}_{weight_hidden_size}{a_str}{res_str}{id_str}{tsk_str}{f_str}'
|
||||||
|
|
||||||
for seed in range(n_seeds):
|
for seed in range(n_seeds):
|
||||||
seed_path = exp_path / str(seed)
|
seed_path = exp_path / str(seed)
|
||||||
@ -309,6 +315,8 @@ if __name__ == '__main__':
|
|||||||
df_store_path = seed_path / 'train_store.csv'
|
df_store_path = seed_path / 'train_store.csv'
|
||||||
weight_store_path = seed_path / 'weight_store.csv'
|
weight_store_path = seed_path / 'weight_store.csv'
|
||||||
srnn_parameters = dict()
|
srnn_parameters = dict()
|
||||||
|
for path in [model_path, df_store_path, weight_store_path]:
|
||||||
|
assert not path.exists(), f'Path "{path}" already exists. Check your configuration!'
|
||||||
|
|
||||||
if training:
|
if training:
|
||||||
utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
utility_transforms = Compose([ToTensor(), ToFloat(), Resize((15, 15)), Flatten(start_dim=0)])
|
||||||
@ -319,15 +327,18 @@ if __name__ == '__main__':
|
|||||||
d = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
d = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
|
||||||
|
|
||||||
interface = np.prod(dataset[0][0].shape)
|
interface = np.prod(dataset[0][0].shape)
|
||||||
sparse_metanet = SparseNetwork(interface, depth=5, width=6, out=10, residual_skip=residual_skip,
|
|
||||||
weight_hidden_size=weight_hidden_size,).to(DEVICE)
|
|
||||||
dense_metanet = MetaNet(interface, depth=5, width=6, out=10, residual_skip=residual_skip,
|
dense_metanet = MetaNet(interface, depth=5, width=6, out=10, residual_skip=residual_skip,
|
||||||
weight_hidden_size=weight_hidden_size,).to(DEVICE)
|
weight_hidden_size=weight_hidden_size,).to(DEVICE)
|
||||||
|
sparse_metanet = SparseNetwork(interface, depth=5, width=6, out=10, residual_skip=residual_skip,
|
||||||
|
weight_hidden_size=weight_hidden_size
|
||||||
|
).to(DEVICE) if use_sparse_network else dense_metanet
|
||||||
meta_weight_count = sum(p.numel() for p in next(dense_metanet.particles).parameters())
|
meta_weight_count = sum(p.numel() for p in next(dense_metanet.particles).parameters())
|
||||||
|
|
||||||
loss_fn = nn.CrossEntropyLoss()
|
loss_fn = nn.CrossEntropyLoss()
|
||||||
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.008, momentum=0.9)
|
dense_optimizer = torch.optim.SGD(dense_metanet.parameters(), lr=0.008, momentum=0.9)
|
||||||
sparse_optimizer = torch.optim.SGD(sparse_metanet.parameters(), lr=0.008, momentum=0.9)
|
sparse_optimizer = torch.optim.SGD(
|
||||||
|
sparse_metanet.parameters(), lr=0.008, momentum=0.9
|
||||||
|
) if use_sparse_network else dense_optimizer
|
||||||
|
|
||||||
train_store = new_storage_df('train', None)
|
train_store = new_storage_df('train', None)
|
||||||
weight_store = new_storage_df('weights', meta_weight_count)
|
weight_store = new_storage_df('weights', meta_weight_count)
|
||||||
@ -341,12 +352,18 @@ if __name__ == '__main__':
|
|||||||
metric = torchmetrics.Accuracy()
|
metric = torchmetrics.Accuracy()
|
||||||
else:
|
else:
|
||||||
metric = None
|
metric = None
|
||||||
init_st = train_to_id_first and not all(x.is_fixpoint == ft.identity_func for x in dense_metanet.particles)
|
init_st = train_to_id_first and not all(
|
||||||
|
x.is_fixpoint == ft.identity_func for x in dense_metanet.particles
|
||||||
|
)
|
||||||
|
force_st = (force_st_for_n_from_last_epochs >= (EPOCH - epoch)
|
||||||
|
) and train_to_task_first_sequential and force_st_for_n_from_last_epochs
|
||||||
for batch, (batch_x, batch_y) in tqdm(enumerate(d), total=len(d), desc='MetaNet Train - Batch'):
|
for batch, (batch_x, batch_y) in tqdm(enumerate(d), total=len(d), desc='MetaNet Train - Batch'):
|
||||||
# Self Train
|
# Self Train
|
||||||
if self_train and not init_tsk and (is_self_train_epoch or init_st):
|
|
||||||
|
if self_train and ((not init_tsk and (is_self_train_epoch or init_st)) or force_st):
|
||||||
# Transfer weights
|
# Transfer weights
|
||||||
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
if use_sparse_network:
|
||||||
|
sparse_metanet = sparse_metanet.replace_weights_by_particles(dense_metanet.particles)
|
||||||
# Zero your gradients for every batch!
|
# Zero your gradients for every batch!
|
||||||
sparse_optimizer.zero_grad()
|
sparse_optimizer.zero_grad()
|
||||||
self_train_loss = sparse_metanet.combined_self_train() * self_train_alpha
|
self_train_loss = sparse_metanet.combined_self_train() * self_train_alpha
|
||||||
@ -357,7 +374,8 @@ if __name__ == '__main__':
|
|||||||
Metric='Self Train Loss', Score=self_train_loss.item())
|
Metric='Self Train Loss', Score=self_train_loss.item())
|
||||||
train_store.loc[train_store.shape[0]] = step_log
|
train_store.loc[train_store.shape[0]] = step_log
|
||||||
# Transfer weights
|
# Transfer weights
|
||||||
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
if use_sparse_network:
|
||||||
|
dense_metanet = dense_metanet.replace_particles(sparse_metanet.particle_weights)
|
||||||
if not init_st:
|
if not init_st:
|
||||||
# Zero your gradients for every batch!
|
# Zero your gradients for every batch!
|
||||||
dense_optimizer.zero_grad()
|
dense_optimizer.zero_grad()
|
||||||
@ -381,7 +399,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
if is_validation_epoch:
|
if is_validation_epoch:
|
||||||
dense_metanet = dense_metanet.eval()
|
dense_metanet = dense_metanet.eval()
|
||||||
if train_to_id_first <= epoch:
|
if not init_st:
|
||||||
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
validation_log = dict(Epoch=int(epoch), Batch=BATCHSIZE,
|
||||||
Metric='Train Accuracy', Score=metric.compute().item())
|
Metric='Train Accuracy', Score=metric.compute().item())
|
||||||
train_store.loc[train_store.shape[0]] = validation_log
|
train_store.loc[train_store.shape[0]] = validation_log
|
||||||
@ -438,9 +456,14 @@ if __name__ == '__main__':
|
|||||||
print(f'Found Models are: {list(seed_path.rglob(".tp"))}')
|
print(f'Found Models are: {list(seed_path.rglob(".tp"))}')
|
||||||
exit(1)
|
exit(1)
|
||||||
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
latest_model = torch.load(model_path, map_location=DEVICE).eval()
|
||||||
|
try:
|
||||||
run_particle_dropout_and_plot(seed_path)
|
run_particle_dropout_and_plot(seed_path)
|
||||||
plot_network_connectivity_by_fixtype(model_path)
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
try:
|
||||||
|
plot_network_connectivity_by_fixtype(model_path)
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
if n_seeds >= 2:
|
if n_seeds >= 2:
|
||||||
pass
|
pass
|
||||||
|
@ -48,7 +48,7 @@ if __name__ == '__main__':
|
|||||||
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||||
WORKER = 0
|
WORKER = 0
|
||||||
BATCHSIZE = 500
|
BATCHSIZE = 500
|
||||||
MNIST_TRANSFORM = Compose([ Resize((15, 15)), ToTensor(), Normalize((0.1307,), (0.3081,)), Flatten(start_dim=0)])
|
MNIST_TRANSFORM = Compose([Resize((15, 15)), ToTensor(), Normalize((0.1307,), (0.3081,)), Flatten(start_dim=0)])
|
||||||
torch.manual_seed(42)
|
torch.manual_seed(42)
|
||||||
data_path = Path('data')
|
data_path = Path('data')
|
||||||
data_path.mkdir(exist_ok=True, parents=True)
|
data_path.mkdir(exist_ok=True, parents=True)
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
|
||||||
|
import functionalities_test
|
||||||
from network import Net
|
from network import Net
|
||||||
from functionalities_test import is_identity_function
|
from functionalities_test import is_identity_function
|
||||||
from tqdm import tqdm,trange
|
from tqdm import tqdm,trange
|
||||||
@ -118,12 +121,12 @@ class SparseLayer(nn.Module):
|
|||||||
def test_sparse_layer():
|
def test_sparse_layer():
|
||||||
net = SparseLayer(500) #50 parallel nets
|
net = SparseLayer(500) #50 parallel nets
|
||||||
loss_fn = torch.nn.MSELoss(reduction="sum")
|
loss_fn = torch.nn.MSELoss(reduction="sum")
|
||||||
optimizer = torch.optim.SGD(net.weights, lr=0.004, momentum=0.9)
|
optimizer = torch.optim.SGD(net.parameters(), lr=0.004, momentum=0.9)
|
||||||
# optimizer = torch.optim.SGD([layer.coalesce().values() for layer in net.sparse_sub_layer], lr=0.004, momentum=0.9)
|
# optimizer = torch.optim.SGD([layer.coalesce().values() for layer in net.sparse_sub_layer], lr=0.004, momentum=0.9)
|
||||||
|
|
||||||
for train_iteration in trange(1000):
|
for train_iteration in trange(1000):
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
X,Y = net.get_self_train_inputs_and_targets()
|
X, Y = net.get_self_train_inputs_and_targets()
|
||||||
out = net(X)
|
out = net(X)
|
||||||
|
|
||||||
loss = loss_fn(out, Y)
|
loss = loss_fn(out, Y)
|
||||||
@ -132,10 +135,10 @@ def test_sparse_layer():
|
|||||||
# print("OUT", out.shape)
|
# print("OUT", out.shape)
|
||||||
# print("LOSS", loss.item())
|
# print("LOSS", loss.item())
|
||||||
|
|
||||||
loss.backward(retain_graph=True)
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
epsilon=pow(10, -5)
|
epsilon = pow(10, -5)
|
||||||
# is each of the networks self-replicating?
|
# is each of the networks self-replicating?
|
||||||
print(f"identity_fn after {train_iteration+1} self-train iterations: {sum([torch.allclose(out[i], Y[i], rtol=0, atol=epsilon) for i in range(net.nr_nets)])}/{net.nr_nets}")
|
print(f"identity_fn after {train_iteration+1} self-train iterations: {sum([torch.allclose(out[i], Y[i], rtol=0, atol=epsilon) for i in range(net.nr_nets)])}/{net.nr_nets}")
|
||||||
|
|
||||||
@ -261,6 +264,26 @@ def test_sparse_net():
|
|||||||
metanet = SparseNetwork(data_dim, depth=3, width=5, out=10)
|
metanet = SparseNetwork(data_dim, depth=3, width=5, out=10)
|
||||||
batchx, batchy = next(iter(d))
|
batchx, batchy = next(iter(d))
|
||||||
metanet(batchx)
|
metanet(batchx)
|
||||||
|
print(f"identity_fn after {train_iteration+1} self-train iterations: {sum([torch.allclose(out[i], Y[i], rtol=0, atol=epsilon) for i in range(net.nr_nets)])}/{net.nr_nets}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_sparse_net_sef_train():
|
||||||
|
net = SparseNetwork(30, 5, 6, 10)
|
||||||
|
optimizer = torch.optim.SGD(net.parameters(), lr=0.008, momentum=0.9)
|
||||||
|
epochs = 120
|
||||||
|
|
||||||
|
for _ in trange(epochs):
|
||||||
|
optimizer.zero_grad()
|
||||||
|
loss = net.combined_self_train()
|
||||||
|
|
||||||
|
loss.backward(retain_graph=True)
|
||||||
|
optimizer.step()
|
||||||
|
|
||||||
|
# is each of the networks self-replicating?
|
||||||
|
counter = defaultdict(lambda: 0)
|
||||||
|
id_functions = functionalities_test.test_for_fixpoints(counter, list(net.particles))
|
||||||
|
counter = dict(counter)
|
||||||
|
print(f"identity_fn after {epochs+1} self-train epochs: {counter}")
|
||||||
|
|
||||||
|
|
||||||
def test_manual_for_loop():
|
def test_manual_for_loop():
|
||||||
@ -284,7 +307,8 @@ def test_manual_for_loop():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_sparse_layer()
|
# test_sparse_layer()
|
||||||
|
test_sparse_net_sef_train()
|
||||||
# test_sparse_net()
|
# test_sparse_net()
|
||||||
# for comparison
|
# for comparison
|
||||||
test_manual_for_loop()
|
# test_manual_for_loop()
|
Reference in New Issue
Block a user