self-replicating-neural-net.../sparse_tensor_combined.ipynb
2022-02-09 14:35:55 +01:00

499 lines
21 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from network import Net\n",
"import torch\n",
"from typing import List\n",
"from functionalities_test import is_identity_function\n",
"from tqdm import tqdm,trange\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def construct_sparse_COO_layer(nets:List[Net], layer_idx:int) -> torch.Tensor:\n",
"    \"\"\"Combine one layer of every net into a single block-diagonal sparse COO tensor.\n",
"\n",
"    For a layer with R rows (cells) and C weights per cell, the weights of net k are\n",
"    placed at rows [k*R, (k+1)*R) and columns [k*C, (k+1)*C); everything else is zero:\n",
"\n",
"        [weights_net0] [0 ...           ]\n",
"        [0] [weights_net1] [0 ...       ]\n",
"        ...\n",
"        [0 ...           ] [weights_netN]\n",
"\n",
"    Args:\n",
"        nets: Non-empty list of nets whose layer `layer_idx` all have the same shape.\n",
"        layer_idx: Position of the layer in `list(net.parameters())`.\n",
"\n",
"    Returns:\n",
"        Sparse COO tensor of shape (R * len(nets), C * len(nets)).\n",
"    \"\"\"\n",
"    # Strict `<`: layer_idx == number of layers used to pass the assert and then fail with IndexError.\n",
"    assert layer_idx < len(list(nets[0].parameters()))\n",
"\n",
"    # Derive the sizes from the arguments instead of the notebook-global `nr_nets`, so the\n",
"    # result is correct even when that global is stale or differs from len(nets).\n",
"    nr_nets = len(nets)\n",
"    reference_layer = list(nets[0].parameters())[layer_idx]\n",
"    nr_rows, nr_cols = len(reference_layer), len(reference_layer[0])\n",
"\n",
"    values = []\n",
"    indices = []\n",
"    for net_idx, net in enumerate(nets):\n",
"        layer = list(net.parameters())[layer_idx]\n",
"        for cell_idx, cell in enumerate(layer):\n",
"            # Row offset: nr_rows per preceding net; column offset: nr_cols per preceding net.\n",
"            row = nr_rows * net_idx + cell_idx\n",
"            for weight_idx, weight in enumerate(cell):\n",
"                indices.append([row, nr_cols * net_idx + weight_idx])\n",
"                values.append(weight)\n",
"\n",
"    # NOTE(review): `values` is a plain list of 0-dim tensors; the constructor presumably copies\n",
"    # them, in which case no gradient flows back into the nets -- confirm before training on this.\n",
"    return torch.sparse_coo_tensor(list(zip(*indices)), values, (nr_rows * nr_nets, nr_cols * nr_nets))\n",
"\n",
"\n",
"# for each net append to the combined sparse tensor\n",
"# construct sparse tensor for each layer, with Nets of (4,2,1), each net appends\n",
"# - [4x2] weights in the first (input) layer\n",
"# - [2x2] weights in the second (hidden) layer\n",
"# - [2x1] weights in the third (output) layer\n",
"#modules = [ construct_sparse_tensor_layer(nets, layer_idx) for layer_idx in range(len(list(nets[0].parameters()))) ]\n",
"#modules\n",
"#for layer_idx in range(len(list(nets[0].parameters()))):\n",
"# sparse_tensor = construct_sparse_tensor_layer(nets, layer_idx)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Train all nets at once: every layer of every net is merged into one block-diagonal\n",
"# sparse COO matrix, and a single SGD optimizer holds the parameters of all nets.\n",
"nr_nets = 50\n",
"nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
"print(f\"before: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")\n",
"\n",
"nr_layers = len(list(nets[0].parameters()))\n",
"modules = [construct_sparse_COO_layer(nets, layer_idx) for layer_idx in range(nr_layers)]\n",
"# Compares the Python object ids of a parameter entry and of the matching sparse entry.\n",
"print( id(list(nets[0].parameters())[0][0,0]) == id(modules[0][0,0]))\n",
"\n",
"loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
"all_params = [param for net in nets for param in net.parameters()]\n",
"optimizer = torch.optim.SGD(all_params, lr=0.004, momentum=0.9)\n",
"\n",
"for train_iteration in range(1000):\n",
"    optimizer.zero_grad()\n",
"\n",
"    # Inputs and targets of all nets stacked side by side, then transposed.\n",
"    X = torch.hstack([net.input_weight_matrix() for net in nets]).requires_grad_(True).T  #(nr_nets*nr_weights, nr_weights)\n",
"    Y = torch.hstack([net.create_target_weights(net.input_weight_matrix()) for net in nets]).requires_grad_(True).T  #(nr_nets*nr_weights,1)\n",
"\n",
"    # The sparse layers are rebuilt from the nets on every iteration.\n",
"    modules = [construct_sparse_COO_layer(nets, layer_idx) for layer_idx in range(nr_layers)]\n",
"\n",
"    # Forward pass: chain the three sparse matrix products.\n",
"    X1 = torch.sparse.mm(modules[0], X)\n",
"    X2 = torch.sparse.mm(modules[1], X1)\n",
"    X3 = torch.sparse.mm(modules[2], X2)\n",
"\n",
"    loss = loss_fn(X3, Y)\n",
"    loss.backward()\n",
"    optimizer.step()\n",
"\n",
"print(f\"after {train_iteration+1} iterations of combined self_train: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Baseline for comparison: train every net separately, each with its own SGD optimizer.\n",
"nr_nets = 500\n",
"nets = [Net(5,2,1) for _ in range(nr_nets)]\n",
"loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
"rounds = 1000\n",
"\n",
"for net in tqdm(nets):\n",
"    optimizer = torch.optim.SGD(net.parameters(), lr=0.004, momentum=0.9)\n",
"    for i in range(rounds):\n",
"        optimizer.zero_grad()\n",
"        # The net is fed its own weight matrix; the loss compares its output\n",
"        # with create_target_weights of that same input.\n",
"        input_data = net.input_weight_matrix()\n",
"        target_data = net.create_target_weights(input_data)\n",
"        loss = loss_fn(net(input_data), target_data)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"\n",
"# Number of nets for which is_identity_function holds after training.\n",
"sum(is_identity_function(net) for net in nets)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def construct_sparse_CRS_layer(nets:List[Net], layer_idx:int) -> torch.Tensor:\n",
"    \"\"\"Combine one layer of every net into a block-diagonal matrix in sparse CSR layout.\n",
"\n",
"    The layer of net k becomes the k-th diagonal block; all other entries are zero.\n",
"    The shape of the result is printed as a side effect.\n",
"\n",
"    Args:\n",
"        nets: Non-empty list of nets.\n",
"        layer_idx: Position of the layer in `list(net.parameters())`.\n",
"\n",
"    Returns:\n",
"        The block-diagonal matrix converted with `to_sparse_csr()`.\n",
"    \"\"\"\n",
"    # Strict `<`: layer_idx == number of layers used to pass the assert and then fail with IndexError.\n",
"    assert layer_idx < len(list(nets[0].parameters()))\n",
"\n",
"    # Fetch every net's layer once (the original re-listed the parameters several times per net)\n",
"    # and let torch.block_diag do the zero padding that was previously assembled with torch.cat.\n",
"    layers = [list(net.parameters())[layer_idx] for net in nets]\n",
"    s = torch.block_diag(*layers).to_sparse_csr()\n",
"\n",
"    print(s.shape)\n",
"    return s"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One combined self-training step using block-diagonal layers in sparse CSR layout.\n",
"nr_nets = 5\n",
"nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
"print(f\"before: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")\n",
"\n",
"# Build the combined layers for the nets created above before inspecting them. Previously this\n",
"# cell read `modules` left over from an earlier cell, i.e. layers built from a different set of\n",
"# nets, and failed outright when that cell had not been run. The COO builder is used here\n",
"# because it is the one this check was written against in the earlier cell.\n",
"num_layers = len(list(nets[0].parameters()))\n",
"modules = [ construct_sparse_COO_layer(nets, layer_idx) for layer_idx in range(num_layers) ]\n",
"print( id(list(nets[0].parameters())[0][0,0]) == id(modules[0][0,0]))\n",
"\n",
"loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
"optimizer = torch.optim.SGD([param for net in nets for param in net.parameters()], lr=0.004, momentum=0.9)\n",
"\n",
"for train_iteration in range(1):\n",
"    optimizer.zero_grad()\n",
"    X = torch.hstack( [net.input_weight_matrix() for net in nets] ).requires_grad_(True).T #(nr_nets*nr_weights, nr_weights)\n",
"    Y = torch.hstack( [net.create_target_weights(net.input_weight_matrix()) for net in nets] ).requires_grad_(True).T #(nr_nets*nr_weights,1)\n",
"\n",
"    # CSR layers for the actual forward pass (each call prints the layer shape).\n",
"    modules = [ construct_sparse_CRS_layer(nets, layer_idx) for layer_idx in range(num_layers)]\n",
"\n",
"    X1 = modules[0].matmul(X)\n",
"    print(\"X1\", X1.shape, X1.is_sparse)\n",
"\n",
"    X2 = modules[1].matmul(X1)\n",
"    print(\"X2\", X2.shape, X2.is_sparse)\n",
"\n",
"    X3 = modules[2].matmul(X2)\n",
"    print(\"X3\", X3.shape, X3.is_sparse)\n",
"\n",
"    loss = loss_fn(X3, Y)\n",
"    loss.backward()\n",
"    optimizer.step()\n",
"\n",
"print(f\"after {train_iteration+1} iterations of combined self_train: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Two small nets as a minimal input for trying out cat_COO_layer below.\n",
"nr_nets = 2\n",
"nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
"\n",
"def cat_COO_layer(nets, layer_idx):\n",
"    \"\"\"Stack one layer of every net block-diagonally and wrap the rows in a sparse COO tensor.\n",
"\n",
"    Args:\n",
"        nets: Nets to combine; each must expose `parameters()`.\n",
"        layer_idx: Position of the layer in `list(net.parameters())`.\n",
"\n",
"    Returns:\n",
"        `s[0]` of a COO tensor whose indices are [0, r] for every row r of the dense\n",
"        block-diagonal matrix and whose values are those rows. Its dense shape and\n",
"        `is_sparse` flag are printed as a side effect.\n",
"    \"\"\"\n",
"    # Read the layers from the `nets` argument. The original used the notebook globals `net`\n",
"    # and `nr_nets` here, so the result depended on whatever an earlier cell had left behind.\n",
"    layers = [list(net.parameters())[layer_idx] for net in nets]\n",
"\n",
"    # Dense block-diagonal matrix: the layer of net k is the k-th diagonal block.\n",
"    v = torch.block_diag(*layers)\n",
"\n",
"    # One index pair per dense row, all under a leading index 0 that is stripped again by s[0].\n",
"    i = [[0, row] for row in range(v.shape[0])]\n",
"    s = torch.sparse_coo_tensor(list(zip(*i)), v)\n",
"    print(s[0].to_dense().shape, s[0].is_sparse)\n",
"    return s[0]\n",
"\n",
"# Try it on the first layer; cat_COO_layer prints the dense shape and the is_sparse flag.\n",
"cat_COO_layer(nets, 0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One combined self-training step with layers that cat_COO_layer builds once, before the loop.\n",
"nr_nets = 5\n",
"nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
"print(f\"before: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")\n",
"\n",
"nr_layers = len(list(nets[0].parameters()))\n",
"modules = [cat_COO_layer(nets, layer_idx) for layer_idx in range(nr_layers)]\n",
"\n",
"loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
"all_params = [param for net in nets for param in net.parameters()]\n",
"optimizer = torch.optim.SGD(all_params, lr=0.004, momentum=0.9)\n",
"\n",
"for train_iteration in range(1):\n",
"    optimizer.zero_grad()\n",
"\n",
"    # Inputs and targets of all nets stacked side by side, then transposed.\n",
"    X = torch.hstack([net.input_weight_matrix() for net in nets]).requires_grad_(True).T  #(nr_nets*nr_weights, nr_weights)\n",
"    Y = torch.hstack([net.create_target_weights(net.input_weight_matrix()) for net in nets]).requires_grad_(True).T  #(nr_nets*nr_weights,1)\n",
"    print(\"X \", X.shape, \"Y\", Y.shape)\n",
"\n",
"    # Forward pass through the three combined layers, printing each intermediate shape.\n",
"    X1 = torch.sparse.mm(modules[0], X)\n",
"    print(\"X1\", X1.shape)\n",
"\n",
"    X2 = torch.sparse.mm(modules[1], X1)\n",
"    print(\"X2\", X2.shape)\n",
"\n",
"    X3 = torch.sparse.mm(modules[2], X2)\n",
"    print(\"X3\", X3.shape)\n",
"\n",
"    loss = loss_fn(X3, Y)\n",
"    loss.backward()\n",
"    optimizer.step()\n",
"\n",
"print(f\"after {train_iteration+1} iterations of combined self_train: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class SparseLayer():\n",
"    \"\"\"Several small nets stored together as block-diagonal sparse COO matrices.\n",
"\n",
"    For each of the first `depth` parameter tensors of a template `Net`, one sparse matrix is\n",
"    built with `nr_nets` diagonal blocks of that tensor's shape. The block entries are freshly\n",
"    drawn random values (torch.randn), not the template's weights.\n",
"\n",
"    Attributes:\n",
"        sparse_sub_layer: list of the sparse matrices, in layer order.\n",
"        weights: list of the flat torch.nn.Parameter value vectors, one per sparse matrix.\n",
"    \"\"\"\n",
"    def __init__(self, nr_nets, interface=5, depth=3, width=2, out=1):\n",
"        self.nr_nets = nr_nets\n",
"        self.interface_dim = interface\n",
"        self.depth_dim = depth\n",
"        self.hidden_dim = width\n",
"        self.out_dim = out\n",
"        # Template net: only its parameter shapes and its weight position encoding are used.\n",
"        self.dummy_net = Net(self.interface_dim, self.hidden_dim, self.out_dim)\n",
"\n",
"        self.sparse_sub_layer = []\n",
"        self.weights = []\n",
"        for layer_id in range(depth):\n",
"            layer, weights = self.coo_sparse_layer(layer_id)\n",
"            self.sparse_sub_layer.append(layer)\n",
"            self.weights.append(weights)\n",
"\n",
"    def coo_sparse_layer(self, layer_id):\n",
"        \"\"\"Create the block-diagonal sparse matrix for template layer `layer_id`.\n",
"\n",
"        Returns:\n",
"            (sparse tensor, values): the sparse COO tensor and the torch.nn.Parameter that was\n",
"            passed as its values. The tensor's shape is printed as a side effect.\n",
"        \"\"\"\n",
"        layer_shape = list(self.dummy_net.parameters())[layer_id].shape\n",
"        #print(layer_shape) #(out_cells, in_cells) -> (2,5), (2,2), (1,2)\n",
"\n",
"        # Dense 0/1 mask with nr_nets diagonal blocks of size layer_shape; the positions of\n",
"        # its ones are used as the indices of the sparse tensor.\n",
"        sparse_diagonal = np.eye(self.nr_nets).repeat(layer_shape[0], axis=-2).repeat(layer_shape[1], axis=-1)\n",
"        indices = np.argwhere(sparse_diagonal == 1).T\n",
"        values = torch.nn.Parameter(torch.randn((self.nr_nets * (layer_shape[0]*layer_shape[1]) )))\n",
"        s = torch.sparse_coo_tensor(indices, values, sparse_diagonal.shape, requires_grad=True)\n",
"        print(f\"L{layer_id}:\", s.shape)\n",
"        return s, values\n",
"\n",
"    def get_self_train_inputs_and_targets(self):\n",
"        \"\"\"Build the self-training inputs and targets for all nets from the current weights.\n",
"\n",
"        Returns:\n",
"            (inputs.T, targets.T): per net, the template's position encoding where `mask` is set\n",
"            and the net's own weights elsewhere; the targets are the nets' weights.\n",
"        \"\"\"\n",
"        encoding_matrix, mask = self.dummy_net._weight_pos_enc\n",
"\n",
"        # view weights of each sublayer in equal chunks, one row per net,\n",
"        # i.e., first chunk of layer1, first chunk of layer2 and first chunk of layer3 = first net\n",
"        weights = [layer.view(-1, int(len(layer)/self.nr_nets)) for layer in self.weights] #[nr_layers*[nr_net*nr_weights_layer_i]]\n",
"        weights_per_net = [torch.cat([layer[i] for layer in weights]).view(-1,1) for i in range(self.nr_nets)] #[nr_net*[nr_weights]]\n",
"        inputs = torch.hstack([encoding_matrix * mask + weights_per_net[i].expand(-1, encoding_matrix.shape[-1]) * (1 - mask) for i in range(self.nr_nets)]) #(16, 25)\n",
"        targets = torch.hstack(weights_per_net)\n",
"        return inputs.T, targets.T\n",
"\n",
"    def __call__(self, x):\n",
"        \"\"\"Multiply `x` by every sparse sub-layer in order and return the final product.\"\"\"\n",
"        # Loop over all sub-layers instead of hard-coding three products, so that the\n",
"        # forward pass follows the `depth` the layer was built with.\n",
"        for sub_layer in self.sparse_sub_layer:\n",
"            x = torch.sparse.mm(sub_layer, x)\n",
"        return x\n",
"\n",
"# Self-train all nets of one SparseLayer jointly; the optimizer only holds the value vectors.\n",
"net = SparseLayer(5)\n",
"loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
"optimizer = torch.optim.SGD(list(net.weights), lr=0.004, momentum=0.9)\n",
"\n",
"for train_iteration in trange(10):\n",
"    optimizer.zero_grad()\n",
"    X, Y = net.get_self_train_inputs_and_targets()\n",
"    out = net(X)\n",
"    loss = loss_fn(out, Y)\n",
"\n",
"    # NOTE(review): retain_graph=True is presumably needed because the sparse sub-layers are\n",
"    # created once in __init__ and reused on every iteration -- confirm.\n",
"    loss.backward(retain_graph=True)\n",
"    optimizer.step()\n",
"\n",
"epsilon = pow(10, -5)\n",
"\n",
"# Comparing the whole layer output at once would be the wrong criterion;\n",
"# instead check for each net separately whether its output matches its target.\n",
"print(f\"identity_fn after {train_iteration+1} self-train iterations: {sum([torch.allclose(out[i], Y[i], rtol=0, atol=epsilon) for i in range(net.nr_nets)])}/{net.nr_nets}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Step-by-step version of SparseLayer.get_self_train_inputs_and_targets, run on the global `net`.\n",
"encoding_matrix, mask = Net(5,2,1)._weight_pos_enc\n",
"print(encoding_matrix, mask)\n",
"\n",
"# View the values of each sub-layer in equal chunks, one row per net: row i of every\n",
"# sub-layer taken together holds the weights of net i.\n",
"weights = [layer.view(-1, len(layer) // net.nr_nets) for layer in net.weights]\n",
"weights_per_net = [torch.cat([layer[i] for layer in weights]).view(-1,1) for i in range(net.nr_nets)]\n",
"\n",
"# Position encoding where the mask is set, the net's own weights everywhere else.\n",
"nr_encoding_cols = encoding_matrix.shape[-1]\n",
"inputs = torch.hstack([\n",
"    encoding_matrix * mask + net_weights.expand(-1, nr_encoding_cols) * (1 - mask)\n",
"    for net_weights in weights_per_net\n",
"])  #16, 25\n",
"\n",
"targets = torch.hstack(weights_per_net)\n",
"targets.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from pathlib import Path\n",
"import torch\n",
"from torch.nn import Flatten\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from torchvision.datasets import MNIST\n",
"from torchvision.transforms import ToTensor, Compose, Resize\n",
"from tqdm import tqdm, trange\n",
"\n",
"\n",
"# Data-loading configuration.\n",
"data_path = Path('data')\n",
"WORKER = 8\n",
"BATCHSIZE = 10\n",
"EPOCH = 1\n",
"DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"\n",
"# Resize every image to 10x10, convert it to a tensor and flatten it from dim 0 on.\n",
"utility_transforms = Compose([\n",
"    Resize((10, 10)),\n",
"    ToTensor(),\n",
"    Flatten(start_dim=0),\n",
"])\n",
"\n",
"dataset = MNIST(str(data_path), transform=utility_transforms)\n",
"d = DataLoader(\n",
"    dataset,\n",
"    batch_size=BATCHSIZE,\n",
"    shuffle=True,\n",
"    drop_last=True,\n",
"    num_workers=WORKER,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def embed_batch(x, repeat_dim, pad_dim=4):\n",
"    \"\"\"Left-pad every scalar of a batch with zeros and tile the result along the last axis.\n",
"\n",
"    Args:\n",
"        x: Tensor of shape (batchsize, flat_img_dim).\n",
"        repeat_dim: How many times the padded encoding is repeated along the last axis.\n",
"        pad_dim: Number of zeros put in front of each value (default 4, the previously\n",
"            hard-coded width).\n",
"\n",
"    Returns:\n",
"        Tensor of shape (batchsize, flat_img_dim, (pad_dim + 1) * repeat_dim).\n",
"    \"\"\"\n",
"    x = x.unsqueeze(-1) #(batchsize, flat_img_dim, 1)\n",
"    # Create the padding on x's device so that torch.cat also works for non-CPU inputs.\n",
"    padding = torch.zeros(x.shape[0], x.shape[1], pad_dim, device=x.device)\n",
"    return torch.cat((padding, x), dim=2).repeat(1, 1, repeat_dim) #(batchsize, flat_img_dim, encoding_dim*repeat_dim)\n",
"\n",
"def embed_vector(x, repeat_dim, pad_dim=4):\n",
"    \"\"\"Left-pad every scalar of a vector with zeros and tile the result along the last axis.\n",
"\n",
"    Args:\n",
"        x: Tensor of shape (flat_img_dim,).\n",
"        repeat_dim: How many times the padded encoding is repeated along the last axis.\n",
"        pad_dim: Number of zeros put in front of each value (default 4, the previously\n",
"            hard-coded width).\n",
"\n",
"    Returns:\n",
"        Tensor of shape (flat_img_dim, (pad_dim + 1) * repeat_dim).\n",
"    \"\"\"\n",
"    x = x.unsqueeze(-1) #(flat_img_dim, 1)\n",
"    # Create the padding on x's device so that torch.cat also works for non-CPU inputs.\n",
"    padding = torch.zeros(x.shape[0], pad_dim, device=x.device)\n",
"    return torch.cat((padding, x), dim=1).repeat(1, repeat_dim) #(flat_img_dim, encoding_dim*repeat_dim)\n",
"\n",
"class SparseNetwork():\n",
"    \"\"\"A stack of SparseLayer objects applied one after another.\n",
"\n",
"    The first layer is created with input_dim * width nets, each of the depth - 2 middle\n",
"    layers with width * width nets, and the last layer with width * out nets.\n",
"    \"\"\"\n",
"    def __init__(self, input_dim, depth, width, out):\n",
"        self.input_dim = input_dim\n",
"        self.depth_dim = depth\n",
"        self.hidden_dim = width\n",
"        self.out_dim = out\n",
"        self.sparse_layers = []\n",
"        self.sparse_layers.append( SparseLayer( self.input_dim * self.hidden_dim ))\n",
"        self.sparse_layers.extend([ SparseLayer( self.hidden_dim * self.hidden_dim ) for layer_idx in range(self.depth_dim - 2)])\n",
"        self.sparse_layers.append( SparseLayer( self.hidden_dim * self.out_dim ))\n",
"\n",
"    def _apply_layer(self, sparse_layer, x, out_features):\n",
"        \"\"\"Pass `x` through one SparseLayer and reduce the result to `out_features` values per sample.\"\"\"\n",
"        # x.shape[-1] is the feature count for (batch, features) and for (features,) inputs alike.\n",
"        # The original read x.shape[1], which raised an IndexError for 1-D vectors.\n",
"        in_features = x.shape[-1]\n",
"\n",
"        if len(x.shape) > 1:\n",
"            # batch pass (one by one, sparse bmm doesn't support grad)\n",
"            embedded_inpt = embed_batch(x, sparse_layer.nr_nets)\n",
"            return torch.stack([\n",
"                sparse_layer(inpt.T).sum(dim=1).view(out_features, in_features).sum(dim=1)\n",
"                for inpt in embedded_inpt\n",
"            ])\n",
"\n",
"        # single vector\n",
"        embedded_inpt = embed_vector(x, sparse_layer.nr_nets)\n",
"        return sparse_layer(embedded_inpt.T).sum(dim=1).view(out_features, in_features).sum(dim=1)\n",
"\n",
"    def __call__(self, x):\n",
"        \"\"\"Run `x` through all layers, printing the shape after each, and return the output.\"\"\"\n",
"        # All layers but the last reduce to hidden_dim values per sample.\n",
"        for sparse_layer in self.sparse_layers[:-1]:\n",
"            x = self._apply_layer(sparse_layer, x, self.hidden_dim)\n",
"            print(\"out\", x.shape)\n",
"\n",
"        # output layer\n",
"        x = self._apply_layer(self.sparse_layers[-1], x, self.out_dim)\n",
"        print(\"out\", x.shape)\n",
"        return x\n",
"\n",
"# Build a meta network sized for one flattened dataset sample and run a single batch through it.\n",
"sample_image = dataset[0][0]\n",
"data_dim = np.prod(sample_image.shape)\n",
"metanet = SparseNetwork(data_dim, depth=3, width=5, out=10)\n",
"\n",
"batchx, batchy = next(iter(d))\n",
"metanet(batchx)"
]
}
],
"metadata": {
"interpreter": {
"hash": "8bcba732c17ca4dacffea8ad1176c852d4229b36b9060a5f633fff752e5396ea"
},
"kernelspec": {
"display_name": "Python 3.8.12 64-bit ('masterthesis': conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}