From df182d652ac684bb15ba480c8ae7116a60c21566 Mon Sep 17 00:00:00 2001
From: Maximilian Zorn <m.zorn@campus.lmu.de>
Date: Fri, 4 Feb 2022 16:48:19 +0100
Subject: [PATCH] sparse tensor combined train draft notebook

---
 sparse_tensor_combined.ipynb | 242 +++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 sparse_tensor_combined.ipynb

diff --git a/sparse_tensor_combined.ipynb b/sparse_tensor_combined.ipynb
new file mode 100644
index 0000000..e9eb95b
--- /dev/null
+++ b/sparse_tensor_combined.ipynb
@@ -0,0 +1,242 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 222,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from network import Net\n",
+    "import torch\n",
+    "from typing import List\n",
+    "from functionalities_test import is_identity_function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 255,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nr_nets = 5\n",
+    "nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
+    "\n",
+    "loss_fn = torch.nn.MSELoss()\n",
+    "optimizer = torch.optim.SGD([param for net in nets for param in net.parameters()], lr=0.004, momentum=0.9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 256,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 256,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sum([is_identity_function(net) for net in nets])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 247,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(torch.Size([14, 20]), torch.Size([14, 5]))"
+      ]
+     },
+     "execution_count": 247,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X = torch.hstack( [net.input_weight_matrix() for net in nets] ) # (nr_weights, 4*nr_nets); recorded output: (14, 20)\n",
+    "Y = torch.hstack( [net.create_target_weights(net.input_weight_matrix()) for net in nets] ) # (nr_weights, nr_nets); recorded output: (14, 5)\n",
+    "X.shape, Y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 270,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[tensor(indices=tensor([[ 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,\n",
+       "                          3,  3,  4,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,\n",
+       "                          7,  7,  7,  7,  8,  8,  8,  8,  9,  9,  9,  9],\n",
+       "                        [ 0,  1,  2,  3,  0,  1,  2,  3,  4,  5,  6,  7,  4,  5,\n",
+       "                          6,  7,  8,  9, 10, 11,  8,  9, 10, 11, 12, 13, 14, 15,\n",
+       "                         12, 13, 14, 15, 16, 17, 18, 19, 16, 17, 18, 19]]),\n",
+       "        values=tensor([-0.0282,  0.1612,  0.0717, -0.1370, -0.0789,  0.0990,\n",
+       "                       -0.0642,  0.1385, -0.1046,  0.1522, -0.0691,  0.0848,\n",
+       "                       -0.1419, -0.0465, -0.0385,  0.1453, -0.0263,  0.1401,\n",
+       "                        0.0758,  0.1022,  0.1218, -0.1423,  0.0556,  0.0150,\n",
+       "                        0.0598, -0.0347, -0.0717,  0.1173,  0.0126, -0.0164,\n",
+       "                       -0.0359,  0.0895,  0.1545, -0.1091,  0.0925,  0.0687,\n",
+       "                        0.1330,  0.1297,  0.0305,  0.1811]),\n",
+       "        size=(10, 20), nnz=40, layout=torch.sparse_coo, requires_grad=True),\n",
+       " tensor(indices=tensor([[0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9,\n",
+       "                         9],\n",
+       "                        [0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 8,\n",
+       "                         9]]),\n",
+       "        values=tensor([-0.1608,  0.0952,  0.0369,  0.0105, -0.0277,  0.0216,\n",
+       "                        0.0991,  0.1250,  0.0618,  0.2241,  0.0602,  0.1144,\n",
+       "                       -0.0330, -0.1240,  0.0456, -0.1208, -0.1859,  0.1333,\n",
+       "                        0.1235, -0.1774]),\n",
+       "        size=(10, 10), nnz=20, layout=torch.sparse_coo, requires_grad=True),\n",
+       " tensor(indices=tensor([[0, 0, 1, 1, 2, 2, 3, 3, 4, 4],\n",
+       "                        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]),\n",
+       "        values=tensor([ 0.0585,  0.1856, -0.0987,  0.2342, -0.0376,  0.0765,\n",
+       "                       -0.1395,  0.1574, -0.0103, -0.0933]),\n",
+       "        size=(5, 10), nnz=10, layout=torch.sparse_coo, requires_grad=True)]"
+      ]
+     },
+     "execution_count": 270,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def construct_sparse_tensor_layer(nets:List[Net], layer_idx:int) -> torch.Tensor:\n",
+    "    assert layer_idx <= len(list(nets[0].parameters()))\n",
+    "    values = []\n",
+    "    indices = []\n",
+    "    for net_idx,net in enumerate(nets):\n",
+    "        layer = list(net.parameters())[layer_idx]\n",
+    "        \n",
+    "        for cell_idx,cell in enumerate(layer):\n",
+    "            # E.g., position of cell weights (with 2 cells per hidden layer) in the first sparse layer of N nets:\n",
+    "            # [4x2 weights_net0] [4x2x(N-1) 0s]\n",
+    "            # [4x2 0s] [4x2 weights_net1] [4x2x(N-2) 0s]\n",
+    "            # ... etc\n",
+    "            # [4x2x(N-1) 0s] [4x2 weights_net(N-1)]\n",
+    "            # -> each net's weights form one block on the diagonal: the block of net k is shifted k*len(layer) rows down and k*len(cell) columns to the right\n",
+    "            for i in range(len(cell)):\n",
+    "                indices.append([len(layer)*net_idx + cell_idx,    net_idx*len(cell) + i ])\n",
+    "                #indices.append([2*net_idx + cell_idx,    net_idx*len(cell) + i ])\n",
+    "\n",
+    "            [values.append(weight) for weight in cell]\n",
+    "            # for i in range(4):\n",
+    "            #     indices.append([idx+idx+1,  i+(idx*4)])\n",
+    "            #for l in next(net.parameters()):\n",
+    "            #[values.append(w) for w in l]\n",
+    "    #print(indices, values)\n",
+    "\n",
+    "    #s = torch.sparse_coo_tensor(list(zip(*indices)), values, (2*nr_nets, 4*nr_nets))\n",
+    "    # sparse tensor dimension = (nr_cells*nr_nets , nr_weights/cell * nr_nets), i.e.,\n",
+    "    # layer 1: (2x4) -> (2*N, 4*N)\n",
+    "    # layer 2: (2x2) -> (2*N, 2*N)\n",
+    "    # layer 3: (1x2) -> (1*N, 2*N)\n",
+    "    s = torch.sparse_coo_tensor(list(zip(*indices)), values, (len(layer)*nr_nets, len(cell)*nr_nets),requires_grad=True)\n",
+    "    #print(s.to_dense())\n",
+    "    #print(s.to_dense().shape)\n",
+    "    return s\n",
+    "\n",
+    "\n",
+    "# Build one combined sparse tensor per layer by placing every net's weights on the diagonal.\n",
+    "# With Nets of (4,2,1), each net contributes\n",
+    "# - [4x2] weights in the first (input) layer\n",
+    "# - [2x2] weights in the second (hidden) layer\n",
+    "# - [2x1] weights in the third (output) layer\n",
+    "modules = [ construct_sparse_tensor_layer(nets, layer_idx) for layer_idx in range(len(list(nets[0].parameters()))) ]\n",
+    "modules\n",
+    "#for layer_idx in range(len(list(nets[0].parameters()))):\n",
+    "#    sparse_tensor = construct_sparse_tensor_layer(nets, layer_idx)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 295,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "before: 0/5000 identity_fns\n",
+      "after 1 iterations of combined self_train: 0/5000 identity_fns\n"
+     ]
+    }
+   ],
+   "source": [
+    "nr_nets = 5000\n",
+    "nets = [Net(4,2,1) for _ in range(nr_nets)]\n",
+    "print(f\"before: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")\n",
+    "\n",
+    "loss_fn = torch.nn.MSELoss(reduction=\"sum\")\n",
+    "optimizer = torch.optim.SGD([param for net in nets for param in net.parameters()], lr=0.004, momentum=0.9)\n",
+    "\n",
+    "\n",
+    "for train_iteration in range(1):\n",
+    "    optimizer.zero_grad()  \n",
+    "    X = torch.hstack( [net.input_weight_matrix() for net in nets] ).requires_grad_(True).T # transposed: (4*nr_nets, nr_weights)\n",
+    "    Y = torch.hstack( [net.create_target_weights(net.input_weight_matrix()) for net in nets] ).requires_grad_(True).T # transposed: (nr_nets, nr_weights)\n",
+    "    #print(\"X \", X.shape, \"Y\", Y.shape)\n",
+    "\n",
+    "    modules = [ construct_sparse_tensor_layer(nets, layer_idx) for layer_idx in range(len(list(nets[0].parameters()))) ]\n",
+    "\n",
+    "    X1 = torch.sparse.mm(modules[0], X)\n",
+    "    #print(\"X1\", X1.shape, X1)\n",
+    "\n",
+    "    X2 = torch.sparse.mm(modules[1], X1)\n",
+    "    #print(\"X2\", X2.shape)\n",
+    "\n",
+    "    X3 = torch.sparse.mm(modules[2], X2)\n",
+    "    #print(\"X3\", X3.shape)\n",
+    "\n",
+    "    loss = loss_fn(X3, Y)\n",
+    "    #print(loss)\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "\n",
+    "print(f\"after {train_iteration+1} iterations of combined self_train: {sum([is_identity_function(net) for net in nets])}/{len(nets)} identity_fns\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "8bcba732c17ca4dacffea8ad1176c852d4229b36b9060a5f633fff752e5396ea"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.8.12 64-bit ('masterthesis': conda)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}