Readme Update and Residuals GN Training
@@ -31,10 +31,15 @@
 
 ### Tasks for Steffen:
 
-- [ ] Training with smaller GNs - Running
+- [x] Training with smaller GNs
+  - Accuracy suffers greatly (_0.56_)
+
+  - More SRNNs emerge
+  - The dropout effect gets stronger (diff_ohne_SRNN = _0.0_)
+
 - [ ] Continue training -> 500 epochs?
 - [ ] Adjust loss weighting
-- [ ] Training without residual skip connection
+- [ ] Training without residual skip connection - Running
 - [ ] Test with baseline dense network
   - [ ] with comparable neuron count
   - [ ] with equal total weight count
@@ -133,7 +133,7 @@ def checkpoint_and_validate(model, out_path, epoch_n, final_model=False):
 
 def plot_training_result(path_to_dataframe):
     # load from Drive
-    df = pd.read_csv(path_to_dataframe, index_col=0)
+    df = pd.read_csv(path_to_dataframe, index_col=False)
 
     # Set up figure
     fig, ax1 = plt.subplots()  # initializes figure and plots
@@ -163,6 +163,9 @@ def plot_training_result(path_to_dataframe):
     else:
         plt.savefig(Path(path_to_dataframe.parent / 'training_lineplot.png'), dpi=300)
 
+def flat_for_store(parameters):
+    return (x.item() for y in parameters for x in y.detach().flatten())
+
 
 if __name__ == '__main__':
 
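The new `flat_for_store` helper linearises a particle's nested parameter tensors into a flat stream of Python floats, so an entire weight snapshot fits into one CSV row. A quick standalone check of what it yields, using a toy `nn.Linear` as a stand-in for a particle:

```python
import torch.nn as nn

def flat_for_store(parameters):
    # one float per scalar weight, detached from the autograd graph
    return (x.item() for y in parameters for x in y.detach().flatten())

net = nn.Linear(2, 2)                        # toy stand-in for a particle
row = list(flat_for_store(net.parameters()))
print(len(row))                              # 6 floats: 4 weights + 2 biases
```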
@@ -175,7 +178,7 @@ if __name__ == '__main__':
     data_path = Path('data')
     data_path.mkdir(exist_ok=True, parents=True)
 
-    run_path = Path('output') / 'mn_st_smaller'
+    run_path = Path('output') / 'mn_st_NoRes'
     model_path = run_path / '0000_trained_model.zip'
     df_store_path = run_path / 'train_store.csv'
     weight_store_path = run_path / 'weight_store.csv'
@@ -189,14 +192,14 @@ if __name__ == '__main__':
         d = DataLoader(dataset, batch_size=BATCHSIZE, shuffle=True, drop_last=True, num_workers=WORKER)
 
         interface = np.prod(dataset[0][0].shape)
-        metanet = MetaNet(interface, depth=5, width=6, out=10).to(DEVICE)
+        metanet = MetaNet(interface, depth=5, width=6, out=10, residual_skip=False).to(DEVICE)
         meta_weight_count = sum(p.numel() for p in next(metanet.particles).parameters())
 
         loss_fn = nn.CrossEntropyLoss()
         optimizer = torch.optim.SGD(metanet.parameters(), lr=0.008, momentum=0.9)
 
         train_store = new_storage_df('train', None)
-        weight_store = new_storage_df('train', meta_weight_count)
+        weight_store = new_storage_df('weights', meta_weight_count)
         for epoch in tqdm(range(EPOCH), desc='MetaNet Train - Epochs'):
             is_validation_epoch = epoch % VALIDATION_FRQ == 0 if not debug else True
             is_self_train_epoch = epoch % SELF_TRAIN_FRQ == 0 if not debug else True
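`meta_weight_count` above sizes the weight store from a single particle using the standard `numel()` idiom. A minimal illustration on a generic PyTorch module (not this repo's MetaNet):

```python
import torch.nn as nn

# parameter count = sum of element counts over all parameter tensors
net = nn.Sequential(nn.Linear(5, 4), nn.Linear(4, 1))
n_params = sum(p.numel() for p in net.parameters())
print(n_params)  # (5*4 + 4) + (4*1 + 1) = 29
```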
@@ -254,29 +257,30 @@ if __name__ == '__main__':
                         step_log = dict(Epoch=int(epoch), Batch=BATCHSIZE, Metric=key, Score=value)
                         train_store.loc[train_store.shape[0]] = step_log
                 for particle in metanet.particles:
-                    weight_log = (epoch, particle.name, *(x for y in particle.parameters() for x in y))
-                train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists())
-                weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists())
+                    weight_log = (epoch, particle.name, *flat_for_store(particle.parameters()))
+                    weight_store.loc[weight_store.shape[0]] = weight_log
+                train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
+                weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
                 train_store = new_storage_df('train', None)
-                weight_store = new_storage_df('train', meta_weight_count)
+                weight_store = new_storage_df('weights', meta_weight_count)
 
         metanet.eval()
         accuracy = checkpoint_and_validate(metanet, run_path, EPOCH, final_model=True)
         validation_log = dict(Epoch=EPOCH, Batch=BATCHSIZE,
                               Metric='Test Accuracy', Score=accuracy.item())
         for particle in metanet.particles:
-            weight_log = (EPOCH, particle.name, *(x for y in particle.parameters() for x in y))
+            weight_log = (EPOCH, particle.name, *(flat_for_store(particle.parameters())))
             weight_store.loc[weight_store.shape[0]] = weight_log
 
         train_store.loc[train_store.shape[0]] = validation_log
-        train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists())
-        weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists())
+        train_store.to_csv(df_store_path, mode='a', header=not df_store_path.exists(), index=False)
+        weight_store.to_csv(weight_store_path, mode='a', header=not weight_store_path.exists(), index=False)
 
     if plotting:
         plot_training_result(df_store_path)
 
     if particle_analysis:
-        model_path = next(run_path.glob(f'*e{EPOCH}.tp'))
+        model_path = next(run_path.glob(f'*e100.tp'))
         latest_model = torch.load(model_path, map_location=DEVICE).eval()
         counter_dict = defaultdict(lambda: 0)
         _ = test_for_fixpoints(counter_dict, list(latest_model.particles))
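The `index=False` added to both `to_csv` calls matters because the stores are flushed in chunks with `mode='a'`: with the default `index=True`, every appended chunk would write its own 0-based row index as an unnamed first column that restarts at each flush. A minimal sketch of the append pattern (hypothetical path and columns):

```python
import pandas as pd

path = 'train_store.csv'  # hypothetical path
chunk = pd.DataFrame({'Epoch': [0], 'Metric': ['Train Loss'], 'Score': [1.23]})

# header only on the first write, never the DataFrame's own index
chunk.to_csv(path, mode='a', header=True, index=False)
chunk.to_csv(path, mode='a', header=False, index=False)
```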
@@ -284,10 +288,38 @@ if __name__ == '__main__':
         zero_ident = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero('identity_func')
         zero_other = torch.load(model_path, map_location=DEVICE).eval().replace_with_zero('other_func')
         if as_sparse_network_test:
-            acc_pre = validate(model_path, ratio=1)
+            acc_pre = validate(model_path, ratio=0.01).item()
             ident_ckpt = set_checkpoint(zero_ident, model_path.parent, -1, final_model=True)
-            ident_acc_post = validate(ident_ckpt, ratio=1)
+            ident_acc_post = validate(ident_ckpt, ratio=0.01).item()
             tqdm.write(f'Zero_ident diff = {abs(ident_acc_post-acc_pre)}')
             other_ckpt = set_checkpoint(zero_other, model_path.parent, -2, final_model=True)
-            other_acc_post = validate(other_ckpt, ratio=1)
+            other_acc_post = validate(other_ckpt, ratio=0.01).item()
             tqdm.write(f'Zero_other diff = {abs(other_acc_post - acc_pre)}')
+
+            if plotting:
+                plt.clf()
+                fig, ax = plt.subplots(ncols=2)
+                data = [acc_pre, ident_acc_post, other_acc_post]
+                labels = ['Full Network', 'Sparse, No Identity', 'Sparse, No Other']
+                for idx, (score, name) in enumerate(zip(data, labels)):
+                    l = sns.barplot(y=[score], x=['Networks'], color=sns.color_palette()[idx], label=name, ax=ax[0])
+                # noinspection PyUnboundLocalVariable
+                for idx, patch in enumerate(l.patches):
+                    if idx != 0:
+                        # we recenter the bar
+                        patch.set_x(patch.get_x() + idx * 0.035)
+
+                ax[0].set_title('Accuracy after particle dropout')
+                ax[0].set_xlabel('Accuracy')
+                # ax[0].legend()
+
+                counter_dict['full_network'] = sum(counter_dict.values())
+                ax[1].pie(counter_dict.values(), labels=counter_dict.keys(), colors=sns.color_palette()[:3], )
+                ax[1].set_title('Particle Count for ')
+                # ax[1].set_xlabel('')
+
+                plt.tight_layout()
+                if debug:
+                    plt.show()
+                else:
+                    plt.savefig(Path(run_path / 'dropout_stacked_barplot.png'), dpi=300)
network.py (10 changed lines)
@@ -296,7 +296,7 @@ class MetaCell(nn.Module):
         self.name = name
         self.interface = interface
         self.weight_interface = 5
-        self.net_hidden_size = 3
+        self.net_hidden_size = 4
         self.net_ouput_size = 1
         self.meta_weight_list = nn.ModuleList()
         self.meta_weight_list.extend(
@@ -371,7 +371,7 @@ class MetaLayer(nn.Module):
 
 class MetaNet(nn.Module):
 
-    def __init__(self, interface=4, depth=3, width=4, out=1, activation=None):
+    def __init__(self, interface=4, depth=3, width=4, out=1, activation=None, residual_skip=True):
         super().__init__()
         self.activation = activation
         self.out = out
@@ -382,14 +382,14 @@ class MetaNet(nn.Module):
         self._meta_layer_list = nn.ModuleList()
         self._meta_layer_list.append(MetaLayer(name=f'L{0}',
                                                interface=self.interface,
-                                               width=self.width)
+                                               width=self.width, residual_skip=residual_skip)
                                      )
         self._meta_layer_list.extend([MetaLayer(name=f'L{layer_idx + 1}',
-                                                interface=self.width, width=self.width
+                                                interface=self.width, width=self.width, residual_skip=residual_skip
                                                 ) for layer_idx in range(self.depth - 2)]
                                      )
         self._meta_layer_list.append(MetaLayer(name=f'L{len(self._meta_layer_list)}',
-                                               interface=self.width, width=self.out)
+                                               interface=self.width, width=self.out, residual_skip=residual_skip)
                                      )
 
     def replace_with_zero(self, ident_key):
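This diff threads a `residual_skip` flag from `MetaNet` into every `MetaLayer`, but the MetaLayer-side handling is not part of the shown hunks. As a rough, hypothetical sketch of the kind of shortcut such a flag typically toggles (not this repo's actual MetaLayer code):

```python
import torch
import torch.nn as nn

class TinyLayer(nn.Module):
    """Hypothetical stand-in for MetaLayer: a block whose identity
    shortcut can be switched off via residual_skip."""
    def __init__(self, width, residual_skip=True):
        super().__init__()
        self.residual_skip = residual_skip
        self.linear = nn.Linear(width, width)

    def forward(self, x):
        out = torch.relu(self.linear(x))
        if self.residual_skip:
            out = out + x  # shortcut only valid while widths match
        return out

x = torch.randn(8, 6)
print(TinyLayer(6, residual_skip=False)(x).shape)  # torch.Size([8, 6])
```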