Small bugfixes

Steffen 2021-03-18 12:12:43 +01:00
parent fecf4923c2
commit 2c9cb2e94a
2 changed files with 108 additions and 99 deletions

main.py

@@ -10,12 +10,16 @@ from ml_lib.utils.config import parse_comandline_args_add_defaults
from ml_lib.utils.loggers import Logger
import variables as v
+from ml_lib.utils.tools import fix_all_random_seeds

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)


-def run_lightning_loop(h_params, data_class, model_class, additional_callbacks=None):
+def run_lightning_loop(h_params, data_class, model_class, seed=69, additional_callbacks=None):
+    fix_all_random_seeds(seed)

    with Logger.from_argparse_args(h_params) as logger:
        # Callbacks
        # =============================================================================
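fix_all_random_seeds comes from ml_lib.utils.tools and its body is not part of this commit. A minimal sketch of what such a helper usually does, assuming it only needs to seed Python, NumPy and PyTorch (the implementation below is illustrative, not the ml_lib one):

import os
import random

import numpy as np
import torch


def fix_all_random_seeds(seed: int = 69) -> None:
    """Seed every RNG the training loop touches (sketch, not the ml_lib implementation)."""
    random.seed(seed)                         # Python's built-in RNG
    np.random.seed(seed)                      # NumPy
    torch.manual_seed(seed)                   # PyTorch CPU RNG
    torch.cuda.manual_seed_all(seed)          # all visible GPUs (no-op without CUDA)
    os.environ['PYTHONHASHSEED'] = str(seed)  # hash randomization for new subprocesses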
@@ -79,13 +83,13 @@ def run_lightning_loop(h_params, data_class, model_class, additional_callbacks=N

if __name__ == '__main__':
    # Parse command-line args, read config and get model
-    cmd_args, found_data_class, found_model_class = parse_comandline_args_add_defaults('_parameters.ini')
+    cmd_args, found_data_class, found_model_class, found_seed = parse_comandline_args_add_defaults('_parameters.ini')
    # To NameSpace
    hparams = Namespace(**cmd_args)

    # Start
    # -----------------
-    run_lightning_loop(hparams, found_data_class, found_model_class)
+    run_lightning_loop(hparams, found_data_class, found_model_class, found_seed)
    print('done')
    pass
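With the seed threaded through run_lightning_loop, two runs configured with the same seed should draw identical random numbers. A small self-contained check of that property, using the sketched helper above rather than the real ml_lib function:

import torch

fix_all_random_seeds(69)
first = torch.randn(4)
fix_all_random_seeds(69)
second = torch.randn(4)
assert torch.equal(first, second), 'identical seeds must reproduce the same draws'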

Second changed file (the VisualPerformer model):

@@ -4,7 +4,6 @@ from argparse import Namespace
import warnings

import torch
-from performer_pytorch import Performer
from torch import nn
from einops import rearrange, repeat
@@ -15,119 +14,125 @@ from util.module_mixins import CombinedModelMixins

MIN_NUM_PATCHES = 16

+try:
+    from performer_pytorch import Performer

    class VisualPerformer(CombinedModelMixins,
                          LightningBaseModule
                          ):

        def __init__(self, in_shape, n_classes, weight_init, activation,
                     embedding_size, heads, attn_depth, patch_size, use_residual,
                     use_bias, use_norm, dropout, lat_dim, loss, scheduler,
                     lr, weight_decay, sto_weight_avg, lr_warm_restart_epochs, opt_reset_interval):
            # TODO: Move this to the parent class, or make it much easier to access... But how...
            a = dict(locals())
            params = {arg: a[arg] for arg in inspect.signature(self.__init__).parameters.keys() if arg != 'self'}
            super(VisualPerformer, self).__init__(params)

            self.in_shape = in_shape
            assert len(self.in_shape) == 3, 'There need to be three dimensions'
            channels, height, width = self.in_shape

            # Model Parameters
            # =============================================================================
            # Additional parameters
            self.embed_dim = self.params.embedding_size

            # Automatic Image Shaping
            self.patch_size = self.params.patch_size
            image_size = (max(height, width) // self.patch_size) * self.patch_size
            self.image_size = image_size + self.patch_size if image_size < max(height, width) else image_size

            # This should be obsolete
            assert self.image_size % self.patch_size == 0, 'image dimensions must be divisible by the patch size'

            num_patches = (self.image_size // self.patch_size) ** 2
            patch_dim = channels * self.patch_size ** 2
            assert num_patches >= MIN_NUM_PATCHES, f'your number of patches ({num_patches}) is way too small for ' + \
                                                   f'attention. Try decreasing your patch size'

            # Correct the Embedding Dim
            if not self.embed_dim % self.params.heads == 0:
                self.embed_dim = (self.embed_dim // self.params.heads) * self.params.heads
                message = ('Embedding dimension was fixed to be divisible by the number' +
                           f' of attention heads, is now: {self.embed_dim}')
                for func in print, warnings.warn:
                    func(message)

            # Utility Modules
            self.autopad = AutoPadToShape((self.image_size, self.image_size))

            # Modules with Parameters
            self.performer = Performer(
                dim=self.embed_dim,                       # dimension
                depth=self.params.attn_depth,             # layers
                heads=self.params.heads,                  # heads
                causal=True,                              # auto-regressive or not
                nb_features=None,                         # 256,  # number of random features; if not set, defaults to
                                                          # (d * log(d)), where d is the dimension of each head
                feature_redraw_interval=1000,             # how frequently to redraw the projection matrix;
                                                          # the more frequent, the slower the training
                generalized_attention=False,              # defaults to softmax approximation,
                                                          # but can be set to True for generalized attention
                kernel_fn=self.params.activation(),       # the kernel function to be used;
                                                          # if generalized attention is turned on, defaults to ReLU
                reversible=True,                          # reversible layers, from the Reformer paper
                ff_chunks=10,                             # chunk the feedforward layer, from the Reformer paper
                use_scalenorm=False,                      # use scale norm, from the 'Transformers without Tears' paper
                use_rezero=False,                         # use ReZero, from the 'ReZero is All You Need' paper
                ff_glu=True,                              # use the GLU variant for the feedforward
                ff_dropout=self.params.dropout,           # feedforward dropout
                attn_dropout=self.params.dropout,         # post-attention dropout
                local_attn_heads=self.params.heads // 2,  # half the heads use local attention, the others are global performers
                local_window_size=(patch_dim // self.params.heads) * 2  # window size of local attention
            )

            self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, self.embed_dim))
            self.patch_to_embedding = nn.Linear(patch_dim, self.embed_dim) if self.params.embedding_size \
                else F_x(self.embed_dim)
            self.cls_token = nn.Parameter(torch.randn(1, 1, self.embed_dim))
            self.dropout = nn.Dropout(self.params.dropout)

            self.to_cls_token = nn.Identity()

            self.mlp_head = nn.Sequential(
                nn.LayerNorm(self.embed_dim),
                nn.Linear(self.embed_dim, self.params.lat_dim),
                nn.GELU(),
                nn.Dropout(self.params.dropout),
                nn.Linear(self.params.lat_dim, n_classes),
                nn.Softmax()
            )

        def forward(self, x):
            """
            :param x: the sequence to the encoder (required).
            :return:
            """
            tensor = self.autopad(x)
            p = self.params.patch_size

            tensor = rearrange(tensor, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
            tensor = self.patch_to_embedding(tensor)
            b, n, _ = tensor.shape

            cls_tokens = repeat(self.cls_token, '() n d -> b n d', b=b)
            tensor = torch.cat((cls_tokens, tensor), dim=1)
            tensor += self.pos_embedding[:, :(n + 1)]
            tensor = self.dropout(tensor)

            tensor = self.performer(tensor)
            tensor = self.to_cls_token(tensor[:, 0])
            tensor = self.mlp_head(tensor)
            return Namespace(main_out=tensor)

        def additional_scores(self, outputs):
            return MultiClassScores(self)(outputs)

+except ImportError:  # pragma: do not provide model class
+    print('You want to use the `performer_pytorch` plugin, which is not installed yet;'  # pragma: no-cover
+          ' install it with `pip install performer_pytorch`.')
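Wrapping the module in try/except ImportError makes performer_pytorch an optional dependency: importing this file no longer fails on machines without the package, it simply skips defining VisualPerformer. An equivalent, more compact variant of the same guard that callers can also query is sketched below; it is not the repository's code, and HAS_PERFORMER is an illustrative name:

try:
    from performer_pytorch import Performer
    HAS_PERFORMER = True
except ImportError:
    Performer = None       # callers can test for None or check the flag below
    HAS_PERFORMER = False

if not HAS_PERFORMER:
    print('Install the optional dependency with `pip install performer_pytorch` to use VisualPerformer.')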
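The automatic image shaping in __init__ rounds the larger image side up to the next multiple of patch_size, derives the number of patches and the per-patch dimension from it, and finally rounds the embedding size down to a multiple of the head count. A standalone walk-through of that arithmetic; the concrete numbers (3x100x100 input, patch_size=16, embedding_size=100, heads=8) are made up for illustration and do not come from the project config:

channels, height, width = 3, 100, 100        # illustrative input shape
patch_size = 16
embedding_size, heads = 100, 8               # illustrative hyperparameters

image_size = (max(height, width) // patch_size) * patch_size     # 6 * 16 = 96
if image_size < max(height, width):
    image_size += patch_size                                      # padded up to 112
num_patches = (image_size // patch_size) ** 2                     # 7 ** 2 = 49 >= MIN_NUM_PATCHES
patch_dim = channels * patch_size ** 2                            # 3 * 256 = 768 values per flattened patch

embed_dim = embedding_size
if embed_dim % heads != 0:
    embed_dim = (embed_dim // heads) * heads                      # 100 -> 96, divisible by 8 heads

print(image_size, num_patches, patch_dim, embed_dim)              # 112 49 768 96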
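forward() relies on einops to cut the (auto-padded) image into flattened patches and to broadcast the class token over the batch. A minimal shape check of those two patterns; batch size, image size and embedding dimension below are arbitrary example values:

import torch
from einops import rearrange, repeat

p = 16
images = torch.randn(2, 3, 112, 112)          # b c h w, sides already a multiple of p
patches = rearrange(images, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
print(patches.shape)                          # torch.Size([2, 49, 768])

cls_token = torch.randn(1, 1, 96)             # illustrative embed_dim = 96
cls_tokens = repeat(cls_token, '() n d -> b n d', b=images.shape[0])
print(cls_tokens.shape)                       # torch.Size([2, 1, 96])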