Merge remote-tracking branch 'origin/master'
# Conflicts:
#	models/transformer_model.py
#	multi_run.py
@@ -31,7 +31,7 @@ activation = gelu
 use_bias = True
 use_norm = True
 use_residual = True
-dropout = 0.2
+dropout = 0.21

 lat_dim = 32
 patch_size = 8
@@ -48,7 +48,7 @@ use_norm = True
 dropout = 0.2
 lat_dim = 32
 features = 64
-filters = [16, 32, 64, 128]
+filters = [16, 32, 64]

 [VisualTransformer]
 weight_init = xavier_normal_
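The filters value above is stored as literal text in the INI file. A minimal sketch of how such a section could be parsed back into Python types with the standard library follows; the section name, the inlined file contents, and the loader itself are assumptions for illustration, not the repo's actual config machinery:

import ast
from configparser import ConfigParser

raw = """
[CNNBaseline]
use_norm = True
dropout = 0.2
lat_dim = 32
features = 64
filters = [16, 32, 64]
"""

cfg = ConfigParser()
cfg.read_string(raw)
section = cfg['CNNBaseline']

use_norm = section.getboolean('use_norm')             # True
dropout = section.getfloat('dropout')                 # 0.2
filters = ast.literal_eval(section.get('filters'))    # [16, 32, 64]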
@@ -14,10 +14,12 @@ class CNNBaseline(CombinedModelMixins,
                   LightningBaseModule
                   ):

-    def __init__(self, in_shape, n_classes, weight_init, activation, use_bias, use_norm, dropout, lat_dim, features,
+    def __init__(self, in_shape, n_classes, weight_init, activation,
+                 use_bias, use_norm, dropout, lat_dim, features,
                  filters,
                  lr, weight_decay, sto_weight_avg, lr_warm_restart_epochs, opt_reset_interval,
-                 loss, scheduler):
+                 loss, scheduler, lr_scheduler_parameter
+                 ):

         # TODO: Move this to parent class, or make it much easier to access...
         a = dict(locals())
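The a = dict(locals()) line snapshots every constructor argument by name, which is a common way to stash all hyperparameters in one place. A minimal standalone illustration of that pattern follows; the TinyModel class and the Namespace container are stand-ins, not the repo's actual base class or params object:

from argparse import Namespace

class TinyModel:
    def __init__(self, in_shape, n_classes, dropout, lr):
        a = dict(locals())        # snapshot of every constructor argument
        a.pop('self')             # keep only the hyperparameters
        self.params = Namespace(**a)

m = TinyModel(in_shape=(1, 64, 64), n_classes=2, dropout=0.21, lr=1e-3)
print(m.params.dropout)           # 0.21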
@@ -21,7 +21,7 @@ class VisualTransformer(CombinedModelMixins,
                         ):

     def __init__(self, in_shape, n_classes, weight_init, activation,
-                 embedding_size, heads, attn_depth, patch_size, use_residual,
+                 embedding_size, heads, attn_depth, patch_size, use_residual, variable_length,
                  use_bias, use_norm, dropout, lat_dim, loss, scheduler, mlp_dim, head_dim,
                  lr, weight_decay, sto_weight_avg, lr_scheduler_parameter, opt_reset_interval):

@@ -75,7 +75,7 @@ class VisualTransformer(CombinedModelMixins,
             nn.Linear(self.embed_dim, self.params.lat_dim),
             nn.GELU(),
             nn.Dropout(self.params.dropout),
-            nn.Linear(self.params.lat_dim, self.params.n_classes),
+            nn.Linear(self.params.lat_dim, n_classes),
             nn.Softmax()
         )

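For reference, a self-contained sketch of the classification head assembled above; the dimensions are placeholders, and nn.Softmax is given an explicit dim here (the diff's version omits it, which triggers PyTorch's implicit-dim warning):

import torch
from torch import nn

embed_dim, lat_dim, n_classes, dropout = 128, 32, 2, 0.21   # placeholder sizes

mlp_head = nn.Sequential(
    nn.Linear(embed_dim, lat_dim),
    nn.GELU(),
    nn.Dropout(dropout),
    nn.Linear(lat_dim, n_classes),
    nn.Softmax(dim=-1),                 # explicit dim avoids the warning
)

cls_token = torch.randn(4, embed_dim)   # e.g. CLS embeddings for a batch of 4
print(mlp_head(cls_token).shape)        # torch.Size([4, 2])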
@@ -91,12 +91,15 @@ class VisualTransformer(CombinedModelMixins,
         tensor = rearrange(tensor, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=p, p2=p)
         b, n, _ = tensor.shape

-        # mask
-        lengths = torch.count_nonzero(tensor, dim=-1)
-        mask = (lengths == torch.zeros_like(lengths))
-        # CLS-token awareness
-        # mask = torch.cat((torch.zeros(b, 1), mask), dim=-1)
-        # mask = repeat(mask, 'b n -> b h n', h=self.params.heads)
+        if self.params.variable_length and mask is None:
+            # mask
+            lengths = torch.count_nonzero(tensor, dim=-1)
+            mask = (lengths == torch.zeros_like(lengths))
+            # CLS-token awareness
+            # mask = torch.cat((torch.zeros(b, 1), mask), dim=-1)
+            # mask = repeat(mask, 'b n -> b h n', h=self.params.heads)
+        else:
+            mask = mask

         tensor = self.patch_to_embedding(tensor)

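The new branch only builds a padding mask when variable_length is enabled and no mask was passed in: patches whose feature vector is entirely zero are flagged as padding. A small standalone check of that count_nonzero trick, using toy shapes rather than the model's real patch layout:

import torch

tensor = torch.randn(2, 4, 3)     # batch of 2 sequences, 4 patches, 3 features
tensor[1, 2:] = 0.                # simulate zero-padded patches in sample 2

lengths = torch.count_nonzero(tensor, dim=-1)    # non-zero entries per patch
mask = (lengths == torch.zeros_like(lengths))    # True where a patch is all zeros
print(mask)
# tensor([[False, False, False, False],
#         [False, False,  True,  True]])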
@@ -152,7 +152,7 @@ class TestMixin:
         class_names = {val: key for val, key in ['negative', 'positive']}

         df = pd.DataFrame(data=dict(filename=[Path(x).name for x in sorted_y.keys()],
-                                    prediction=y_max.cpu().numpy()))
+                                    prediction=[class_names[x.item()] for x in y_max.cpu()]))
         result_file = Path(self.logger.log_dir / 'predictions.csv')
         if result_file.exists():
             try:
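The prediction column now carries class labels instead of raw argmax indices. A hedged sketch of that export step in isolation; the file list, the tensor values, and the enumerate-based class_names mapping are made up here for illustration:

from pathlib import Path
import pandas as pd
import torch

class_names = {idx: name for idx, name in enumerate(['negative', 'positive'])}

files = ['recordings/a.wav', 'recordings/b.wav']   # placeholder inputs
y_max = torch.tensor([1, 0])                       # argmax class index per file

df = pd.DataFrame(data=dict(filename=[Path(x).name for x in files],
                            prediction=[class_names[x.item()] for x in y_max.cpu()]))
df.to_csv('predictions.csv', index=False)          # filename, prediction columns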
@@ -26,7 +26,7 @@ class OptimizerMixin:
         optimizer_dict.update(optimizer=optimizer)

         if self.params.scheduler == CosineAnnealingWarmRestarts.__name__:
-            scheduler = CosineAnnealingWarmRestarts(optimizer, self.params.lr_scheduler_parameter)
+            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=self.params.lr_scheduler_parameter)
         elif self.params.scheduler == LambdaLR.__name__:
             lr_reduce_ratio = self.params.lr_scheduler_parameter
             scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: lr_reduce_ratio ** epoch)
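CosineAnnealingWarmRestarts interprets its second argument as T_0, the number of epochs before the first warm restart, so passing it by keyword only makes the existing call explicit. A standalone sketch of both scheduler branches with a toy model; the parameter values are placeholders, not the repo's configuration:

from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, LambdaLR

model = nn.Linear(8, 2)
optimizer = Adam(model.parameters(), lr=1e-3)

scheduler_name = 'CosineAnnealingWarmRestarts'
lr_scheduler_parameter = 10                      # restart period or decay ratio

if scheduler_name == CosineAnnealingWarmRestarts.__name__:
    # T_0: epochs until the first restart of the cosine schedule
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=lr_scheduler_parameter)
elif scheduler_name == LambdaLR.__name__:
    lr_reduce_ratio = lr_scheduler_parameter
    scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: lr_reduce_ratio ** epoch)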