from argparse import Namespace

import torch
from torch import nn
from torch.nn import ModuleList

from ml_lib.modules.blocks import ConvModule, LinearModule
from ml_lib.modules.util import (LightningBaseModule, Splitter)
from util.module_mixins import (BaseOptimizerMixin, BaseTrainMixin, BaseValMixin, DatasetMixin,
                                BaseDataloadersMixin)


class BandwiseConvMultiheadClassifier(DatasetMixin,
                                      BaseDataloadersMixin,
                                      BaseTrainMixin,
                                      BaseValMixin,
                                      BaseOptimizerMixin,
                                      LightningBaseModule
                                      ):
    """Multi-head classifier that scores an input per band and as a whole.

    The input is cut into ``n_band_sections`` pieces by a ``Splitter``. Each
    piece runs through its own stack of ``ConvModule`` blocks followed by a
    chain of ``LinearModule`` blocks ending in a one-unit sigmoid head. The
    per-band scores are concatenated along ``dim=1`` and passed through a
    second chain of ``LinearModule`` blocks that ends in another one-unit
    sigmoid head (the "main" output).

    Training and validation both use the mean of the BCE losses of every band
    head and of the main head.

    NOTE(review): ``self.params``, ``self.bce_loss`` and ``self.build_dataset``
    are not defined in this class; presumably they come from the mixins or
    from ``LightningBaseModule`` -- confirm there.
    """

    def _bandwise_losses(self, batch_xy):
        """Run the model on one batch and compute every loss term.

        Shared by ``training_step`` and ``validation_step`` so both always
        evaluate the same objective.

        Args:
            batch_xy: Pair ``(batch_x, batch_y)`` of model input and target.

        Returns:
            Tuple ``(main_out, batch_y, band_losses, main_loss, combined_loss)``
            where ``band_losses`` is a list holding one loss per band head,
            ``main_loss`` is the loss of the main head and ``combined_loss``
            is the mean over all of them.
        """
        batch_x, batch_y = batch_xy
        output = self(batch_x)

        # Band heads are evaluated first and the main head last, so the
        # stacked tensor is ordered [band_0, ..., band_n, main].
        band_losses = [self.bce_loss(band_y, batch_y) for band_y in output.bands]
        main_loss = self.bce_loss(output.main_out, batch_y)
        combined_loss = torch.stack(band_losses + [main_loss]).mean()

        return output.main_out, batch_y, band_losses, main_loss, combined_loss

    def training_step(self, batch_xy, batch_nb, *args, **kwargs):
        """Compute the training losses for one batch.

        Args:
            batch_xy: Pair ``(batch_x, batch_y)`` of model input and target.
            batch_nb: Index of the batch (unused here).

        Returns:
            Dict with one ``band_<i>_loss`` entry per band head, the main
            head's ``last_bce_loss`` and the combined ``loss``.
        """
        _, _, band_losses, main_loss, combined_loss = self._bandwise_losses(batch_xy)

        return_dict = {f'band_{band_idx}_loss': band_loss for band_idx, band_loss in enumerate(band_losses)}
        return_dict.update(last_bce_loss=main_loss)
        return_dict.update(loss=combined_loss)
        return return_dict

    def validation_step(self, batch_xy, batch_idx, *args, **kwargs):
        """Compute the validation losses for one batch.

        Args:
            batch_xy: Pair ``(batch_x, batch_y)`` of model input and target.
            batch_idx: Index of the batch; passed through in the result.

        Returns:
            Dict with one ``band_<i>_val_loss`` entry per band head, the main
            head's ``last_val_bce_loss``, the combined ``val_bce_loss`` and
            the ``batch_idx``, main prediction ``y`` and target ``batch_y``.
        """
        y, batch_y, band_losses, main_loss, combined_loss = self._bandwise_losses(batch_xy)

        return_dict = {f'band_{band_idx}_val_loss': band_loss for band_idx, band_loss in enumerate(band_losses)}
        return_dict.update(last_val_bce_loss=main_loss)
        return_dict.update(val_bce_loss=combined_loss,
                           batch_idx=batch_idx, y=y, batch_y=batch_y
                           )
        return return_dict

    def __init__(self, hparams):
        """Build the dataset and all sub-modules.

        Args:
            hparams: Hyper-parameters forwarded to the base class. This
                constructor reads ``filters``, ``lat_dim``, ``bias`` and
                ``module_kwargs`` from ``self.params``.
        """
        super().__init__(hparams)

        # Dataset
        # =============================================================================
        self.dataset = self.build_dataset()

        # Model Parameters
        # =============================================================================
        # Additional parameters
        self.in_shape = self.dataset.train_dataset.sample_shape
        self.conv_filters = self.params.filters
        self.n_band_sections = 4
        k = 3  # Base Kernel Value

        # Modules
        # =============================================================================
        self.split = Splitter(self.in_shape, self.n_band_sections)

        # One independent convolution stack per band; every stack starts from
        # the splitter's output shape and chains the shapes of its blocks.
        self.band_list = ModuleList()
        for _ in range(self.n_band_sections):
            last_shape = self.split.shape
            conv_list = ModuleList()
            for filters in self.conv_filters:
                conv_list.append(ConvModule(last_shape, filters, (k, k), conv_stride=(2, 2), conv_padding=2,
                                            **self.params.module_kwargs))
                last_shape = conv_list[-1].shape
            self.band_list.append(conv_list)

        # Per-band heads. The input shape is taken from the first band's last
        # conv block; all bands are built with identical arguments above.
        self.bandwise_deep_list_1 = ModuleList([
            LinearModule(self.band_list[0][-1].shape, self.params.lat_dim, **self.params.module_kwargs)
            for _ in range(self.n_band_sections)])
        self.bandwise_deep_list_2 = ModuleList([
            LinearModule(self.params.lat_dim, self.params.lat_dim * 2, **self.params.module_kwargs)
            for _ in range(self.n_band_sections)])
        self.bandwise_latent_list = ModuleList([
            LinearModule(self.params.lat_dim * 2, self.params.lat_dim, **self.params.module_kwargs)
            for _ in range(self.n_band_sections)])
        self.bandwise_classifier_list = ModuleList([
            LinearModule(self.params.lat_dim, 1, bias=self.params.bias, activation=nn.Sigmoid)
            for _ in range(self.n_band_sections)])

        # Main head: consumes the concatenated band scores (one per band).
        self.full_1 = LinearModule(self.n_band_sections, self.params.lat_dim, **self.params.module_kwargs)
        self.full_2 = LinearModule(self.full_1.shape, self.params.lat_dim * 2, **self.params.module_kwargs)
        self.full_3 = LinearModule(self.full_2.shape, self.params.lat_dim, **self.params.module_kwargs)
        self.full_out = LinearModule(self.full_3.shape, 1, bias=self.params.bias, activation=nn.Sigmoid)

    def forward(self, batch, **kwargs):
        """Score a batch per band and as a whole.

        Args:
            batch: Input tensor handed to the splitter.

        Returns:
            ``Namespace`` with ``main_out`` (output of the main head) and
            ``bands`` (list with the output of every band head, in band order).
        """
        # Collect the band outputs in a fresh list instead of writing them back
        # into the splitter's return value: that value may be an immutable
        # tuple (as returned by ``torch.split``) and is not ours to mutate.
        band_outputs = []
        for idx, (tensor, convs) in enumerate(zip(self.split(batch), self.band_list)):
            for conv in convs:
                tensor = conv(tensor)

            tensor = self.bandwise_deep_list_1[idx](tensor)
            tensor = self.bandwise_deep_list_2[idx](tensor)
            tensor = self.bandwise_latent_list[idx](tensor)
            band_outputs.append(self.bandwise_classifier_list[idx](tensor))

        # Concatenate the band scores along dim=1 as input to the main head.
        tensor = torch.cat(band_outputs, dim=1)
        tensor = self.full_1(tensor)
        tensor = self.full_2(tensor)
        tensor = self.full_3(tensor)
        tensor = self.full_out(tensor)
        return Namespace(main_out=tensor, bands=band_outputs)