from pathlib import Path
from random import randint
from tqdm import trange

from marl_factory_grid.environment.factory import Factory

from marl_factory_grid.utils.logging.envmonitor import EnvMonitor
from marl_factory_grid.utils.logging.recorder import EnvRecorder
from marl_factory_grid.utils.plotting.plot_single_runs import plot_single_run
from marl_factory_grid.utils.tools import ConfigExplainer


if __name__ == '__main__':
    # Demo script: builds a Factory environment from a YAML config, drives it
    # with uniformly random actions for a few episodes, and optionally
    # monitors, records, and plots the run.

    # Render at each step?
    render = False
    # Reveal all possible Modules (Entities, Rules, Agents[Actions, Observations], etc.)
    explain_config = True
    # Collect statistics?
    monitor = True
    # Record as Protobuf?
    record = False
    # Plot Results?
    plotting = True

    # All artefacts of this run (config dump, monitor, records) go here.
    run_path = Path('study_out')
    # Create the output folder up front so the first write below cannot fail
    # on a fresh checkout where the directory does not exist yet.
    run_path.mkdir(parents=True, exist_ok=True)

    if explain_config:
        ce = ConfigExplainer()
        ce.save_all(run_path / 'all_available_configs.yaml')

    # Path to config File
    path = Path('marl_factory_grid/configs/eight_puzzle.yaml')

    # Env Init
    factory = Factory(path)

    # Record and Monitor: each wrapper takes the environment built so far, so
    # with both switches on the recorder wraps the monitor.
    if monitor:
        factory = EnvMonitor(factory)
    if record:
        factory = EnvRecorder(factory)

    # Random-action rollout loop (no learning happens here).
    for episode in trange(10):
        _ = factory.reset()
        done = False
        if render:
            factory.render()
        # Re-read the action spaces after every reset.
        action_spaces = factory.action_space
        while not done:
            # One uniformly sampled discrete action index per action space.
            a = [randint(0, x.n - 1) for x in action_spaces]
            # Only `done` is consumed; the other step results are ignored.
            _obs_type, _, _reward, done, _info = factory.step(a)
            if render:
                factory.render()
        # The loop above only exits once `done` is truthy.
        print(f'Episode {episode} done...')

    # NOTE(review): with record=True, `factory` is the EnvRecorder; calling
    # save_monitor on it presumably relies on the wrapper forwarding the call
    # to the wrapped EnvMonitor - confirm.
    if monitor:
        factory.save_monitor(run_path / 'test_monitor.pkl')
    if record:
        factory.save_records(run_path / 'test.pb')
    if plotting:
        # NOTE(review): this presumably needs monitor=True (the reporting
        # method and the data plotted from run_path look monitor-provided) -
        # confirm before running with plotting=True and monitor=False.
        factory.report_possible_colum_keys()
        plot_single_run(run_path, column_keys=['step_reward'])

    print('Done!!! Goodbye....')