marl-factory-grid/random_testrun.py
Steffen Illium 803d0dae7f Multiple Fixes:
- Config Explainer
 - Rewards
 - Destination Reach Condition
 - Additional Step Callback
2023-11-24 14:43:49 +01:00

68 lines
1.9 KiB
Python

from pathlib import Path
from random import randint
from tqdm import trange
from marl_factory_grid.environment.factory import Factory
from marl_factory_grid.utils.logging.envmonitor import EnvMonitor
from marl_factory_grid.utils.logging.recorder import EnvRecorder
from marl_factory_grid.utils.plotting.plot_single_runs import plot_single_run
from marl_factory_grid.utils.tools import ConfigExplainer
if __name__ == '__main__':
    # Smoke-test driver: build a Factory from a YAML config, optionally wrap it
    # for monitoring/recording, and drive it with random actions for a fixed
    # number of episodes. Results are written below `run_path`.

    # Render at each step?
    render = False
    # Reveal all possible Modules (Entities, Rules, Agents[Actions, Observations], etc.)
    explain_config = True
    # Collect statistics?
    monitor = True
    # Record as Protobuf?
    record = False
    # Plot Results?
    plotting = True

    run_path = Path('study_out')
    # Create the output directory up front: every artefact below (config dump,
    # monitor file, recording) is written into it, and this script cannot rely
    # on the individual writers creating missing parent directories.
    run_path.mkdir(parents=True, exist_ok=True)

    if explain_config:
        ce = ConfigExplainer()
        ce.save_all(run_path / 'all_available_configs.yaml')

    # Path to config File
    path = Path('marl_factory_grid/configs/eight_puzzle.yaml')

    # Env Init
    factory = Factory(path)

    # Record and Monitor: each enabled option wraps the current environment,
    # so with both enabled the recorder ends up as the outermost wrapper.
    if monitor:
        factory = EnvMonitor(factory)
    if record:
        factory = EnvRecorder(factory)

    # Random-action rollout loop (no learning takes place here).
    for episode in trange(10):
        factory.reset()
        done = False
        if render:
            factory.render()
        # Iterable of spaces, each exposing `.n`
        # (presumably one discrete space per agent -- TODO confirm).
        action_spaces = factory.action_space
        while not done:
            # randint is inclusive on both ends, hence `n - 1` as upper bound.
            actions = [randint(0, space.n - 1) for space in action_spaces]
            # step() yields a 5-tuple; only the fourth element (done) is used.
            _, _, _, done, _ = factory.step(actions)
            if render:
                factory.render()
        # The loop above only exits once `done` is truthy.
        print(f'Episode {episode} done...')

    if monitor:
        factory.save_monitor(run_path / 'test_monitor.pkl')
    if record:
        factory.save_records(run_path / 'test.pb')
    if plotting:
        # NOTE(review): `report_possible_colum_keys` is presumably provided by
        # the EnvMonitor wrapper, and the plot presumably reads the monitor
        # file saved above -- confirm that `plotting` works with `monitor`
        # disabled.
        factory.report_possible_colum_keys()
        plot_single_run(run_path, column_keys=['step_reward'])

    print('Done!!! Goodbye....')