mirror of https://github.com/illiumst/marl-factory-grid.git (synced 2025-10-31 04:37:25 +01:00)

Commit: my update
		| @@ -1,40 +0,0 @@ | ||||
| from common import BaseLearner, TrajectoryBuffer | ||||
|  | ||||
|  | ||||
| class AWRLearner(BaseLearner): | ||||
|     def __init__(self, *args,  buffer_size=1e5, **kwargs): | ||||
|         super(AWRLearner, self).__init__(*args, **kwargs) | ||||
|         assert self.train_every[0] == 'episode', 'AWR only supports the episodic RL setting!' | ||||
|         self.buffer = TrajectoryBuffer(buffer_size) | ||||
|  | ||||
|     def train(self): | ||||
|         # convert to trajectory format | ||||
|         pass | ||||
|  | ||||
| import numpy as np | ||||
| from matplotlib import pyplot as plt | ||||
| import pandas as pd | ||||
| import seaborn as sns | ||||
|  | ||||
| sns.set(font_scale=1.25, rc={'text.usetex': True}) | ||||
| data = np.array([[689, 74], [71, 647]]) | ||||
| cats = ['Mask', 'No Mask'] | ||||
| df = pd.DataFrame(data/np.sum(data), index=cats, columns=cats) | ||||
|  | ||||
| group_counts = ['{0:0.0f}'.format(value) for value in | ||||
|                 data.flatten()] | ||||
| group_percentages = [f'{value*100:.2f}' + r'$\%$' for value in | ||||
|                      data.flatten()/np.sum(data)] | ||||
|  | ||||
| labels = [f'{v1}\n{v2}' for v1, v2 in | ||||
|           zip(group_counts,group_percentages)] | ||||
| labels = np.asarray(labels).reshape(2,2) | ||||
|  | ||||
| with sns.axes_style("white"): | ||||
|     cmap = sns.diverging_palette(h_neg=100, h_pos=10, s=99, l=55, sep=3, as_cmap=True)  # defined but unused: the heatmap below passes cmap='Set2_r' | ||||
|     sns.heatmap(data, annot=labels, fmt='', cmap='Set2_r', square=True, cbar=False, xticklabels=cats,yticklabels=cats) | ||||
| plt.title('Simple-CNN') | ||||
| plt.ylabel('True label') | ||||
| plt.xlabel('Predicted label') | ||||
| plt.tight_layout() | ||||
| plt.savefig('cnn.pdf', bbox_inches='tight') | ||||
| @@ -2,9 +2,12 @@ from typing import NamedTuple, Union | ||||
| from collections import deque, OrderedDict, defaultdict | ||||
| import numpy as np | ||||
| import random | ||||
|  | ||||
| import pandas as pd | ||||
| import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
| from tqdm import trange | ||||
|  | ||||
| class Experience(NamedTuple): | ||||
|     # can be used for a single (s_t, a, r, s_{t+1}) tuple | ||||
| @@ -57,6 +60,9 @@ class BaseLearner: | ||||
|     def train(self): | ||||
|         pass | ||||
|  | ||||
|     def reward(self, r): | ||||
|         return r | ||||
|  | ||||
|     def learn(self, n_steps): | ||||
|         train_type, train_freq = self.train_every | ||||
|         while self.step < n_steps: | ||||
| @@ -70,7 +76,7 @@ class BaseLearner: | ||||
|                 next_obs, reward, done, info = self.env.step(action if not len(action) == 1 else action[0]) | ||||
|  | ||||
|                 experience = Experience(observation=obs, next_observation=next_obs, | ||||
|                                         action=action, reward=reward, | ||||
|                                         action=action, reward=self.reward(reward), | ||||
|                                         done=done, episode=self.episode)  # do we really need to copy? | ||||
|                 self.on_new_experience(experience) | ||||
|                 # end of step routine | ||||
| @@ -90,7 +96,7 @@ class BaseLearner: | ||||
|             self.running_reward.append(total_reward) | ||||
|             self.episode += 1 | ||||
|             try: | ||||
|                 if self.step % 10 == 0: | ||||
|                 if self.step % 100 == 0: | ||||
|                     print( | ||||
|                         f'Step: {self.step} ({(self.step / n_steps) * 100:.2f}%)\tRunning reward: {sum(list(self.running_reward)) / len(self.running_reward):.2f}\t' | ||||
|                         f' eps: {self.eps:.4f}\tRunning loss: {sum(list(self.running_loss)) / len(self.running_loss):.4f}\tUpdates:{self.n_updates}') | ||||
| @@ -98,6 +104,21 @@ class BaseLearner: | ||||
|                 pass | ||||
|         self.on_all_done() | ||||
|  | ||||
|     def evaluate(self, n_episodes=100, render=False): | ||||
|         with torch.no_grad(): | ||||
|             data = [] | ||||
|             for eval_i in trange(n_episodes): | ||||
|                 obs, done = self.env.reset(), False | ||||
|                 while not done: | ||||
|                     action = self.get_action(obs) | ||||
|                     next_obs, reward, done, info = self.env.step(action if not len(action) == 1 else action[0]) | ||||
|                     if render: self.env.render() | ||||
|                     obs = next_obs  # advance to the next observation | ||||
|                     info.update({'reward': reward, 'eval_episode': eval_i}) | ||||
|                     data.append(info) | ||||
|         return pd.DataFrame(data).fillna(0) | ||||
|  | ||||
|  | ||||
|  | ||||
| class BaseBuffer: | ||||
|     def __init__(self, size: int): | ||||
| @@ -187,7 +208,7 @@ class BaseDDQN(BaseDQN): | ||||
| class BaseICM(nn.Module): | ||||
|     def __init__(self, backbone_dims=[2*3*5*5, 64, 64], head_dims=[2*64, 64, 9]): | ||||
|         super(BaseICM, self).__init__() | ||||
|         self.backbone = mlp_maker(backbone_dims, flatten=True) | ||||
|         self.backbone = mlp_maker(backbone_dims, flatten=True, activation_last='relu', activation='relu') | ||||
|         self.icm = mlp_maker(head_dims) | ||||
|         self.ce = nn.CrossEntropyLoss() | ||||
|  | ||||
|   | ||||
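The evaluate() method added to BaseLearner above rolls out the current policy under torch.no_grad() and collects each step's info dict into a pandas DataFrame. Below is a minimal, self-contained sketch of the same pattern, assuming gym < 0.26 (the 4-tuple step API used throughout this repo) and a random policy standing in for the learner:

import gym
import pandas as pd

env = gym.make('CartPole-v1')
data = []
for eval_i in range(5):
    obs, done = env.reset(), False
    while not done:
        # random action in place of self.get_action(obs)
        obs, reward, done, info = env.step(env.action_space.sample())
        info.update({'reward': reward, 'eval_episode': eval_i})
        data.append(info)
df = pd.DataFrame(data).fillna(0)
print(df.groupby('eval_episode')['reward'].sum())  # return per evaluation episode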
| @@ -1,3 +1,4 @@ | ||||
| import numpy as np | ||||
| import torch | ||||
| import torch.nn.functional as F | ||||
| from algorithms.q_learner import QLearner | ||||
| @@ -53,19 +54,24 @@ class MQLearner(QLearner): | ||||
|             self._backprop_loss(loss) | ||||
|  | ||||
| from tqdm import trange | ||||
| from collections import deque | ||||
| class MQICMLearner(MQLearner): | ||||
|     def __init__(self, *args, icm, **kwargs): | ||||
|         super(MQICMLearner, self).__init__(*args, **kwargs) | ||||
|         self.icm = icm | ||||
|         self.icm_optimizer = torch.optim.Adam(self.icm.parameters()) | ||||
|         self.icm_optimizer = torch.optim.AdamW(self.icm.parameters()) | ||||
|         self.normalize_reward = deque(maxlen=1000) | ||||
|  | ||||
|     def on_all_done(self): | ||||
|         for b in trange(50000): | ||||
|         losses = deque(maxlen=100)  # deque is already imported at module level above | ||||
|         for b in trange(10000): | ||||
|             batch = self.buffer.sample(128, 0) | ||||
|             s0, s1, a = batch.observation,  batch.next_observation, batch.action | ||||
|             loss = self.icm(s0, s1, a.squeeze())['loss'] | ||||
|             self.icm_optimizer.zero_grad() | ||||
|             loss.backward() | ||||
|             self.icm_optimizer.step() | ||||
|             losses.append(loss.item()) | ||||
|             if b%100 == 0: | ||||
|                 print(loss.item()) | ||||
|                 print(np.mean(losses)) | ||||
|   | ||||
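The post-training ICM loop above now reports an average over the last 100 losses instead of a single noisy value. The commit also gives MQICMLearner a normalize_reward deque that is never used yet; one plausible use (an assumption, not confirmed by this commit) is running z-normalization of the intrinsic reward:

import numpy as np
from collections import deque

window = deque(maxlen=1000)  # hypothetical stand-in for self.normalize_reward

def normalize_intrinsic(r, eps=1e-8):
    # z-score the intrinsic reward against a sliding window of recent values
    window.append(r)
    return (r - np.mean(window)) / (np.std(window) + eps)

for r in (0.5, 1.5, 0.25):
    print(normalize_intrinsic(r))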
| @@ -1,6 +1,7 @@ | ||||
| from typing import Union | ||||
| import torch | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| from algorithms.q_learner import QLearner | ||||
|  | ||||
|  | ||||
| @@ -37,4 +38,18 @@ class VDNLearner(QLearner): | ||||
|                 target_q_raw += next_q_values_raw | ||||
|             target_q = experience.reward + (1 - experience.done) * self.gamma * target_q_raw | ||||
|             loss = torch.mean(self.reg_weight * pred_q + torch.pow(pred_q - target_q, 2)) | ||||
|             self._backprop_loss(loss) | ||||
|             self._backprop_loss(loss) | ||||
|  | ||||
|     def evaluate(self, n_episodes=100, render=False): | ||||
|         with torch.no_grad(): | ||||
|             data = [] | ||||
|             for eval_i in range(n_episodes): | ||||
|                 obs, done = self.env.reset(), False | ||||
|                 while not done: | ||||
|                     action = self.get_action(obs) | ||||
|                     next_obs, reward, done, info = self.env.step(action) | ||||
|                     if render: self.env.render() | ||||
|                     obs = next_obs  # advance to the next observation | ||||
|                     info.update({'reward': reward, 'eval_episode': eval_i}) | ||||
|                     data.append(info) | ||||
|         return pd.DataFrame(data).fillna(0) | ||||
|   | ||||
| @@ -0,0 +1,27 @@ | ||||
| def rooms(n_agents=1): | ||||
|     from environments.factory.factory_dirt_item import DirtItemFactory | ||||
|     from environments.factory.factory_item import ItemFactory, ItemProperties | ||||
|     from environments.factory.factory_dirt import DirtProperties, DirtFactory | ||||
|     from environments.utility_classes import MovementProperties, ObservationProperties, AgentRenderOptions | ||||
|  | ||||
|     obs_props = ObservationProperties(render_agents=AgentRenderOptions.NOT, | ||||
|                                       omit_agent_self=True, | ||||
|                                       additional_agent_placeholder=None, | ||||
|                                       frames_to_stack=0, | ||||
|                                       pomdp_r=2 | ||||
|                                       ) | ||||
|     move_props = MovementProperties(allow_diagonal_movement=True, | ||||
|                                     allow_square_movement=True, | ||||
|                                     allow_no_op=False) | ||||
|     dirt_props = DirtProperties(initial_dirt_ratio=0.35, initial_dirt_spawn_r_var=0.1, | ||||
|                                 clean_amount=0.34, | ||||
|                                 max_spawn_amount=0.1, max_global_amount=20, | ||||
|                                 max_local_amount=1, spawn_frequency=0, max_spawn_ratio=0.05, | ||||
|                                 dirt_smear_amount=0.0, agent_can_interact=True) | ||||
|     factory_kwargs = dict(n_agents=n_agents, max_steps=400, parse_doors=True, | ||||
|                           level_name='rooms', record_episodes=False, doors_have_area=False, | ||||
|                           verbose=False, | ||||
|                           mv_prop=move_props, | ||||
|                           obs_prop=obs_props | ||||
|                           ) | ||||
|     return DirtFactory(dirt_props=dirt_props, **factory_kwargs) | ||||
|   | ||||
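A short usage sketch for the new rooms() helper, mirroring the smoke test added at the end of this commit; it assumes the environments package is importable and the classic 4-tuple gym step API:

import random
from environments.factory import rooms

env = rooms(n_agents=1)
obs, done = env.reset(), False
while not done:
    action = random.randint(0, env.action_space.n - 1)
    obs, reward, done, info = env.step(action)
    env.render()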
| @@ -1,57 +1,7 @@ | ||||
| import random | ||||
| from pathlib import Path | ||||
|  | ||||
| from environments.factory.factory_dirt import DirtFactory, DirtProperties | ||||
| from environments.factory.factory_item import ItemFactory, ItemProperties | ||||
| from environments.logging.recorder import RecorderCallback | ||||
| from environments.utility_classes import MovementProperties | ||||
| from environments.factory.factory_dirt import DirtFactory | ||||
| from environments.factory.factory_item import ItemFactory | ||||
|  | ||||
|  | ||||
| class DirtItemFactory(ItemFactory, DirtFactory): | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     with RecorderCallback(filepath=Path('debug_out') / f'recorder_xxxx.json', occupation_map=False, | ||||
|                           trajectory_map=False) as recorder: | ||||
|  | ||||
|         dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20, | ||||
|                                     max_local_amount=1, spawn_frequency=3, max_spawn_ratio=0.05, | ||||
|                                     dirt_smear_amount=0.0, agent_can_interact=True) | ||||
|         item_props = ItemProperties(n_items=5, agent_can_interact=True) | ||||
|         move_props = MovementProperties(allow_diagonal_movement=True, | ||||
|                                         allow_square_movement=True, | ||||
|                                         allow_no_op=False) | ||||
|  | ||||
|         render = True | ||||
|  | ||||
|         factory = DirtItemFactory(n_agents=1, done_at_collision=False, frames_to_stack=0, | ||||
|                               level_name='rooms', max_steps=200, combin_agent_obs=True, | ||||
|                               omit_agent_in_obs=True, parse_doors=True, pomdp_r=3, | ||||
|                               record_episodes=True, verbose=False, cast_shadows=True, | ||||
|                               movement_properties=move_props, dirt_properties=dirt_props | ||||
|                               ) | ||||
|  | ||||
|         # noinspection DuplicatedCode | ||||
|         n_actions = factory.action_space.n - 1 | ||||
|         _ = factory.observation_space | ||||
|  | ||||
|         for epoch in range(4): | ||||
|             random_actions = [[random.randint(0, n_actions) for _ | ||||
|                                in range(factory.n_agents)] for _ | ||||
|                               in range(factory.max_steps + 1)] | ||||
|             env_state = factory.reset() | ||||
|             r = 0 | ||||
|             for agent_i_action in random_actions: | ||||
|                 env_state, step_r, done_bool, info_obj = factory.step(agent_i_action) | ||||
|                 # recorder.read_info(0, info_obj) | ||||
|                 r += step_r | ||||
|                 if render: | ||||
|                     factory.render() | ||||
|                 if done_bool: | ||||
|                     # recorder.read_done(0, done_bool) | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|         pass | ||||
|   | ||||
| @@ -126,6 +126,6 @@ class Renderer: | ||||
| if __name__ == '__main__': | ||||
|     renderer = Renderer(fps=2, cell_size=40) | ||||
|     for i in range(15): | ||||
|         entity_1 = RenderEntity('agent', [5, i], 1, 'idle', 'idle') | ||||
|         entity_1 = RenderEntity('agent_collision', [5, i], 1, 'idle', 'idle') | ||||
|         renderer.render([entity_1]) | ||||
|  | ||||
|   | ||||
							
								
								
									
main.py (115 lines deleted)
							| @@ -1,115 +0,0 @@ | ||||
| import warnings | ||||
|  | ||||
| from pathlib import Path | ||||
| import time | ||||
|  | ||||
| from stable_baselines3.common.callbacks import CallbackList | ||||
| from stable_baselines3.common.vec_env import SubprocVecEnv | ||||
|  | ||||
| from environments.factory.factory_dirt_item import DirtItemFactory | ||||
| from environments.factory.factory_item import ItemFactory, ItemProperties | ||||
| from environments.factory.factory_dirt import DirtProperties, DirtFactory | ||||
| from environments.logging.monitor import MonitorCallback | ||||
| from environments.logging.recorder import RecorderCallback | ||||
| from environments.utility_classes import MovementProperties | ||||
| from plotting.compare_runs import compare_seed_runs, compare_model_runs | ||||
|  | ||||
| warnings.filterwarnings('ignore', category=FutureWarning) | ||||
| warnings.filterwarnings('ignore', category=UserWarning) | ||||
|  | ||||
|  | ||||
| def make_env(env_kwargs_dict): | ||||
|  | ||||
|     def _init(): | ||||
|         with DirtFactory(**env_kwargs_dict) as init_env: | ||||
|             return init_env | ||||
|  | ||||
|     return _init | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
|     # combine_runs(Path('debug_out') / 'A2C_1630314192') | ||||
|     # exit() | ||||
|  | ||||
|     # compare_runs(Path('debug_out'), 1623052687, ['step_reward']) | ||||
|     # exit() | ||||
|  | ||||
|     from stable_baselines3 import PPO, DQN, A2C | ||||
|     # from algorithms.reg_dqn import RegDQN | ||||
|     # from sb3_contrib import QRDQN | ||||
|  | ||||
|     dirt_props = DirtProperties(clean_amount=2, gain_amount=0.1, max_global_amount=20, | ||||
|                                 max_local_amount=1, spawn_frequency=16, max_spawn_ratio=0.05, | ||||
|                                 dirt_smear_amount=0.0, agent_can_interact=True) | ||||
|     item_props = ItemProperties(n_items=10, agent_can_interact=True, | ||||
|                                 spawn_frequency=30, n_drop_off_locations=2, | ||||
|                                 max_agent_inventory_capacity=15) | ||||
|     move_props = MovementProperties(allow_diagonal_movement=True, | ||||
|                                     allow_square_movement=True, | ||||
|                                     allow_no_op=False) | ||||
|     train_steps = 5e6 | ||||
|     time_stamp = int(time.time()) | ||||
|  | ||||
|     out_path = None | ||||
|  | ||||
|     for modeL_type in [A2C, PPO, DQN]:  # ,RegDQN, QRDQN]: | ||||
|         for seed in range(3): | ||||
|             env_kwargs = dict(n_agents=1, | ||||
|                               # item_prop=item_props, | ||||
|                               dirt_properties=dirt_props, | ||||
|                               movement_properties=move_props, | ||||
|                               pomdp_r=2, max_steps=1000, parse_doors=False, | ||||
|                               level_name='rooms', frames_to_stack=4, | ||||
|                               omit_agent_in_obs=True, combin_agent_obs=True, record_episodes=False, | ||||
|                               cast_shadows=True, doors_have_area=False, env_seed=seed, verbose=False, | ||||
|                               ) | ||||
|  | ||||
|             if modeL_type.__name__ in ["PPO", "A2C"]: | ||||
|                 kwargs = dict(ent_coef=0.01) | ||||
|                 env = SubprocVecEnv([make_env(env_kwargs) for _ in range(10)], start_method="spawn") | ||||
|             elif modeL_type.__name__ in ["RegDQN", "DQN", "QRDQN"]: | ||||
|                 env = make_env(env_kwargs)() | ||||
|                 kwargs = dict(buffer_size=50000, | ||||
|                               learning_starts=64, | ||||
|                               batch_size=64, | ||||
|                               target_update_interval=5000, | ||||
|                               exploration_fraction=0.25, | ||||
|                               exploration_final_eps=0.025 | ||||
|                               ) | ||||
|             else: | ||||
|                 raise NameError(f'Unknown model type: {modeL_type.__name__}') | ||||
|  | ||||
|             model = modeL_type("MlpPolicy", env, verbose=1, seed=seed, device='cpu', **kwargs) | ||||
|  | ||||
|             out_path = Path('debug_out') / f'{model.__class__.__name__}_{time_stamp}' | ||||
|  | ||||
|             identifier = f'{seed}_{model.__class__.__name__}_{time_stamp}' | ||||
|             out_path /= identifier | ||||
|  | ||||
|             callbacks = CallbackList( | ||||
|                 [MonitorCallback(filepath=out_path / f'monitor_{identifier}.pick'), | ||||
|                  RecorderCallback(filepath=out_path / f'recorder_{identifier}.json', occupation_map=False, | ||||
|                                   trajectory_map=False | ||||
|                                   )] | ||||
|             ) | ||||
|  | ||||
|             model.learn(total_timesteps=int(train_steps), callback=callbacks) | ||||
|  | ||||
|             save_path = out_path / f'model_{identifier}.zip' | ||||
|             save_path.parent.mkdir(parents=True, exist_ok=True) | ||||
|             model.save(save_path) | ||||
|             param_path = out_path.parent / f'env_{model.__class__.__name__}_{time_stamp}.json' | ||||
|             try: | ||||
|                 env.env_method('save_params', param_path) | ||||
|             except AttributeError: | ||||
|                 env.save_params(param_path) | ||||
|             print("Model Trained and saved") | ||||
|         print("Model Group Done.. Plotting...") | ||||
|  | ||||
|         if out_path: | ||||
|             compare_seed_runs(out_path.parent) | ||||
|     print("All Models Done... Evaluating") | ||||
|     if out_path: | ||||
|         compare_model_runs(Path('debug_out'), time_stamp, 'step_reward') | ||||
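One pitfall in the deleted make_env() above: it returned the environment from inside a `with` block, so DirtFactory.__exit__ ran as the closure returned, before SubprocVecEnv ever stepped the env. If __exit__ releases resources, the workers receive an already-closed environment. A corrected closure (a sketch, assuming DirtFactory can be constructed without the context manager):

def make_env(env_kwargs_dict):
    def _init():
        # construct directly and let the vectorized env manage the lifecycle
        return DirtFactory(**env_kwargs_dict)
    return _init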
							
								
								
									
main_test.py (86 lines deleted)
							| @@ -1,86 +0,0 @@ | ||||
| # foreign imports | ||||
| import warnings | ||||
|  | ||||
| from pathlib import Path | ||||
| import yaml | ||||
| from gym.wrappers import FrameStack | ||||
| from natsort import natsorted | ||||
|  | ||||
| from stable_baselines3.common.callbacks import CallbackList | ||||
| from stable_baselines3 import PPO, DQN, A2C | ||||
|  | ||||
| # our imports | ||||
| from environments.factory.factory_dirt import DirtFactory, DirtProperties | ||||
| from environments.logging.monitor import MonitorCallback | ||||
| from algorithms.reg_dqn import RegDQN | ||||
| from main import compare_model_runs, compare_seed_runs | ||||
|  | ||||
| warnings.filterwarnings('ignore', category=FutureWarning) | ||||
| warnings.filterwarnings('ignore', category=UserWarning) | ||||
| model_mapping = dict(A2C=A2C, PPO=PPO, DQN=DQN, RegDQN=RegDQN) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
|     # get n policies pi_1, ..., pi_n trained in single agent setting | ||||
|     # rewards = [] | ||||
|     # repeat for x eval runs | ||||
|     # total reward = rollout game for y steps with n policies in multi-agent setting | ||||
|     # rewards += [total reward] | ||||
|     # boxplot total rewards | ||||
|  | ||||
|     run_id = '1623923982' | ||||
|     model_name = 'A2C' | ||||
|  | ||||
|     # ----------------------- | ||||
|     out_path = Path(__file__).parent / 'debug_out' | ||||
|  | ||||
|     # from sb3_contrib import QRDQN | ||||
|     model_path = out_path / f'{model_name}_{run_id}' | ||||
|     model_files = list(natsorted(model_path.rglob('model_*.zip'))) | ||||
|     this_model = model_files[0] | ||||
|     render = True | ||||
|  | ||||
|     model = model_mapping[model_name].load(this_model) | ||||
|  | ||||
|     for seed in range(3): | ||||
|         with (model_path / f'env_{model_path.name}.yaml').open('r') as f: | ||||
|             env_kwargs = yaml.load(f, Loader=yaml.FullLoader) | ||||
|         dirt_props = DirtProperties(clean_amount=3, gain_amount=0.2, max_global_amount=30, | ||||
|                                     max_local_amount=3, spawn_frequency=1, max_spawn_ratio=0.05) | ||||
|         # env_kwargs.update(n_agents=1, dirt_prop=dirt_props) | ||||
|         env = DirtFactory(**env_kwargs) | ||||
|  | ||||
|         env = FrameStack(env, 4) | ||||
|  | ||||
|         exp_out_path = model_path / 'exp' | ||||
|         callbacks = CallbackList( | ||||
|             [MonitorCallback(filepath=exp_out_path / f'future_exp_name')] | ||||
|         ) | ||||
|  | ||||
|         n_actions = env.action_space.n | ||||
|  | ||||
|         for epoch in range(100): | ||||
|             observations = env.reset() | ||||
|             if render: | ||||
|                 if isinstance(env, FrameStack): | ||||
|                     env.env.render() | ||||
|                 else: | ||||
|                     env.render() | ||||
|             done_bool = False | ||||
|             r = 0 | ||||
|             while not done_bool: | ||||
|                 if env.n_agents > 1: | ||||
|                     actions = [model.predict(obs, deterministic=False)[0] for obs in observations] | ||||
|                 else: | ||||
|                     actions = model.predict(observations, deterministic=False)[0] | ||||
|  | ||||
|                 observations, r, done_bool, info_obj = env.step(actions) | ||||
|                 if render: | ||||
|                     env.render() | ||||
|                 if done_bool: | ||||
|                     break | ||||
|             print(f'Factory run {epoch} done, reward is:\n    {r}') | ||||
|  | ||||
|     if out_path: | ||||
|         compare_seed_runs(out_path.parent) | ||||
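The deleted main_test.py left its evaluation plan only as comments: roll out n single-agent policies jointly, repeat for several runs, and boxplot the total rewards. A hedged distillation of that plan follows; policies and make_env are hypothetical stand-ins for the repo's loaded SB3 models and factory constructor:

import matplotlib.pyplot as plt

def joint_rollouts(policies, make_env, n_runs=20):
    totals = []
    for _ in range(n_runs):
        env = make_env(n_agents=len(policies))
        observations, done, total = env.reset(), False, 0.0
        while not done:
            actions = [pi.predict(obs, deterministic=False)[0]
                       for pi, obs in zip(policies, observations)]
            observations, reward, done, info = env.step(actions)
            total += reward
        totals.append(total)
    return totals

# plt.boxplot(joint_rollouts(policies, make_env)); plt.show()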
| @@ -1,9 +1,11 @@ | ||||
| import numpy as np | ||||
| from environments.factory import rooms | ||||
| import random | ||||
| from gym.wrappers import FrameStack | ||||
|  | ||||
| env = rooms(n_agents=2) | ||||
| env = FrameStack(env, num_stack=3) | ||||
| state = env.reset()  # classic gym reset returns only the observation | ||||
|  | ||||
| class SatMad(object): | ||||
|     def __init__(self): | ||||
|         pass | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     pass | ||||
| for i in range(1000): | ||||
|     state, *_ = env.step([random.randint(0, 9), random.randint(0, 9)]) | ||||
|     env.render() | ||||
Author: romue