mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2026-01-15 23:41:39 +01:00
Added code for tsp_runs + Updated eval configs so that every episode only takes a maximum number of steps
This commit is contained in:
@@ -58,5 +58,5 @@ Rules:
|
|||||||
|
|
||||||
# Done Conditions
|
# Done Conditions
|
||||||
#DoneOnAllDirtCleaned:
|
#DoneOnAllDirtCleaned:
|
||||||
#DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
|
DoneAtMaxStepsReached:
|
||||||
#max_steps: 1000
|
max_steps: 50
|
||||||
|
|||||||
@@ -58,5 +58,5 @@ Rules:
|
|||||||
|
|
||||||
# Done Conditions
|
# Done Conditions
|
||||||
#DoneOnAllDirtCleaned:
|
#DoneOnAllDirtCleaned:
|
||||||
#DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
|
DoneAtMaxStepsReached:
|
||||||
#max_steps: 1000
|
max_steps: 50
|
||||||
|
|||||||
@@ -58,5 +58,5 @@ Rules:
|
|||||||
|
|
||||||
# Done Conditions
|
# Done Conditions
|
||||||
#DoneOnAllDirtCleaned:
|
#DoneOnAllDirtCleaned:
|
||||||
#DoneAtMaxStepsReached: # Maybe required, since door blocking will result in an infinite loop
|
DoneAtMaxStepsReached:
|
||||||
#max_steps: 1000
|
max_steps: 50
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ Agents:
|
|||||||
- DirtPiles
|
- DirtPiles
|
||||||
- Self
|
- Self
|
||||||
Positions:
|
Positions:
|
||||||
- (9,1)
|
- (9,2)
|
||||||
Reiner:
|
Reiner:
|
||||||
Actions:
|
Actions:
|
||||||
- Move4
|
- Move4
|
||||||
@@ -39,11 +39,11 @@ Agents:
|
|||||||
- DirtPiles
|
- DirtPiles
|
||||||
- Self
|
- Self
|
||||||
Positions:
|
Positions:
|
||||||
- (9,8) # (9, 4)
|
- (9,5)
|
||||||
|
|
||||||
Entities:
|
Entities:
|
||||||
DirtPiles:
|
DirtPiles:
|
||||||
coords_or_quantity: (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
coords_or_quantity: (1, 1), (4,5), (9,9)
|
||||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
clean_amount: 1
|
clean_amount: 1
|
||||||
dirt_spawn_r_var: 0
|
dirt_spawn_r_var: 0
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ General:
|
|||||||
Agents:
|
Agents:
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
Actions:
|
Actions:
|
||||||
- Move8
|
- Move4
|
||||||
- Noop
|
- Noop
|
||||||
- DestAction
|
- DestAction
|
||||||
- DoorUse
|
- DoorUse
|
||||||
@@ -29,7 +29,7 @@ Agents:
|
|||||||
- (3,1) # Agent spawnpoint
|
- (3,1) # Agent spawnpoint
|
||||||
Sigmund:
|
Sigmund:
|
||||||
Actions:
|
Actions:
|
||||||
- Move8
|
- Move4
|
||||||
- Noop
|
- Noop
|
||||||
- DestAction
|
- DestAction
|
||||||
- DoorUse
|
- DoorUse
|
||||||
@@ -67,6 +67,11 @@ Rules:
|
|||||||
# Init
|
# Init
|
||||||
AssignGlobalPositions: { }
|
AssignGlobalPositions: { }
|
||||||
|
|
||||||
|
DoneAtDestinationReach:
|
||||||
|
reward_at_done: 1
|
||||||
|
# We want to give rewards only when all targets have been reached.
|
||||||
|
condition: "all"
|
||||||
|
|
||||||
# Done Conditions
|
# Done Conditions
|
||||||
DoneAtMaxStepsReached:
|
DoneAtMaxStepsReached:
|
||||||
max_steps: 100
|
max_steps: 50
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ class DestAction(Action):
|
|||||||
|
|
||||||
def do(self, entity, state) -> Union[None, ActionResult]:
|
def do(self, entity, state) -> Union[None, ActionResult]:
|
||||||
if destination := state[d.DESTINATION].by_pos(entity.pos):
|
if destination := state[d.DESTINATION].by_pos(entity.pos):
|
||||||
valid = destination.do_wait_action(entity)
|
valid = destination[0].do_wait_action(entity)
|
||||||
state.print(f'{entity.name} just waited at {entity.pos}')
|
state.print(f'{entity.name} just waited at {entity.pos}')
|
||||||
else:
|
else:
|
||||||
valid = c.NOT_VALID
|
valid = c.NOT_VALID
|
||||||
|
|||||||
107
studies/tsp_runs.py
Normal file
107
studies/tsp_runs.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import imageio
|
||||||
|
from tqdm import trange
|
||||||
|
|
||||||
|
from marl_factory_grid.algorithms.static.TSP_dirt_agent import TSPDirtAgent
|
||||||
|
from marl_factory_grid.algorithms.static.TSP_item_agent import TSPItemAgent
|
||||||
|
from marl_factory_grid.algorithms.static.TSP_target_agent import TSPTargetAgent
|
||||||
|
from marl_factory_grid.environment.factory import Factory
|
||||||
|
|
||||||
|
def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
    """Create the two TSP dirt agents used for the dirt-quadrant setting.

    :param emergent_phenomenon: When truthy, the agents are returned with their
        position graphs untouched. When falsy, every edge weight in each agent's
        position graph is overwritten with a cost that grows with the first
        coordinate of the edge's lower/left endpoint.
    :param factory: Environment instance handed to each ``TSPDirtAgent``.
    :return: List with the agents for agent index 0 and 1.
    """
    tsp_agents = [TSPDirtAgent(factory, agent_idx) for agent_idx in (0, 1)]
    if emergent_phenomenon:
        return tsp_agents

    # Cost table keyed by "<pos>-<pos>" strings, filled for both traversal
    # directions of every edge on the 9x9 grid of positions (1..9, 1..9).
    cost_by_edge = {}

    # Horizontal edges: (row, col) <-> (row, col + 1)
    for row in range(1, 10):
        for col in range(1, 9):
            here, there = (row, col), (row, col + 1)
            cost = 0.55 + (row - 1) * 0.05
            cost_by_edge[f"{here}-{there}"] = cost
            cost_by_edge[f"{there}-{here}"] = cost

    # Vertical edges: (row, col) <-> (row + 1, col)
    for row in range(1, 9):
        for col in range(1, 10):
            here, there = (row, col), (row + 1, col)
            cost = 0.55 + (row - 1) * 0.05
            cost_by_edge[f"{here}-{there}"] = cost
            cost_by_edge[f"{there}-{here}"] = cost

    for tsp_agent in tsp_agents:
        # The stored weight is not needed; only the endpoints select the new cost.
        for src, dst, _ in tsp_agent._position_graph.edges(data='weight'):
            tsp_agent._position_graph[src][dst]['weight'] = cost_by_edge[f"{src}-{dst}"]

    return tsp_agents
|
||||||
|
|
||||||
|
|
||||||
|
def get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory):
    """Create the two TSP target agents for the modified two-rooms-one-door setting.

    :param emergent_phenomenon: When truthy, the agents are returned unchanged.
        When falsy, the flag is printed and, for each agent, the weight of the
        position-graph edge between (3, 1) and (3, 2) is set to 4.
    :param factory: Environment instance handed to each ``TSPTargetAgent``.
    :return: List with the agents for agent index 0 and 1.
    """
    tsp_agents = [TSPTargetAgent(factory, agent_idx) for agent_idx in (0, 1)]
    if emergent_phenomenon:
        return tsp_agents

    print(emergent_phenomenon)
    for tsp_agent in tsp_agents:
        tsp_agent._position_graph[(3, 1)][(3, 2)]['weight'] = 4
    return tsp_agents
|
||||||
|
|
||||||
|
def run_tsp_setting(config_name, emergent_phenomenon):
    """Run one recorded TSP-agent episode for the given environment config.

    A new ``../study_out/tsp_run<N>`` folder is created, where N is one above
    the highest existing run number (or 0 if there is none). The environment
    config is written to ``env_config.txt`` and the rendered frames are
    recorded to ``pygame_recording.mp4`` inside that folder. All paths are
    relative to the current working directory.

    :param config_name: Name of the YAML config (without extension) located in
        ``../marl_factory_grid/configs/``. Only "dirt_quadrant" and
        "two_rooms_one_door_modified" have a matching agent setup; any other
        name prints a message and aborts the episode loop.
    :param emergent_phenomenon: Passed on to the agent setup functions.
    """
    # Render at each step?
    render = True

    # Path to config File
    path = Path(f'../marl_factory_grid/configs/{config_name}.yaml')

    # Create results folder (and its parent, so a fresh checkout does not crash)
    Path("../study_out/").mkdir(parents=True, exist_ok=True)
    run_numbers = []
    for run in os.listdir("../study_out/"):
        if run[:7] != "tsp_run":
            continue
        try:
            run_numbers.append(int(run[7:]))
        except ValueError:
            # Entry shares the prefix but is not a numbered run folder; skip it.
            continue
    next_run_number = max(run_numbers) + 1 if run_numbers else 0
    results_path = f"../study_out/tsp_run{next_run_number}"
    os.mkdir(results_path)

    # Env Init
    factory = Factory(path)

    with open(f"{results_path}/env_config.txt", "w") as txt_file:
        txt_file.write(str(factory.conf))

    recorder = imageio.get_writer(f'{results_path}/pygame_recording.mp4', fps=5)

    # Close the recorder even if the episode raises, so the video file is finalized.
    try:
        for episode in trange(1):
            factory.reset()
            done = False
            if render:
                factory.set_recorder(recorder)
                factory.render()
                factory._renderer.fps = 5
            if config_name == "dirt_quadrant":
                agents = get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory)
            elif config_name == "two_rooms_one_door_modified":
                agents = get_two_rooms_one_door_modified_tsp_agents(emergent_phenomenon, factory)
            else:
                print("Config name does not exist. Abort...")
                break
            while not done:
                actions = [agent.predict() for agent in agents]
                # Only the done flag of the step result is needed here.
                _, _, _, done, _ = factory.step(actions)
                if render:
                    factory.render()
            print(f'Episode {episode} done...')
    finally:
        recorder.close()
|
||||||
|
|
||||||
|
|
||||||
|
def dirt_quadrant_multi_agent_tsp(emergent_phenomenon):
    """Run the TSP setting for the "dirt_quadrant" config.

    :param emergent_phenomenon: Forwarded unchanged to ``run_tsp_setting``.
    """
    run_tsp_setting(config_name="dirt_quadrant",
                    emergent_phenomenon=emergent_phenomenon)
|
||||||
|
|
||||||
|
|
||||||
|
def two_rooms_one_door_modified_multi_agent_tsp(emergent_phenomenon):
    """Run the TSP setting for the "two_rooms_one_door_modified" config.

    :param emergent_phenomenon: Forwarded unchanged to ``run_tsp_setting``.
    """
    run_tsp_setting(config_name="two_rooms_one_door_modified",
                    emergent_phenomenon=emergent_phenomenon)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run the dirt-quadrant study with the flag disabled.
    dirt_quadrant_multi_agent_tsp(emergent_phenomenon=False)
|
||||||
Reference in New Issue
Block a user