mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-11-16 23:33:51 +01:00
Merge remote-tracking branch 'origin/marl_refactor' into marl_refactor
This commit is contained in:
@@ -384,6 +384,14 @@ class A2C:
|
|||||||
obs[0][1][x][y] = 1
|
obs[0][1][x][y] = 1
|
||||||
print("Missing agent position")
|
print("Missing agent position")
|
||||||
|
|
||||||
|
def get_all_cleaned_dirt_piles(self, dirt_piles_positions, cleaned_dirt_piles):
|
||||||
|
meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
|
||||||
|
for agent_idx in range(self.n_agents):
|
||||||
|
for (pos, cleaned) in cleaned_dirt_piles[agent_idx].items():
|
||||||
|
if cleaned:
|
||||||
|
meta_cleaned_dirt_piles[pos] = True
|
||||||
|
return meta_cleaned_dirt_piles
|
||||||
|
|
||||||
def handle_dirt(self, env, cleaned_dirt_piles, ordered_dirt_piles, target_pile, indices, reward, done):
|
def handle_dirt(self, env, cleaned_dirt_piles, ordered_dirt_piles, target_pile, indices, reward, done):
|
||||||
# Check if agent moved on field with dirt. If that is the case collect dirt automatically
|
# Check if agent moved on field with dirt. If that is the case collect dirt automatically
|
||||||
agent_positions = [env.state.moving_entites[agent_idx].pos for agent_idx in range(self.n_agents)]
|
agent_positions = [env.state.moving_entites[agent_idx].pos for agent_idx in range(self.n_agents)]
|
||||||
@@ -428,12 +436,7 @@ class A2C:
|
|||||||
done = True
|
done = True
|
||||||
elif self.cfg[nms.ALGORITHM]["pile_all_done"] == "shared":
|
elif self.cfg[nms.ALGORITHM]["pile_all_done"] == "shared":
|
||||||
# End episode if both agents together have cleaned all dirt piles
|
# End episode if both agents together have cleaned all dirt piles
|
||||||
meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
|
if all(self.get_all_cleaned_dirt_piles(dirt_piles_positions, cleaned_dirt_piles).values()):
|
||||||
for agent_idx in range(self.n_agents):
|
|
||||||
for (pos, cleaned) in cleaned_dirt_piles[agent_idx].items():
|
|
||||||
if cleaned:
|
|
||||||
meta_cleaned_dirt_piles[pos] = True
|
|
||||||
if all(meta_cleaned_dirt_piles.values()):
|
|
||||||
done = True
|
done = True
|
||||||
|
|
||||||
return reward, done
|
return reward, done
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ algorithm:
|
|||||||
entropy_coef: 0.01
|
entropy_coef: 0.01
|
||||||
vf_coef: 0.05
|
vf_coef: 0.05
|
||||||
n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
|
n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
|
||||||
max_steps: 270000
|
max_steps: 240000
|
||||||
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
|
advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
|
||||||
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
|
pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
|
||||||
pile-observability: "single" # Options: "single", "all"
|
pile-observability: "single" # Options: "single", "all"
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ Agents:
|
|||||||
|
|
||||||
Entities:
|
Entities:
|
||||||
DirtPiles:
|
DirtPiles:
|
||||||
coords_or_quantity: (9,9), (4,5), (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
coords_or_quantity: (9,9), (7,9), (4,7), (2,4), (1, 1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
clean_amount: 1
|
clean_amount: 1
|
||||||
dirt_spawn_r_var: 0
|
dirt_spawn_r_var: 0
|
||||||
|
|||||||
@@ -59,4 +59,4 @@ Rules:
|
|||||||
# Done Conditions
|
# Done Conditions
|
||||||
#DoneOnAllDirtCleaned:
|
#DoneOnAllDirtCleaned:
|
||||||
DoneAtMaxStepsReached:
|
DoneAtMaxStepsReached:
|
||||||
max_steps: 50
|
max_steps: 30
|
||||||
|
|||||||
@@ -25,8 +25,15 @@ Agents:
|
|||||||
#- Self
|
#- Self
|
||||||
#Positions:
|
#Positions:
|
||||||
#- (9,1)
|
#- (9,1)
|
||||||
|
#- (1,1)
|
||||||
|
#- (2,4)
|
||||||
|
#- (4,7)
|
||||||
|
#- (7,9)
|
||||||
|
#- (2,4)
|
||||||
|
#- (4,7)
|
||||||
|
#- (7,9)
|
||||||
#- (9,9)
|
#- (9,9)
|
||||||
#- (4,5)
|
#- (9,1)
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
Actions:
|
Actions:
|
||||||
- Move4
|
- Move4
|
||||||
@@ -35,12 +42,19 @@ Agents:
|
|||||||
- Self
|
- Self
|
||||||
Positions:
|
Positions:
|
||||||
- (9,5)
|
- (9,5)
|
||||||
- (9,9)
|
#- (1,1)
|
||||||
- (4,5)
|
#- (2,4)
|
||||||
|
#- (4,7)
|
||||||
|
#- (7,9)
|
||||||
|
#- (2,4)
|
||||||
|
#- (4,7)
|
||||||
|
#- (7,9)
|
||||||
|
#- (9,9)
|
||||||
|
#- (9,5)
|
||||||
|
|
||||||
Entities:
|
Entities:
|
||||||
DirtPiles:
|
DirtPiles:
|
||||||
coords_or_quantity: (9,9), (4,5), (1,1) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9) #(9,9), (7,9), (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (4,7), (2,4), (1, 1) # (1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
clean_amount: 1
|
clean_amount: 1
|
||||||
dirt_spawn_r_var: 0
|
dirt_spawn_r_var: 0
|
||||||
|
|||||||
@@ -24,11 +24,17 @@ Agents:
|
|||||||
#- Self
|
#- Self
|
||||||
#Positions:
|
#Positions:
|
||||||
#- (9,1)
|
#- (9,1)
|
||||||
#- (4,5)
|
|
||||||
#- (1,1)
|
#- (1,1)
|
||||||
#- (4,5)
|
#- (2,4)
|
||||||
#- (9,1)
|
#- (4,7)
|
||||||
|
#- (6,8)
|
||||||
|
#- (7,9)
|
||||||
|
#- (2,4)
|
||||||
|
#- (4,7)
|
||||||
|
#- (6,8)
|
||||||
|
#- (7,9)
|
||||||
#- (9,9)
|
#- (9,9)
|
||||||
|
#- (9,1)
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
Actions:
|
Actions:
|
||||||
- Move4
|
- Move4
|
||||||
@@ -37,16 +43,22 @@ Agents:
|
|||||||
- Self
|
- Self
|
||||||
Positions:
|
Positions:
|
||||||
- (9,5)
|
- (9,5)
|
||||||
- (4,5)
|
|
||||||
- (1,1)
|
- (1,1)
|
||||||
- (4,5)
|
- (2,4)
|
||||||
- (9,5)
|
- (4,7)
|
||||||
|
- (6,8)
|
||||||
|
- (7,9)
|
||||||
|
- (2,4)
|
||||||
|
- (4,7)
|
||||||
|
- (6,8)
|
||||||
|
- (7,9)
|
||||||
- (9,9)
|
- (9,9)
|
||||||
|
- (9,5)
|
||||||
|
|
||||||
|
|
||||||
Entities:
|
Entities:
|
||||||
DirtPiles:
|
DirtPiles:
|
||||||
coords_or_quantity: (9,9), (1,1), (4,5) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
coords_or_quantity: (1, 1), (2,4), (4,7), (6,8), (7,9), (9,9) # (4,7), (2,4), (1, 1) #(1, 1), (2,4), (4,7), (7,9), (9,9) # (1, 1), (1,2), (1,3), (2,4), (2,5), (3,6), (4,7), (5,8), (6,8), (7,9), (8,9), (9,9)
|
||||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
clean_amount: 1
|
clean_amount: 1
|
||||||
dirt_spawn_r_var: 0
|
dirt_spawn_r_var: 0
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ Agents:
|
|||||||
- DirtPiles
|
- DirtPiles
|
||||||
- Self
|
- Self
|
||||||
Positions:
|
Positions:
|
||||||
- (9,2)
|
- (9,1)
|
||||||
Reiner:
|
Reiner:
|
||||||
Actions:
|
Actions:
|
||||||
- Move4
|
- Move4
|
||||||
@@ -43,7 +43,7 @@ Agents:
|
|||||||
|
|
||||||
Entities:
|
Entities:
|
||||||
DirtPiles:
|
DirtPiles:
|
||||||
coords_or_quantity: (1, 1), (4,5), (9,9)
|
coords_or_quantity: (1, 1), (2,4), (4,7), (7,9), (9,9)
|
||||||
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
initial_amount: 0.5 # <1 to ensure that the robot which first attempts to clean this field, can remove the dirt in one action
|
||||||
clean_amount: 1
|
clean_amount: 1
|
||||||
dirt_spawn_r_var: 0
|
dirt_spawn_r_var: 0
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
###########
|
###########
|
||||||
#---#######
|
#---------#
|
||||||
#-----#####
|
#---------#
|
||||||
#------####
|
#---------#
|
||||||
#-------###
|
#---------#
|
||||||
#--------##
|
#---------#
|
||||||
#--------##
|
#---------#
|
||||||
#---------#
|
#---------#
|
||||||
#---------#
|
#---------#
|
||||||
#---------#
|
#---------#
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ def single_agent_eval(config_name, run):
|
|||||||
agent = A2C(train_cfg, eval_cfg)
|
agent = A2C(train_cfg, eval_cfg)
|
||||||
print("Evaluation phase")
|
print("Evaluation phase")
|
||||||
agent.load_agents(run)
|
agent.load_agents(run)
|
||||||
agent.eval_loop(10)
|
agent.eval_loop(1)
|
||||||
|
|
||||||
|
|
||||||
def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
|
def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
|
||||||
@@ -55,7 +55,7 @@ def multi_agent_eval(config_name, runs, emergent_phenomenon=False):
|
|||||||
agent = A2C(train_cfg, eval_cfg)
|
agent = A2C(train_cfg, eval_cfg)
|
||||||
print("Evaluation phase")
|
print("Evaluation phase")
|
||||||
agent.load_agents(runs)
|
agent.load_agents(runs)
|
||||||
agent.eval_loop(10)
|
agent.eval_loop(1)
|
||||||
|
|
||||||
|
|
||||||
def dirt_quadrant_single_agent_training():
|
def dirt_quadrant_single_agent_training():
|
||||||
@@ -70,7 +70,7 @@ def dirt_quadrant_single_agent_eval(agent_name):
|
|||||||
if agent_name == "Sigmund":
|
if agent_name == "Sigmund":
|
||||||
run = "run0"
|
run = "run0"
|
||||||
elif agent_name == "Wolfgang":
|
elif agent_name == "Wolfgang":
|
||||||
run = "run4"
|
run = "run1"
|
||||||
single_agent_eval("dirt_quadrant", [run])
|
single_agent_eval("dirt_quadrant", [run])
|
||||||
|
|
||||||
|
|
||||||
@@ -82,15 +82,15 @@ def two_rooms_one_door_modified_single_agent_eval(agent_name):
|
|||||||
single_agent_eval("two_rooms_one_door_modified", [run])
|
single_agent_eval("two_rooms_one_door_modified", [run])
|
||||||
|
|
||||||
|
|
||||||
def dirt_quadrant_multi_agent_eval(emergent_phenomenon):
|
def dirt_quadrant_5_multi_agent_eval(emergent_phenomenon):
|
||||||
multi_agent_eval("dirt_quadrant", ["run0", "run1"], emergent_phenomenon)
|
multi_agent_eval("dirt_quadrant", ["run4", "run5"], emergent_phenomenon)
|
||||||
|
|
||||||
|
def dirt_quadrant_5_multi_agent_ctde_eval(emergent_phenomenon): # run7 == run4
|
||||||
|
multi_agent_eval("dirt_quadrant", ["run4", "run7"], emergent_phenomenon)
|
||||||
|
|
||||||
def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
|
def two_rooms_one_door_modified_multi_agent_eval(emergent_phenomenon):
|
||||||
multi_agent_eval("two_rooms_one_door_modified", ["run2", "run3"], emergent_phenomenon)
|
multi_agent_eval("two_rooms_one_door_modified", ["run2", "run3"], emergent_phenomenon)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
dirt_quadrant_single_agent_training()
|
dirt_quadrant_single_agent_training()
|
||||||
Reference in New Issue
Block a user