Mirror of https://github.com/illiumst/marl-factory-grid.git, synced 2025-07-11 23:42:40 +02:00
Added shared piles option to dirt_quadrant eval + Changed dirt_quadrant layout and adapted configs
@@ -383,6 +383,14 @@ class A2C:
                     obs[0][1][x][y] = 1
                     print("Missing agent position")

+    def get_all_cleaned_dirt_piles(self, dirt_piles_positions, cleaned_dirt_piles):
+        meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
+        for agent_idx in range(self.n_agents):
+            for (pos, cleaned) in cleaned_dirt_piles[agent_idx].items():
+                if cleaned:
+                    meta_cleaned_dirt_piles[pos] = True
+        return meta_cleaned_dirt_piles
+
     def handle_dirt(self, env, cleaned_dirt_piles, ordered_dirt_piles, target_pile, indices, reward, done):
         # Check if agent moved on field with dirt. If that is the case collect dirt automatically
         agent_positions = [env.state.moving_entites[agent_idx].pos for agent_idx in range(self.n_agents)]
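The new get_all_cleaned_dirt_piles helper merges the per-agent cleaned-pile dictionaries into a single view in which a pile counts as cleaned if any agent has cleaned it. A minimal standalone sketch of that merge semantics follows; the pile positions and dictionary shapes are hypothetical, inferred from the diff rather than taken from the repository:

    def get_all_cleaned_dirt_piles(dirt_piles_positions, cleaned_dirt_piles, n_agents):
        # Every pile starts out as "not cleaned".
        meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
        # A pile counts as cleaned as soon as ANY agent has cleaned it.
        for agent_idx in range(n_agents):
            for pos, cleaned in cleaned_dirt_piles[agent_idx].items():
                if cleaned:
                    meta_cleaned_dirt_piles[pos] = True
        return meta_cleaned_dirt_piles

    # Example: two agents, three piles; each agent has cleaned a different pile.
    piles = [(1, 1), (2, 3), (4, 0)]
    per_agent = [
        {(1, 1): True, (2, 3): False, (4, 0): False},   # agent 0
        {(1, 1): False, (2, 3): True, (4, 0): False},   # agent 1
    ]
    merged = get_all_cleaned_dirt_piles(piles, per_agent, n_agents=2)
    assert merged == {(1, 1): True, (2, 3): True, (4, 0): False}
    assert not all(merged.values())  # (4, 0) is still dirty, so the episode continues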
@@ -427,12 +435,7 @@ class A2C:
                     done = True
             elif self.cfg[nms.ALGORITHM]["pile_all_done"] == "shared":
                 # End episode if both agents together have cleaned all dirt piles
-                meta_cleaned_dirt_piles = {pos: False for pos in dirt_piles_positions}
-                for agent_idx in range(self.n_agents):
-                    for (pos, cleaned) in cleaned_dirt_piles[agent_idx].items():
-                        if cleaned:
-                            meta_cleaned_dirt_piles[pos] = True
-                if all(meta_cleaned_dirt_piles.values()):
+                if all(self.get_all_cleaned_dirt_piles(dirt_piles_positions, cleaned_dirt_piles).values()):
                     done = True

         return reward, done
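With the helper in place, this hunk collapses the six inlined merge lines into a single call, so the "shared" termination mode reads as one condition: end the episode once the agents together have cleaned every pile. A hedged sketch of that check, reusing the sketch helper and example data above; only the pile_all_done key and the "shared" option appear in the diff, and the flat cfg layout here is an assumption (the repo goes through self.cfg[nms.ALGORITHM]):

    # Assumed cfg layout for illustration only.
    cfg = {"algorithm": {"pile_all_done": "shared"}}

    done = False
    if cfg["algorithm"]["pile_all_done"] == "shared":
        # Episode ends once the union of both agents' cleaned piles covers all piles.
        merged = get_all_cleaned_dirt_piles(piles, per_agent, n_agents=2)
        if all(merged.values()):
            done = True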
@@ -24,7 +24,7 @@ algorithm:
   entropy_coef: 0.01
   vf_coef: 0.05
   n_steps: 0 # How much experience should be sampled at most (n-TD) until the next value and policy update is performed. Default 0: MC
-  max_steps: 270000
+  max_steps: 240000
   advantage: "Advantage-AC" # Options: "Advantage-AC", "TD-Advantage-AC", "Reinforce"
   pile-order: "fixed" # Options: "fixed", "random", "none", "agents", "dynamic", "smart" (Use "fixed", "random" and "none" for single agent training and the other for multi agent inference)
   pile-observability: "single" # Options: "single", "all"
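For completeness, a sketch of how the adjusted algorithm section might be read at runtime with PyYAML. The file name and exact nesting are assumptions; only the keys and values shown in the hunk come from the diff:

    import yaml

    # Hypothetical path; the actual config file name is not shown in this diff.
    with open("dirt_quadrant_config.yaml") as f:
        cfg = yaml.safe_load(f)

    algo = cfg["algorithm"]
    print(algo["max_steps"])           # 240000 after this commit (was 270000)
    print(algo["pile-order"])          # "fixed", used for single-agent training
    print(algo["pile-observability"])  # "single" (vs. "all")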