mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-10-24 02:56:51 +02:00
Added explanation for narrow_corridor.yaml
This commit is contained in:
@@ -1,23 +1,41 @@
|
|||||||
General:
|
General:
|
||||||
|
# Your Seed
|
||||||
env_seed: 69
|
env_seed: 69
|
||||||
|
# Individual or global rewards?
|
||||||
individual_rewards: true
|
individual_rewards: true
|
||||||
|
# The level.txt file to load
|
||||||
level_name: narrow_corridor
|
level_name: narrow_corridor
|
||||||
|
# View Radius; 0 = full observability
|
||||||
pomdp_r: 0
|
pomdp_r: 0
|
||||||
|
# print all messages and events
|
||||||
verbose: true
|
verbose: true
|
||||||
|
|
||||||
Agents:
|
Agents:
|
||||||
|
# Agents are identified by their name
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
|
# The available actions for this particular agent
|
||||||
Actions:
|
Actions:
|
||||||
|
# Able to do nothing
|
||||||
- Noop
|
- Noop
|
||||||
|
# Able to move in all 8 directions
|
||||||
- Move8
|
- Move8
|
||||||
|
# Stuff the agent can observe (per 2d slice)
|
||||||
|
# use "Combined" if you want to merge multiple slices into one
|
||||||
Observations:
|
Observations:
|
||||||
|
# He sees walls
|
||||||
- Walls
|
- Walls
|
||||||
|
# He sees other agents; "Karl-Heinz" in this setting would be fine, too
|
||||||
- Other
|
- Other
|
||||||
|
# He can see the Destinations that are assigned to him (hence the singular)
|
||||||
- Destination
|
- Destination
|
||||||
|
# Available spawn positions as a list
|
||||||
Positions:
|
Positions:
|
||||||
- (2, 1)
|
- (2, 1)
|
||||||
- (2, 5)
|
- (2, 5)
|
||||||
|
# It is okay to collide with other agents, so that
|
||||||
|
# they end up on the same position
|
||||||
is_blocking_pos: true
|
is_blocking_pos: true
|
||||||
|
# See Above....
|
||||||
Karl-Heinz:
|
Karl-Heinz:
|
||||||
Actions:
|
Actions:
|
||||||
- Noop
|
- Noop
|
||||||
@@ -31,12 +49,19 @@ Agents:
|
|||||||
- (2, 5)
|
- (2, 5)
|
||||||
is_blocking_pos: true
|
is_blocking_pos: true
|
||||||
|
|
||||||
|
# Other noteworthy Entities
|
||||||
Entities:
|
Entities:
|
||||||
|
# The destinations or positional targets to reach
|
||||||
Destinations:
|
Destinations:
|
||||||
|
# Let them spawn on closed doors and agent positions
|
||||||
ignore_blocking: true
|
ignore_blocking: true
|
||||||
|
# We need a special spawn rule...
|
||||||
spawnrule:
|
spawnrule:
|
||||||
|
# ...which assigns the destinations per agent
|
||||||
SpawnDestinationsPerAgent:
|
SpawnDestinationsPerAgent:
|
||||||
|
# we use this parameter
|
||||||
coords_or_quantity:
|
coords_or_quantity:
|
||||||
|
# to enable and assign special positions per agent
|
||||||
Wolfgang:
|
Wolfgang:
|
||||||
- (2, 1)
|
- (2, 1)
|
||||||
- (2, 5)
|
- (2, 5)
|
||||||
@@ -47,12 +72,18 @@ Entities:
|
|||||||
# GlobalPositions:
|
# GlobalPositions:
|
||||||
# normalized: false
|
# normalized: false
|
||||||
|
|
||||||
|
# Define the env. dynamics
|
||||||
Rules:
|
Rules:
|
||||||
# Utilities
|
# Utilities
|
||||||
|
# This rule checks for collisions; it also assigns the (negative) reward
|
||||||
WatchCollisions:
|
WatchCollisions:
|
||||||
|
reward: -0.1
|
||||||
|
reward_at_done: -1
|
||||||
done_at_collisions: false
|
done_at_collisions: false
|
||||||
# Done Conditions
|
# Done Conditions
|
||||||
|
# Load any of these rules to check for done conditions.
|
||||||
# DoneAtDestinationReachAny:
|
# DoneAtDestinationReachAny:
|
||||||
DoneAtDestinationReachAll:
|
DoneAtDestinationReachAll:
|
||||||
|
# reward_at_done: 1
|
||||||
DoneAtMaxStepsReached:
|
DoneAtMaxStepsReached:
|
||||||
max_steps: 500
|
max_steps: 500
|
||||||
|
|||||||
@@ -2,3 +2,4 @@ MOVEMENTS_VALID: float = -0.001
|
|||||||
MOVEMENTS_FAIL: float = -0.05
|
MOVEMENTS_FAIL: float = -0.05
|
||||||
NOOP: float = -0.01
|
NOOP: float = -0.01
|
||||||
COLLISION: float = -0.5
|
COLLISION: float = -0.5
|
||||||
|
COLLISION_DONE: float = -1
|
||||||
|
|||||||
@@ -126,8 +126,10 @@ class AssignGlobalPositions(Rule):
|
|||||||
|
|
||||||
class WatchCollisions(Rule):
|
class WatchCollisions(Rule):
|
||||||
|
|
||||||
def __init__(self, done_at_collisions: bool = False):
|
def __init__(self, reward=r.COLLISION, done_at_collisions: bool = False, reward_at_done=r.COLLISION_DONE):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self.reward_at_done = reward_at_done
|
||||||
|
self.reward = reward
|
||||||
self.done_at_collisions = done_at_collisions
|
self.done_at_collisions = done_at_collisions
|
||||||
self.curr_done = False
|
self.curr_done = False
|
||||||
|
|
||||||
@@ -140,12 +142,12 @@ class WatchCollisions(Rule):
|
|||||||
if len(guests) >= 2:
|
if len(guests) >= 2:
|
||||||
for i, guest in enumerate(guests):
|
for i, guest in enumerate(guests):
|
||||||
try:
|
try:
|
||||||
guest.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
|
guest.set_state(TickResult(identifier=c.COLLISION, reward=self.reward,
|
||||||
validity=c.NOT_VALID, entity=self))
|
validity=c.NOT_VALID, entity=self))
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
results.append(TickResult(entity=guest, identifier=c.COLLISION,
|
results.append(TickResult(entity=guest, identifier=c.COLLISION,
|
||||||
reward=r.COLLISION, validity=c.VALID))
|
reward=self.reward, validity=c.VALID))
|
||||||
self.curr_done = True if self.done_at_collisions else False
|
self.curr_done = True if self.done_at_collisions else False
|
||||||
return results
|
return results
|
||||||
|
|
||||||
@@ -154,5 +156,5 @@ class WatchCollisions(Rule):
|
|||||||
inter_entity_collision_detected = self.curr_done
|
inter_entity_collision_detected = self.curr_done
|
||||||
move_failed = any(h.is_move(x.state.identifier) and not x.state.validity for x in state[c.AGENT])
|
move_failed = any(h.is_move(x.state.identifier) and not x.state.validity for x in state[c.AGENT])
|
||||||
if inter_entity_collision_detected or move_failed:
|
if inter_entity_collision_detected or move_failed:
|
||||||
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]
|
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=self.reward_at_done)]
|
||||||
return [DoneResult(validity=c.NOT_VALID, identifier=self.name)]
|
return []
|
||||||
|
|||||||
Reference in New Issue
Block a user