Added explanation for narrow_corridor.yaml

This commit is contained in:
Steffen Illium 2023-11-10 06:54:38 +01:00
parent 06a5130b25
commit a9462a8b6f
3 changed files with 40 additions and 6 deletions

View File

@ -1,23 +1,41 @@
General:
eneral:
# Your Seed
env_seed: 69
# Individual or global rewards?
individual_rewards: true
# The level.txt file to load
level_name: narrow_corridor
# View Radius; 0 = full observability
pomdp_r: 0
# print all messages and events
verbose: true
Agents:
# Agents are identified by their name
Wolfgang:
# The available actions for this particular agent
Actions:
# Able to do nothing
- Noop
# Able to move in all 8 directions
- Move8
# Stuff the agent can observe (per 2d slice)
# use "Combined" if you want to merge multiple slices into one
Observations:
# He sees walls
- Walls
# He sees the other agent; "Karl-Heinz" in this setting would be fine, too
- Other
# He can see Destinations, that are assigned to him (hence the singular)
- Destination
# Available Spawn Positions as list
Positions:
- (2, 1)
- (2, 5)
# It is okay to collide with other agents, so that
# they end up on the same position
is_blocking_pos: true
# See Above....
Karl-Heinz:
Actions:
- Noop
@ -31,12 +49,19 @@ Agents:
- (2, 5)
is_blocking_pos: true
# Other noteworthy Entities
Entities:
# The destinations or positional targets to reach
Destinations:
# Let them spawn on closed doors and agent positions
ignore_blocking: true
# We need a special spawn rule...
spawnrule:
# ...which assigns the destinations per agent
SpawnDestinationsPerAgent:
# we use this parameter
coords_or_quantity:
# to enable and assign special positions per agent
Wolfgang:
- (2, 1)
- (2, 5)
@ -47,12 +72,18 @@ Entities:
# GlobalPositions:
# normalized: false
# Define the env. dynamics
Rules:
# Utilities
# This rule Checks for Collision, also it assigns the (negative) reward
WatchCollisions:
reward: -0.1
reward_at_done: -1
done_at_collisions: false
# Done Conditions
# Load any of the rules, to check for done conditions.
# DoneAtDestinationReachAny:
DoneAtDestinationReachAll:
# reward_at_done: 1
DoneAtMaxStepsReached:
max_steps: 500

View File

@ -2,3 +2,4 @@ MOVEMENTS_VALID: float = -0.001
MOVEMENTS_FAIL: float = -0.05
NOOP: float = -0.01
COLLISION: float = -0.5
COLLISION_DONE: float = -1

View File

@ -126,8 +126,10 @@ class AssignGlobalPositions(Rule):
class WatchCollisions(Rule):
def __init__(self, done_at_collisions: bool = False):
def __init__(self, reward=r.COLLISION, done_at_collisions: bool = False, reward_at_done=r.COLLISION_DONE):
super().__init__()
self.reward_at_done = reward_at_done
self.reward = reward
self.done_at_collisions = done_at_collisions
self.curr_done = False
@ -140,12 +142,12 @@ class WatchCollisions(Rule):
if len(guests) >= 2:
for i, guest in enumerate(guests):
try:
guest.set_state(TickResult(identifier=c.COLLISION, reward=r.COLLISION,
guest.set_state(TickResult(identifier=c.COLLISION, reward=self.reward,
validity=c.NOT_VALID, entity=self))
except AttributeError:
pass
results.append(TickResult(entity=guest, identifier=c.COLLISION,
reward=r.COLLISION, validity=c.VALID))
reward=self.reward, validity=c.VALID))
self.curr_done = True if self.done_at_collisions else False
return results
@ -154,5 +156,5 @@ class WatchCollisions(Rule):
inter_entity_collision_detected = self.curr_done
move_failed = any(h.is_move(x.state.identifier) and not x.state.validity for x in state[c.AGENT])
if inter_entity_collision_detected or move_failed:
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=r.COLLISION)]
return [DoneResult(validity=c.NOT_VALID, identifier=self.name)]
return [DoneResult(validity=c.VALID, identifier=c.COLLISION, reward=self.reward_at_done)]
return []