Update utility plotting and add alternative methods for TSP emergence prevention

This commit is contained in:
Julian Schönberger
2024-05-27 13:25:52 +02:00
parent a78e0dd8a3
commit 41a1ec0a5b
2 changed files with 40 additions and 13 deletions

View File

@ -219,48 +219,59 @@ def plot_reward_development(reward_development, cfg, results_path):
def plot_collected_coins_per_step():
    """Plot the cumulative number of collected coins per environment step.

    Three step curves are drawn from hard-coded observations: the emergent
    run, and the runs in which emergence was prevented via RL and via TSP
    (the latter two share one data series here). The figure is written to
    ``../study_out/number_of_collected_coins.png`` and ``.pdf`` and then
    displayed with ``plt.show()``.
    """
    # Observed behaviour for multi-agent setting consisting of run0 and run0
    cleaned_dirt_per_step_emergent = [0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5]
    cleaned_dirt_per_step = [0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 5]  # RL and TSP

    prevented_steps = range(1, len(cleaned_dirt_per_step) + 1)
    emergent_steps = range(1, len(cleaned_dirt_per_step_emergent) + 1)

    # Exactly one plot call per curve: the legend reordering below relies on
    # the handle order being [RL, Emergent, TSP].
    plt.step(prevented_steps, cleaned_dirt_per_step,
             color='green', linewidth=3, label='Prevented (RL)')
    plt.step(emergent_steps, cleaned_dirt_per_step_emergent,
             linestyle='--', color='darkred', linewidth=3, label='Emergent')
    plt.step(prevented_steps, cleaned_dirt_per_step,
             linestyle='dotted', color='darkorange', linewidth=3, label='Prevented (TSP)')

    plt.xlabel("Environment step", fontsize=20)
    plt.ylabel("Collected Coins", fontsize=20)
    yint = range(min(cleaned_dirt_per_step), max(cleaned_dirt_per_step) + 1)
    plt.yticks(yint, fontsize=17)
    # The emergent run is the longest series, so it defines the x-axis ticks.
    plt.xticks(emergent_steps, fontsize=17)

    frame1 = plt.gca()
    # Only display every 5th tick label
    for idx, xlabel_i in enumerate(frame1.get_xticklabels()):
        if (idx + 1) % 5 != 0:
            xlabel_i.set_visible(False)
            xlabel_i.set_fontsize(0.0)

    # Change order of labels in legend: RL, TSP, Emergent
    handles, labels = frame1.get_legend_handles_labels()
    order = [0, 2, 1]
    plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], prop={'size': 20})

    fig = plt.gcf()
    fig.set_size_inches(8, 7)
    plt.savefig("../study_out/number_of_collected_coins.png")
    plt.savefig("../study_out/number_of_collected_coins.pdf")
    plt.show()
def plot_reached_flags_per_step():
    """Plot the cumulative number of reached flags per environment step.

    Three step curves are drawn from hard-coded observations: the emergent
    run (which never reaches a flag in the recorded steps) and the runs in
    which emergence was prevented via RL and via TSP. The figure is written
    to ``../study_out/number_of_reached_flags.png`` and ``.pdf`` and then
    displayed with ``plt.show()``.
    """
    # Observed behaviour for multi-agent setting consisting of runs 1 + 2
    reached_flags_per_step_emergent = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    reached_flags_per_step_RL = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2]
    reached_flags_per_step_TSP = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]

    emergent_steps = range(1, len(reached_flags_per_step_emergent) + 1)

    # Exactly one plot call per curve: the legend reordering below relies on
    # the handle order being [RL, Emergent, TSP].
    plt.step(range(1, len(reached_flags_per_step_RL) + 1), reached_flags_per_step_RL,
             color='green', linewidth=3, label='Prevented (RL)')
    plt.step(emergent_steps, reached_flags_per_step_emergent,
             linestyle='--', color='darkred', linewidth=3, label='Emergent')
    plt.step(range(1, len(reached_flags_per_step_TSP) + 1), reached_flags_per_step_TSP,
             linestyle='dotted', color='darkorange', linewidth=3, label='Prevented (TSP)')

    plt.xlabel("Environment step", fontsize=20)
    plt.ylabel("Reached Flags", fontsize=20)
    yint = range(min(reached_flags_per_step_RL), max(reached_flags_per_step_RL) + 1)
    plt.yticks(yint, fontsize=17)
    # The emergent run is the longest series, so it defines the x-axis ticks.
    plt.xticks(emergent_steps, fontsize=17)

    frame1 = plt.gca()
    # Only display every 5th tick label
    for idx, xlabel_i in enumerate(frame1.get_xticklabels()):
        if (idx + 1) % 5 != 0:
            xlabel_i.set_visible(False)
            xlabel_i.set_fontsize(0.0)

    # Change order of labels in legend: RL, TSP, Emergent
    handles, labels = frame1.get_legend_handles_labels()
    order = [0, 2, 1]
    plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], prop={'size': 20})

    fig = plt.gcf()
    fig.set_size_inches(8, 7)
    plt.savefig("../study_out/number_of_reached_flags.png")
    plt.savefig("../study_out/number_of_reached_flags.pdf")
    plt.show()

View File

@ -1,7 +1,7 @@
import os
import time
from pathlib import Path
import numpy as np
from tqdm import trange
from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
@ -24,7 +24,7 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
for i in range(1, 9):
for j in range(1, 10):
# Add costs for both traversal directions
edge_costs[f"{(i, j)}-{i + 1, j}"] = 0.55 + (i - 1) * 0.05
edge_costs[f"{(i, j)}-{i + 1, j}"] = 0.55 + (i) * 0.05
edge_costs[f"{i + 1, j}-{(i, j)}"] = 0.55 + (i - 1) * 0.05
@ -39,9 +39,25 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
def get_two_rooms_tsp_agents(emergent_phenomenon, factory):
    """Create the two TSP target agents for the two-rooms scenario.

    :param emergent_phenomenon: If truthy, the agents are returned with their
        position graphs untouched. If falsy, every edge weight in each agent's
        position graph is replaced by a position-dependent cost.
    :param factory: Environment instance handed to ``TSPTargetAgent``.
    :return: List with the two ``TSPTargetAgent`` instances (agent ids 0 and 1).
    :raises KeyError: If a position graph contains an edge that is not part of
        the cost table built below (cells ``(1..5, 1..13)``, axis-aligned
        neighbours only).
    """
    agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
    if not emergent_phenomenon:

        def destination_cost(first, second):
            # Cost of stepping onto cell (first, second); every directed edge
            # is priced by its destination cell.
            return np.abs(5 / first * np.cbrt(second / 4 - 1) - 1)

        edge_costs = {}
        # Add costs for horizontal edges
        for i in range(1, 6):
            for j in range(1, 13):
                # Add costs for both traversal directions
                edge_costs[f"{(i, j)}-{(i, j + 1)}"] = destination_cost(i, j + 1)
                edge_costs[f"{(i, j + 1)}-{(i, j)}"] = destination_cost(i, j)
        # Add costs for vertical edges
        for i in range(1, 5):
            for j in range(1, 14):
                # Add costs for both traversal directions
                edge_costs[f"{(i, j)}-{(i + 1, j)}"] = destination_cost(i + 1, j)
                edge_costs[f"{(i + 1, j)}-{(i, j)}"] = destination_cost(i, j)

        for agent in agents:
            # Keys are looked up in the direction the graph reports the edge.
            for u, v, _ in agent._position_graph.edges(data='weight'):
                agent._position_graph[u][v]['weight'] = edge_costs[f"{u}-{v}"]
    return agents