mirror of
https://github.com/illiumst/marl-factory-grid.git
synced 2025-12-14 03:00:37 +01:00
Update utility plotting + Added alternative methods for TSP emergence prevention
This commit is contained in:
@@ -219,48 +219,59 @@ def plot_reward_development(reward_development, cfg, results_path):
|
|||||||
def plot_collected_coins_per_step():
    """Plot the number of collected coins over environment steps.

    Three step curves are drawn from hard-coded observations: the run in
    which the emergent behaviour was prevented by RL, the run in which the
    emergent behaviour occurred, and the run in which it was prevented by
    TSP (same observations as the RL run). The figure is written to
    ``../study_out/number_of_collected_coins.pdf`` (relative to the current
    working directory) and then shown.
    """
    import os  # local import: only needed to prepare the output directory

    # Observed behaviour for multi-agent setting consisting of run0 and run0
    cleaned_dirt_per_step_emergent = [0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5]
    cleaned_dirt_per_step = [0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 5]  # RL and TSP

    # Plot order matters: the legend is re-ordered by index further below.
    plt.step(range(1, len(cleaned_dirt_per_step) + 1), cleaned_dirt_per_step,
             color='green', linewidth=3, label='Prevented (RL)')
    plt.step(range(1, len(cleaned_dirt_per_step_emergent) + 1), cleaned_dirt_per_step_emergent,
             linestyle='--', color='darkred', linewidth=3, label='Emergent')
    plt.step(range(1, len(cleaned_dirt_per_step) + 1), cleaned_dirt_per_step,
             linestyle='dotted', color='darkorange', linewidth=3, label='Prevented (TSP)')

    plt.xlabel("Environment step", fontsize=20)
    plt.ylabel("Collected Coins", fontsize=20)

    # Integer y ticks spanning every plotted series, not just one of them.
    all_values = cleaned_dirt_per_step + cleaned_dirt_per_step_emergent
    yint = range(min(all_values), max(all_values) + 1)
    plt.yticks(yint, fontsize=17)
    # One x tick per step of the longest series.
    plt.xticks(range(1, len(cleaned_dirt_per_step_emergent) + 1), fontsize=17)

    frame1 = plt.gca()
    # Only display every 5th tick label
    for idx, xlabel_i in enumerate(frame1.axes.get_xticklabels()):
        if (idx + 1) % 5 != 0:
            xlabel_i.set_visible(False)
            xlabel_i.set_fontsize(0.0)

    # Change order of labels in legend: RL, TSP, Emergent
    handles, labels = frame1.get_legend_handles_labels()
    order = [0, 2, 1]
    plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], prop={'size': 20})

    fig = plt.gcf()
    fig.set_size_inches(8, 7)

    out_file = "../study_out/number_of_collected_coins.pdf"
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    plt.savefig(out_file)
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
def plot_reached_flags_per_step():
    """Plot the number of reached flags over environment steps.

    Three step curves are drawn from hard-coded observations: the run in
    which the emergent behaviour was prevented by RL, the run in which the
    emergent behaviour occurred, and the run in which it was prevented by
    TSP. The figure is written to
    ``../study_out/number_of_reached_flags.pdf`` (relative to the current
    working directory) and then shown.
    """
    import os  # local import: only needed to prepare the output directory

    # Observed behaviour for multi-agent setting consisting of runs 1 + 2
    reached_flags_per_step_emergent = [0] * 32
    reached_flags_per_step_RL = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2]
    reached_flags_per_step_TSP = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]

    # Plot order matters: the legend is re-ordered by index further below.
    plt.step(range(1, len(reached_flags_per_step_RL) + 1), reached_flags_per_step_RL,
             color='green', linewidth=3, label='Prevented (RL)')
    plt.step(range(1, len(reached_flags_per_step_emergent) + 1), reached_flags_per_step_emergent,
             linestyle='--', color='darkred', linewidth=3, label='Emergent')
    plt.step(range(1, len(reached_flags_per_step_TSP) + 1), reached_flags_per_step_TSP,
             linestyle='dotted', color='darkorange', linewidth=3, label='Prevented (TSP)')

    plt.xlabel("Environment step", fontsize=20)
    plt.ylabel("Reached Flags", fontsize=20)

    # Integer y ticks spanning every plotted series, not just the RL one.
    all_values = (reached_flags_per_step_RL
                  + reached_flags_per_step_TSP
                  + reached_flags_per_step_emergent)
    yint = range(min(all_values), max(all_values) + 1)
    plt.yticks(yint, fontsize=17)
    # One x tick per step of the longest series.
    plt.xticks(range(1, len(reached_flags_per_step_emergent) + 1), fontsize=17)

    frame1 = plt.gca()
    # Only display every 5th tick label
    for idx, xlabel_i in enumerate(frame1.axes.get_xticklabels()):
        if (idx + 1) % 5 != 0:
            xlabel_i.set_visible(False)
            xlabel_i.set_fontsize(0.0)

    # Change order of labels in legend: RL, TSP, Emergent
    handles, labels = frame1.get_legend_handles_labels()
    order = [0, 2, 1]
    plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order], prop={'size': 20})

    fig = plt.gcf()
    fig.set_size_inches(8, 7)

    out_file = "../study_out/number_of_reached_flags.pdf"
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(os.path.dirname(out_file), exist_ok=True)
    plt.savefig(out_file)
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from tqdm import trange
|
from tqdm import trange
|
||||||
|
|
||||||
from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
|
from marl_factory_grid.algorithms.tsp.TSP_dirt_agent import TSPDirtAgent
|
||||||
@@ -24,7 +24,7 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
|
|||||||
for i in range(1, 9):
|
for i in range(1, 9):
|
||||||
for j in range(1, 10):
|
for j in range(1, 10):
|
||||||
# Add costs for both traversal directions
|
# Add costs for both traversal directions
|
||||||
edge_costs[f"{(i, j)}-{i + 1, j}"] = 0.55 + (i - 1) * 0.05
|
edge_costs[f"{(i, j)}-{i + 1, j}"] = 0.55 + (i) * 0.05
|
||||||
edge_costs[f"{i + 1, j}-{(i, j)}"] = 0.55 + (i - 1) * 0.05
|
edge_costs[f"{i + 1, j}-{(i, j)}"] = 0.55 + (i - 1) * 0.05
|
||||||
|
|
||||||
|
|
||||||
@@ -39,9 +39,25 @@ def get_dirt_quadrant_tsp_agents(emergent_phenomenon, factory):
|
|||||||
def _two_rooms_edge_cost(target):
    """Return the cost of traversing an edge that ends at position *target*.

    For ``target == (a, b)`` the cost is ``|5 / a * cbrt(b / 4 - 1) - 1|``,
    i.e. it depends only on the position the edge leads to.
    """
    first, second = target
    return np.abs(5 / first * np.cbrt(second / 4 - 1) - 1)


def get_two_rooms_tsp_agents(emergent_phenomenon, factory):
    """Create the two TSP target agents for the two-rooms scenario.

    :param emergent_phenomenon: If falsy, the edge weights of every agent's
        position graph are overwritten with position-dependent costs;
        if truthy, the agents are returned with their weights untouched.
    :param factory: Environment instance handed to each ``TSPTargetAgent``.
    :return: List with the two agents (agent indices 0 and 1).
    :raises KeyError: If an agent's position graph contains an edge outside
        the grid region for which costs are defined below.
    """
    agents = [TSPTargetAgent(factory, 0), TSPTargetAgent(factory, 1)]
    if not emergent_phenomenon:
        # Costs are keyed by (source, destination) position tuples.
        edge_costs = {}

        # Add costs for horizontal edges
        for i in range(1, 6):
            for j in range(1, 13):
                here, there = (i, j), (i, j + 1)
                # Add costs for both traversal directions
                edge_costs[here, there] = _two_rooms_edge_cost(there)
                edge_costs[there, here] = _two_rooms_edge_cost(here)

        # Add costs for vertical edges
        for i in range(1, 5):
            for j in range(1, 14):
                here, there = (i, j), (i + 1, j)
                # Add costs for both traversal directions
                edge_costs[here, there] = _two_rooms_edge_cost(there)
                edge_costs[there, here] = _two_rooms_edge_cost(here)

        for agent in agents:
            # NOTE(review): _position_graph looks like a networkx graph whose
            # nodes are (int, int) tuples - confirm against the agent class.
            graph = agent._position_graph
            for u, v in graph.edges():
                try:
                    cost = edge_costs[u, v]
                except KeyError:
                    raise KeyError(f"No edge cost defined for edge {u}-{v}") from None
                graph[u][v]['weight'] = cost
    return agents
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user