diff --git a/_bibliography.bib b/_bibliography.bib index e1745540..5ed12abe 100644 --- a/_bibliography.bib +++ b/_bibliography.bib @@ -212,4 +212,20 @@ organization={Conference on Artificial Life - Alife 2023}, author={Kölle, Michael and Illium, Steffen and Hahn, Carsten and Schauer, Lorenz and Hutter, Johannes and Linnhoff-Popien, Claudia}, journal={arXiv preprint arXiv:2301.07420}, year={2023} +} + +@inproceedings{altmann2024emergence, + title={Emergence in Multi-agent Systems: A Safety Perspective}, + author={Altmann, Philipp and Schönberger, Julian and Illium, Steffen and Zorn, Maximilian and Ritz, Fabian and Haider, Tom and Burton, Simon and Gabor, Thomas}, + booktitle={International Symposium on Leveraging Applications of Formal Methods}, + pages={104--120}, + year={2024}, + organization={Springer Nature Switzerland Cham} +} + +@article{kolle2024aquarium, + title={Aquarium: A Comprehensive Framework for Exploring Predator-Prey Dynamics through Multi-Agent Reinforcement Learning Algorithms}, + author={Kölle, Michael and Erpelding, Yannick and Ritz, Fabian and Phan, Thomy and Illium, Steffen and Linnhoff-Popien, Claudia}, + journal={arXiv preprint arXiv:2401.07056}, + year={2024} } \ No newline at end of file diff --git a/_config.yml b/_config.yml index 49b2b4d7..acfbf703 100644 --- a/_config.yml +++ b/_config.yml @@ -21,7 +21,7 @@ locale : "en-US" title : "Steffen Illium" title_separator : "---" subtitle : " " # site tagline that appears below site title in masthead -name : Steffen Illium +name : Dr. Steffen Illium description : "Personal Website" url : "https://steffenillium.de" # the base hostname & protocol for your site e.g. "https://mmistakes.github.io" baseurl : "" # the subpath of your site, e.g. "/blog" @@ -57,9 +57,9 @@ social: # Site Author author: - name : "Steffen Illium" + name : "Dr. Steffen Illium" avatar : "/assets/images/newshot_2.jpg" # path of avatar image, e.g. 
"/assets/images/bio-photo.jpg" - bio : "[AI Researcher](/research/) and [Lecturer](/teaching/), [PHD Student](https://www.mobile.ifi.lmu.de/team/steffen-illium/) @ [LMU Munich](https://www.lmu.de/en/index.html)" + bio : "Senior AI Consultant @[XITASO](https://xitaso.com), [AI Researcher](/research/) [PhD](https://www.mobile.ifi.lmu.de/team/steffen-illium/) by [LMU Munich](https://www.lmu.de/en/index.html)" location : "Augsburg" links: - label: "LMU-Munich" diff --git a/_posts/research/2024-01-13-aquarium.md b/_posts/research/2024-01-13-aquarium.md new file mode 100644 index 00000000..4ba7f305 --- /dev/null +++ b/_posts/research/2024-01-13-aquarium.md @@ -0,0 +1,18 @@ +--- +layout: single +title: "Aquarium" +categories: research MARL reinforcement-learning multi-agent +excerpt: "Exploring Predator-Prey Dynamics" +header: + teaser: assets/figures/20_aquarium.png +--- + +![Multi-Agent Reinforcement Learning Cycle](/assets/figures/20_aquarium.png){:style="display:block; width:40%" .align-right} +Recent advances in multi-agent reinforcement learning have enabled the modeling of complex interactions between agents in simulated environments. In particular, predator-prey dynamics have garnered significant interest, and various simulations have been adapted to meet unique requirements. To avoid further time-intensive development efforts, we introduce *Aquarium*, a versatile multi-agent reinforcement learning environment designed for studying predator-prey interactions and emergent behavior. *Aquarium* is open-source and seamlessly integrates with the PettingZoo framework, allowing for a quick start using established algorithm implementations. It features physics-based agent movement on a two-dimensional, edge-wrapping plane. Both the agent-environment interactions (observations, actions, rewards) and environmental parameters (agent speed, prey reproduction, predator starvation, and more) are fully customizable. 
In addition to providing a resource-efficient visualization, *Aquarium* supports video recording, facilitating a visual understanding of agent behavior. + +To showcase the environment's capabilities, we conducted preliminary studies using proximal policy optimization (PPO) to train multiple prey agents to evade a predator. Consistent with existing literature, we found that individual learning leads to worse performance, while parameter sharing significantly improves coordination and sample efficiency. +{% cite kolle2024aquarium %} + +![Construction of the Observation Vector](/assets/figures/20_observation_vector.png){:style="display:block; width:70%" .align-center} + +![Average captures and rewards per prey agent](/assets/figures/20_capture_statistics.png){:style="display:block; width:70%" .align-center} diff --git a/_posts/research/2024-10-27-emergence-mas.md b/_posts/research/2024-10-27-emergence-mas.md new file mode 100644 index 00000000..3030899f --- /dev/null +++ b/_posts/research/2024-10-27-emergence-mas.md @@ -0,0 +1,18 @@ +--- +layout: single +title: "MAS Emergence" +categories: research multi-agent reinforcement-learning safety emergence +excerpt: "A Safety Perspective" +header: + teaser: assets/figures/21_coins_teaser.png +--- + +![Evaluation Environments](/assets/figures/21_envs.png){:style="display:block; width:40%" .align-right} +Emergent effects can occur in multi-agent systems (MAS), where decision-making is decentralized and based on local information. These effects may range from minor deviations in behavior to catastrophic system failures. To formally define these phenomena, we identify misalignments between the global inherent specification (the true specification) and its local approximation (e.g., the configuration of distinct reward components or observations). Leveraging established safety concepts, we develop a framework for understanding these emergent effects. 
To demonstrate the resulting implications, we examine two highly configurable gridworld scenarios, where inadequate specifications lead to unintended behavior deviations when derived independently. Acknowledging that a global solution may not always be practical, we propose adjusting the underlying parameterizations to mitigate these issues, thereby improving system alignment and reducing the risk of emergent failures. +{% cite altmann2024emergence %} + +![Instances of emergent behavior](/assets/figures/21_coins.png){:style="display:block; width:70%" .align-center} + +![Blocking behavior](/assets/figures/21_blocking.png){:style="display:block; width:70%" .align-center} + + diff --git a/assets/figures/20_aquarium.png b/assets/figures/20_aquarium.png new file mode 100644 index 00000000..f822ebca Binary files /dev/null and b/assets/figures/20_aquarium.png differ diff --git a/assets/figures/20_capture_statistics.png b/assets/figures/20_capture_statistics.png new file mode 100644 index 00000000..5def6d93 Binary files /dev/null and b/assets/figures/20_capture_statistics.png differ diff --git a/assets/figures/20_observation_vector.png b/assets/figures/20_observation_vector.png new file mode 100644 index 00000000..d78eca14 Binary files /dev/null and b/assets/figures/20_observation_vector.png differ diff --git a/assets/figures/21_blocking.png b/assets/figures/21_blocking.png new file mode 100644 index 00000000..798de9c4 Binary files /dev/null and b/assets/figures/21_blocking.png differ diff --git a/assets/figures/21_coins.png b/assets/figures/21_coins.png new file mode 100644 index 00000000..d67c0a2a Binary files /dev/null and b/assets/figures/21_coins.png differ diff --git a/assets/figures/21_coins_teaser.png b/assets/figures/21_coins_teaser.png new file mode 100644 index 00000000..059a1fb6 Binary files /dev/null and b/assets/figures/21_coins_teaser.png differ diff --git a/assets/figures/21_envs.png b/assets/figures/21_envs.png new file mode 100644 index 00000000..aee156f9 
Binary files /dev/null and b/assets/figures/21_envs.png differ diff --git a/assets/publications/altmann2024emergence.pdf b/assets/publications/altmann2024emergence.pdf new file mode 100644 index 00000000..099e32f3 Binary files /dev/null and b/assets/publications/altmann2024emergence.pdf differ diff --git a/assets/publications/kolle2024aquarium.pdf b/assets/publications/kolle2024aquarium.pdf new file mode 100644 index 00000000..0dcb0f75 Binary files /dev/null and b/assets/publications/kolle2024aquarium.pdf differ diff --git a/nginx_default.conf b/nginx_default.conf index d11500dc..3311e019 100644 --- a/nginx_default.conf +++ b/nginx_default.conf @@ -1,6 +1,6 @@ map $http_accept $webp_suffix { - default ""; - "~*webp" ".webp"; + default ""; + "~*webp" ".webp"; } server { @@ -8,9 +8,16 @@ server { listen [::]:80; server_name localhost; - location ~* \.(?:jpg|jpeg|png|webp)$ { - root /usr/share/nginx/html; - try_files $uri$webp_suffix $uri = 404; + location ~* ^/.+\.(jpg|jpeg|png|gif)$ { + root /usr/share/nginx/html; + # BEGIN Browser Caching of WebP + expires 180d; + add_header Pragma "public"; + add_header Cache-Control "public"; + # END Browser Caching of WebP + + add_header Vary Accept; + try_files $uri$webp_suffix $uri =404; } location / { @@ -22,13 +29,12 @@ server { location = /404.html { root /usr/share/nginx/html; } - - # Caching headers - # location ~* \.(?:ico|css|js|gif|jpe?g|png)$ { - # expires 30d; - # add_header Pragma public; - # add_header Cache-Control "public"; - # } + # Browser Caching + location ~* \.(css|js|ico|gif|jpeg|jpg|webp|png|svg|eot|otf|woff|woff2|ttf|ogg)$ { + expires 180d; + add_header Pragma "public"; + add_header Cache-Control "public"; + } gzip on; gzip_comp_level 4;