From 38a3ef76875e35655a9a11efee1cb155b5a78450 Mon Sep 17 00:00:00 2001
From: romue <robert.mueller@campus.lmu.de>
Date: Wed, 2 Jun 2021 09:38:15 +0200
Subject: [PATCH] added domain shift envs and adjusted reload_agent.py

---
 environments/domain_adaption/__init__.py      |   0
 .../car_racing_variants/__init__.py           |  64 ++
 .../car_racing_variants/car_dynamics.py       | 289 +++++++++
 .../car_racing_variants/car_racing.py         | 608 ++++++++++++++++++
 .../car_racing_variants/version.py            |   1 +
 .../natural_rl_environment/imgsource.py       | 120 ++++
 .../natural_rl_environment/matting.py         |  32 +
 .../natural_rl_environment/natural_env.py     |  99 +++
 environments/domain_adaption/test.py          |  24 +
 reload_agent.py                               |  17 +-
 10 files changed, 1246 insertions(+), 8 deletions(-)
 create mode 100644 environments/domain_adaption/__init__.py
 create mode 100644 environments/domain_adaption/car_racing_variants/__init__.py
 create mode 100644 environments/domain_adaption/car_racing_variants/car_dynamics.py
 create mode 100644 environments/domain_adaption/car_racing_variants/car_racing.py
 create mode 100644 environments/domain_adaption/car_racing_variants/version.py
 create mode 100644 environments/domain_adaption/natural_rl_environment/imgsource.py
 create mode 100644 environments/domain_adaption/natural_rl_environment/matting.py
 create mode 100755 environments/domain_adaption/natural_rl_environment/natural_env.py
 create mode 100644 environments/domain_adaption/test.py

diff --git a/environments/domain_adaption/__init__.py b/environments/domain_adaption/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/environments/domain_adaption/car_racing_variants/__init__.py b/environments/domain_adaption/car_racing_variants/__init__.py
new file mode 100644
index 0000000..ae0a495
--- /dev/null
+++ b/environments/domain_adaption/car_racing_variants/__init__.py
@@ -0,0 +1,64 @@
+from gym.envs.registration import register
+
+# Env registration
+# ==========================
+
+register(
+    id='CarRacingColor-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'color',
+    },
+)
+
+register(
+    id='CarRacingColor3-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'color3',
+    },
+)
+
+register(
+    id='CarRacingBar-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'bar',
+    },
+)
+
+register(
+    id='CarRacingBlob-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'blob',
+    },
+)
+
+register(
+    id='CarRacingNoise-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'noise',
+    },
+)
+
+register(
+    id='CarRacingVideo-v0',
+    entry_point='car_racing_variants.car_racing:CarRacing',
+    max_episode_steps=1000,
+    reward_threshold=900.0,
+    kwargs={
+        'modification': 'video',
+    },
+)
diff --git a/environments/domain_adaption/car_racing_variants/car_dynamics.py b/environments/domain_adaption/car_racing_variants/car_dynamics.py
new file mode 100644
index 0000000..e8d96fa
--- /dev/null
+++ b/environments/domain_adaption/car_racing_variants/car_dynamics.py
@@ -0,0 +1,289 @@
+"""This file is based on the car_dynamics.py in OpenAI's gym.
+
+Description
+    We provide some modified CarRacing environments here:
+    * CarRacingColor-v0
+        Both the lane and the grass colors are perturbed at the env.reset() with scalar noises.
+    * CarRacingColor3-v0
+        Both the lane and the grass colors are perturbed at the env.reset() with 3d noises.
+    * CarRacingBar-v0
+        We add vertical bars on the left and right side of the screen.
+    * CarRacingBlob-v0
+        A red blob that follows the car at a fixed position in the car's frame.
+    * CarRacingNoise-v0
+        We replace the green background with noise.
+    * CarRacingVideo-v0
+        We replace the green background with frames from a video, the user is
+        responsible for creating these frames and pass the directory.
+    These environments were first used in the paper "Neuroevolution of Self-Interpretable Agent"
+    https://attentionagent.github.io/
+
+Author
+    Yujin Tang (yujintang@google.com)
+"""
+
+import numpy as np
+import math
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape)
+
+# Top-down car dynamics simulation.
+#
+# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
+# This simulation is a bit more detailed, with wheels rotation.
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+SIZE = 0.02
+ENGINE_POWER            = 100000000*SIZE*SIZE
+WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE
+FRICTION_LIMIT          = 1000000*SIZE*SIZE     # friction ~= mass ~= size^2 (calculated implicitly using density)
+WHEEL_R  = 27
+WHEEL_W  = 14
+WHEELPOS = [
+    (-55,+80), (+55,+80),
+    (-55,-82), (+55,-82)
+    ]
+HULL_POLY1 =[
+    (-60,+130), (+60,+130),
+    (+60,+110), (-60,+110)
+    ]
+HULL_POLY2 =[
+    (-15,+120), (+15,+120),
+    (+20, +20), (-20,  20)
+    ]
+HULL_POLY3 =[
+    (+25, +20),
+    (+50, -10),
+    (+50, -40),
+    (+20, -90),
+    (-20, -90),
+    (-50, -40),
+    (-50, -10),
+    (-25, +20)
+    ]
+HULL_POLY4 =[
+    (-50,-120), (+50,-120),
+    (+50,-90),  (-50,-90)
+    ]
+
+WHEEL_COLOR = (0.0,0.0,0.0)
+WHEEL_WHITE = (0.3,0.3,0.3)
+MUD_COLOR   = (0.4,0.4,0.0)
+
+class Car:
+    def __init__(self, world, init_angle, init_x, init_y, add_blob=False):
+        self.world = world
+
+        self.add_blob = add_blob
+
+        fixtures = [
+            fixtureDef(shape=polygonShape(
+                vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0),
+            fixtureDef(shape=polygonShape(
+                vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0),
+            fixtureDef(shape=polygonShape(
+                vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0),
+            fixtureDef(shape=polygonShape(
+                vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0),
+        ]
+        self.hull = self.world.CreateDynamicBody(
+            position = (init_x, init_y),
+            angle = init_angle,
+            fixtures = fixtures,
+        )
+        self.hull.color = (0.8,0.0,0.0)
+        self.wheels = []
+        self.fuel_spent = 0.0
+        WHEEL_POLY = [
+            (-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R),
+            (+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R)
+            ]
+        for wx,wy in WHEELPOS:
+            front_k = 1.0 if wy > 0 else 1.0
+            w = self.world.CreateDynamicBody(
+                position = (init_x+wx*SIZE, init_y+wy*SIZE),
+                angle = init_angle,
+                fixtures = fixtureDef(
+                    shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]),
+                    density=0.1,
+                    categoryBits=0x0020,
+                    maskBits=0x001,
+                    restitution=0.0)
+                    )
+            w.wheel_rad = front_k*WHEEL_R*SIZE
+            w.color = WHEEL_COLOR
+            w.gas   = 0.0
+            w.brake = 0.0
+            w.steer = 0.0
+            w.phase = 0.0  # wheel angle
+            w.omega = 0.0  # angular velocity
+            w.skid_start = None
+            w.skid_particle = None
+            rjd = revoluteJointDef(
+                bodyA=self.hull,
+                bodyB=w,
+                localAnchorA=(wx*SIZE,wy*SIZE),
+                localAnchorB=(0,0),
+                enableMotor=True,
+                enableLimit=True,
+                maxMotorTorque=180*900*SIZE*SIZE,
+                motorSpeed = 0,
+                lowerAngle = -0.4,
+                upperAngle = +0.4,
+                )
+            w.joint = self.world.CreateJoint(rjd)
+            w.tiles = set()
+            w.userData = w
+            self.wheels.append(w)
+        self.drawlist =  self.wheels + [self.hull]
+        self.particles = []
+
+    def gas(self, gas):
+        'control: rear wheel drive'
+        gas = np.clip(gas, 0, 1)
+        for w in self.wheels[2:4]:
+            diff = gas - w.gas
+            if diff > 0.1: diff = 0.1  # gradually increase, but stop immediately
+            w.gas += diff
+
+    def brake(self, b):
+        'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation'
+        for w in self.wheels:
+            w.brake = b
+
+    def steer(self, s):
+        'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position'
+        self.wheels[0].steer = s
+        self.wheels[1].steer = s
+
+    def step(self, dt):
+        for w in self.wheels:
+            # Steer each wheel
+            dir = np.sign(w.steer - w.joint.angle)
+            val = abs(w.steer - w.joint.angle)
+            w.joint.motorSpeed = dir*min(50.0*val, 3.0)
+
+            # Position => friction_limit
+            grass = True
+            friction_limit = FRICTION_LIMIT*0.6  # Grass friction if no tile
+            for tile in w.tiles:
+                friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction)
+                grass = False
+
+            # Force
+            forw = w.GetWorldVector( (0,1) )
+            side = w.GetWorldVector( (1,0) )
+            v = w.linearVelocity
+            vf = forw[0]*v[0] + forw[1]*v[1]  # forward speed
+            vs = side[0]*v[0] + side[1]*v[1]  # side speed
+
+            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
+            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
+            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
+            w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0)  # small coef not to divide by zero
+            self.fuel_spent += dt*ENGINE_POWER*w.gas
+
+            if w.brake >= 0.9:
+                w.omega = 0
+            elif w.brake > 0:
+                BRAKE_FORCE = 15    # radians per second
+                dir = -np.sign(w.omega)
+                val = BRAKE_FORCE*w.brake
+                if abs(val) > abs(w.omega): val = abs(w.omega)  # low speed => same as = 0
+                w.omega += dir*val
+            w.phase += w.omega*dt
+
+            vr = w.omega*w.wheel_rad  # rotating wheel speed
+            f_force = -vf + vr        # force direction is direction of speed difference
+            p_force = -vs
+
+            # Physically correct is to always apply friction_limit until speed is equal.
+            # But dt is finite, that will lead to oscillations if difference is already near zero.
+            f_force *= 205000*SIZE*SIZE  # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
+            p_force *= 205000*SIZE*SIZE
+            force = np.sqrt(np.square(f_force) + np.square(p_force))
+
+            # Skid trace
+            if abs(force) > 2.0*friction_limit:
+                if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30:
+                    w.skid_particle.poly.append( (w.position[0], w.position[1]) )
+                elif w.skid_start is None:
+                    w.skid_start = w.position
+                else:
+                    w.skid_particle = self._create_particle( w.skid_start, w.position, grass )
+                    w.skid_start = None
+            else:
+                w.skid_start = None
+                w.skid_particle = None
+
+            if abs(force) > friction_limit:
+                f_force /= force
+                p_force /= force
+                force = friction_limit  # Correct physics here
+                f_force *= force
+                p_force *= force
+
+            w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA
+
+            w.ApplyForceToCenter( (
+                p_force*side[0] + f_force*forw[0],
+                p_force*side[1] + f_force*forw[1]), True )
+
+    def draw(self, viewer, draw_particles=True):
+        if draw_particles:
+            for p in self.particles:
+                viewer.draw_polyline(p.poly, color=p.color, linewidth=5)
+        for obj in self.drawlist:
+            for i, f in enumerate(obj.fixtures):
+                trans = f.body.transform
+                path = [trans*v for v in f.shape.vertices]
+                if self.add_blob and i == 3:
+                    obj_color = (0.8, 0.0, 0.)
+                    offset_x = f.shape.vertices[0][0] + 20
+                    offset_y = f.shape.vertices[0][1] + 10
+                    width = height = 8
+                    blob =[
+                        trans * (+offset_x, +offset_y+height),
+                        trans * (+offset_x+width, +offset_y+height),
+                        trans * (+offset_x+width, +offset_y),
+                        trans * (+offset_x, +offset_y),
+                    ]
+                    viewer.draw_polygon(blob, color=obj_color)
+                viewer.draw_polygon(path, color=obj.color)
+                if "phase" not in obj.__dict__: continue
+                a1 = obj.phase
+                a2 = obj.phase + 1.2  # radians
+                s1 = math.sin(a1)
+                s2 = math.sin(a2)
+                c1 = math.cos(a1)
+                c2 = math.cos(a2)
+                if s1>0 and s2>0: continue
+                if s1>0: c1 = np.sign(c1)
+                if s2>0: c2 = np.sign(c2)
+                white_poly = [
+                    (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE),
+                    (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE)
+                    ]
+                viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE)
+
+    def _create_particle(self, point1, point2, grass):
+        class Particle:
+            pass
+        p = Particle()
+        p.color = WHEEL_COLOR if not grass else MUD_COLOR
+        p.ttl = 1
+        p.poly = [(point1[0],point1[1]), (point2[0],point2[1])]
+        p.grass = grass
+        self.particles.append(p)
+        while len(self.particles) > 30:
+            self.particles.pop(0)
+        return p
+
+    def destroy(self):
+        self.world.DestroyBody(self.hull)
+        self.hull = None
+        for w in self.wheels:
+            self.world.DestroyBody(w)
+        self.wheels = []
+
diff --git a/environments/domain_adaption/car_racing_variants/car_racing.py b/environments/domain_adaption/car_racing_variants/car_racing.py
new file mode 100644
index 0000000..de86a21
--- /dev/null
+++ b/environments/domain_adaption/car_racing_variants/car_racing.py
@@ -0,0 +1,608 @@
+"""This file is based on the car_racing.py in OpenAI's gym.
+
+Description
+    We provide some modified CarRacing environments here:
+    * CarRacingColor-v0
+        Both the lane and the grass colors are perturbed at the env.reset() with scalar noises.
+    * CarRacingColor3-v0
+        Both the lane and the grass colors are perturbed at the env.reset() with 3d noises.
+    * CarRacingBar-v0
+        We add vertical bars on the left and right side of the screen.
+    * CarRacingBlob-v0
+        A red blob that follows the car at a fixed position in the car's frame.
+    * CarRacingNoise-v0
+        We replace the green background with noise.
+    *. CarRacingVideo-v0
+        We replace the green background with frames from a video, the user is
+        responsible for creating these frames.
+    These environments were first used in the paper "Neuroevolution of Self-Interpretable Agent"
+    https://attentionagent.github.io/
+
+Author
+    Yujin Tang (yujintang@google.com)
+"""
+
+import cv2
+import glob
+import os
+import sys, math
+import numpy as np
+
+import Box2D
+from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
+
+import gym
+from gym import spaces
+from .car_dynamics import Car
+from gym.utils import colorize, seeding, EzPickle
+
+import pyglet
+from pyglet import gl
+
+# Easiest continuous control task to learn from pixels, a top-down racing environment.
+# Discrete control is reasonable in this environment as well, on/off discretization is
+# fine.
+#
+# State consists of STATE_W x STATE_H pixels.
+#
+# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is
+# the total number of tiles visited in the track. For example, if you have finished in 732 frames,
+# your reward is 1000 - 0.1*732 = 926.8 points.
+#
+# Game is solved when agent consistently gets 900+ points. Track generated is random every episode.
+#
+# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that
+# is far off the track, then it will get -100 and die.
+#
+# Some indicators shown at the bottom of the window and the state RGB buffer. From
+# left to right: true speed, four ABS sensors, steering wheel position and gyroscope.
+#
+# To play yourself (it's rather fast for humans), type:
+#
+# python gym/envs/box2d/car_racing.py
+#
+# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the
+# same time.
+#
+# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym.
+
+STATE_W = 96   # less than Atari 160x192
+STATE_H = 96
+VIDEO_W = 600
+VIDEO_H = 400
+WINDOW_W = 1000
+WINDOW_H = 800
+
+SCALE       = 6.0        # Track scale
+TRACK_RAD   = 900/SCALE  # Track is heavily morphed circle with this radius
+PLAYFIELD   = 2000/SCALE # Game over boundary
+FPS         = 50         # Frames per second
+ZOOM        = 2.7        # Camera zoom
+ZOOM_FOLLOW = True       # Set to False for fixed view (don't use zoom)
+
+
+TRACK_DETAIL_STEP = 21/SCALE
+TRACK_TURN_RATE = 0.31
+TRACK_WIDTH = 40/SCALE
+BORDER = 8/SCALE
+BORDER_MIN_COUNT = 4
+
+ROAD_COLOR = [0.4, 0.4, 0.4]
+
+class FrictionDetector(contactListener):
+    def __init__(self, env):
+        contactListener.__init__(self)
+        self.env = env
+    def BeginContact(self, contact):
+        self._contact(contact, True)
+    def EndContact(self, contact):
+        self._contact(contact, False)
+    def _contact(self, contact, begin):
+        tile = None
+        obj = None
+        u1 = contact.fixtureA.body.userData
+        u2 = contact.fixtureB.body.userData
+        if u1 and "road_friction" in u1.__dict__:
+            tile = u1
+            obj  = u2
+        if u2 and "road_friction" in u2.__dict__:
+            tile = u2
+            obj  = u1
+        if not tile:
+            return
+
+        tile.color[0] = self.env.road_color[0]
+        tile.color[1] = self.env.road_color[1]
+        tile.color[2] = self.env.road_color[2]
+        if not obj or "tiles" not in obj.__dict__:
+            return
+        if begin:
+            obj.tiles.add(tile)
+            # print tile.road_friction, "ADD", len(obj.tiles)
+            if not tile.road_visited:
+                tile.road_visited = True
+                self.env.reward += 1000.0/len(self.env.track)
+                self.env.tile_visited_count += 1
+        else:
+            obj.tiles.remove(tile)
+            # print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works)
+
+class CarRacing(gym.Env, EzPickle):
+    metadata = {
+        'render.modes': ['human', 'rgb_array', 'state_pixels'],
+        'video.frames_per_second' : FPS
+    }
+
+    def __init__(self, verbose=1, **kwargs):
+        EzPickle.__init__(self)
+        self.seed()
+
+        self.road_color = ROAD_COLOR[:]
+        self.grass_color = [0.4, 0.8, 0.4, 1]
+        if 'modification' in kwargs:
+          self._modification_type = kwargs['modification']
+        else:
+          self._modification_type = ''
+
+        self.contactListener_keepref = FrictionDetector(self)
+        self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
+        self.viewer = None
+        self.invisible_state_window = None
+        self.invisible_video_window = None
+        self.road = None
+        self.car = None
+        self.reward = 0.0
+        self.prev_reward = 0.0
+        self.verbose = verbose
+        self.fd_tile = fixtureDef(
+                shape = polygonShape(vertices=
+                    [(0, 0),(1, 0),(1, -1),(0, -1)]))
+
+        self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32)  # steer, gas, brake
+        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)
+
+        self.step_cnt = 0
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def _destroy(self):
+        if not self.road:
+            return
+        for t in self.road:
+            self.world.DestroyBody(t)
+        self.road = []
+        self.car.destroy()
+
+    def _create_track(self):
+        CHECKPOINTS = 12
+
+        # Create checkpoints
+        checkpoints = []
+        for c in range(CHECKPOINTS):
+            alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
+            rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
+            if c==0:
+                alpha = 0
+                rad = 1.5*TRACK_RAD
+            if c==CHECKPOINTS-1:
+                alpha = 2*math.pi*c/CHECKPOINTS
+                self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS
+                rad = 1.5*TRACK_RAD
+            checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )
+
+        # print "\n".join(str(h) for h in checkpoints)
+        # self.road_poly = [ (    # uncomment this to see checkpoints
+        #    [ (tx,ty) for a,tx,ty in checkpoints ],
+        #    (0.7,0.7,0.9) ) ]
+        self.road = []
+
+        # Go from one checkpoint to another to create track
+        x, y, beta = 1.5*TRACK_RAD, 0, 0
+        dest_i = 0
+        laps = 0
+        track = []
+        no_freeze = 2500
+        visited_other_side = False
+        while True:
+            alpha = math.atan2(y, x)
+            if visited_other_side and alpha > 0:
+                laps += 1
+                visited_other_side = False
+            if alpha < 0:
+                visited_other_side = True
+                alpha += 2*math.pi
+            while True: # Find destination from checkpoints
+                failed = True
+                while True:
+                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
+                    if alpha <= dest_alpha:
+                        failed = False
+                        break
+                    dest_i += 1
+                    if dest_i % len(checkpoints) == 0:
+                        break
+                if not failed:
+                    break
+                alpha -= 2*math.pi
+                continue
+            r1x = math.cos(beta)
+            r1y = math.sin(beta)
+            p1x = -r1y
+            p1y = r1x
+            dest_dx = dest_x - x  # vector towards destination
+            dest_dy = dest_y - y
+            proj = r1x*dest_dx + r1y*dest_dy  # destination vector projected on rad
+            while beta - alpha >  1.5*math.pi:
+                 beta -= 2*math.pi
+            while beta - alpha < -1.5*math.pi:
+                 beta += 2*math.pi
+            prev_beta = beta
+            proj *= SCALE
+            if proj >  0.3:
+                 beta -= min(TRACK_TURN_RATE, abs(0.001*proj))
+            if proj < -0.3:
+                 beta += min(TRACK_TURN_RATE, abs(0.001*proj))
+            x += p1x*TRACK_DETAIL_STEP
+            y += p1y*TRACK_DETAIL_STEP
+            track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) )
+            if laps > 4:
+                 break
+            no_freeze -= 1
+            if no_freeze==0:
+                 break
+        # print "\n".join([str(t) for t in enumerate(track)])
+
+        # Find closed loop range i1..i2, first loop should be ignored, second is OK
+        i1, i2 = -1, -1
+        i = len(track)
+        while True:
+            i -= 1
+            if i==0:
+                return False  # Failed
+            pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha
+            if pass_through_start and i2==-1:
+                i2 = i
+            elif pass_through_start and i1==-1:
+                i1 = i
+                break
+        if self.verbose == 1:
+            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
+        assert i1!=-1
+        assert i2!=-1
+
+        track = track[i1:i2-1]
+
+        first_beta = track[0][1]
+        first_perp_x = math.cos(first_beta)
+        first_perp_y = math.sin(first_beta)
+        # Length of perpendicular jump to put together head and tail
+        well_glued_together = np.sqrt(
+            np.square( first_perp_x*(track[0][2] - track[-1][2]) ) +
+            np.square( first_perp_y*(track[0][3] - track[-1][3]) ))
+        if well_glued_together > TRACK_DETAIL_STEP:
+            return False
+
+        # Red-white border on hard turns
+        border = [False]*len(track)
+        for i in range(len(track)):
+            good = True
+            oneside = 0
+            for neg in range(BORDER_MIN_COUNT):
+                beta1 = track[i-neg-0][1]
+                beta2 = track[i-neg-1][1]
+                good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
+                oneside += np.sign(beta1 - beta2)
+            good &= abs(oneside) == BORDER_MIN_COUNT
+            border[i] = good
+        for i in range(len(track)):
+            for neg in range(BORDER_MIN_COUNT):
+                border[i-neg] |= border[i]
+
+        # Create tiles
+        for i in range(len(track)):
+            alpha1, beta1, x1, y1 = track[i]
+            alpha2, beta2, x2, y2 = track[i-1]
+            road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1))
+            road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1))
+            road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2))
+            road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2))
+            vertices = [road1_l, road1_r, road2_r, road2_l]
+            self.fd_tile.shape.vertices = vertices
+            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
+            t.userData = t
+            c = 0.01*(i%3)
+            t.color = [self.road_color[0] + c,
+                       self.road_color[1] + c,
+                       self.road_color[2] + c]
+            t.road_visited = False
+            t.road_friction = 1.0
+            t.fixtures[0].sensor = True
+            self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color ))
+
+            self.road.append(t)
+            if border[i]:
+                side = np.sign(beta2 - beta1)
+                b1_l = (x1 + side* TRACK_WIDTH        *math.cos(beta1), y1 + side* TRACK_WIDTH        *math.sin(beta1))
+                b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
+                b2_l = (x2 + side* TRACK_WIDTH        *math.cos(beta2), y2 + side* TRACK_WIDTH        *math.sin(beta2))
+                b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
+                self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
+        self.track = track
+        return True
+
+    def reset(self):
+        self._destroy()
+        self.reward = 0.0
+        self.prev_reward = 0.0
+        self.tile_visited_count = 0
+        self.t = 0.0
+        self.road_poly = []
+        self.froad_poly = []
+        self.step_cnt = 0
+
+        # Color modification.
+        self.road_color = np.array([0.4, 0.4, 0.4])  # Original road color.
+        self.grass_color = np.array([0.4, 0.8, 0.4, 1])  # Original grass color.
+        if self._modification_type == 'color':
+            noise1 = np.random.uniform(-0.2, 0.2)
+            noise2 = np.random.uniform(-0.2, 0.2)
+            print('noise1={}'.format(noise1))
+            print('noise2={}'.format(noise2))
+            self.road_color += noise1
+            self.grass_color[:3] += noise2
+        if self._modification_type == 'color3':
+            noise1 = np.random.uniform(-0.2, 0.2, 3)
+            noise2 = np.random.uniform(-0.2, 0.2, 3)
+            print('noise1={}'.format(noise1))
+            print('noise2={}'.format(noise2))
+            self.road_color += noise1
+            self.grass_color[:3] += noise2
+
+        while True:
+            success = self._create_track()
+            if success:
+                break
+            if self.verbose == 1:
+                print("retry to generate track (normal if there are not many of this messages)")
+        add_blob = self._modification_type == 'blob'
+        self.car = Car(self.world, *self.track[0][1:4], add_blob=add_blob)
+
+        return self.step(None)[0]
+
+    def step(self, action):
+        if action is not None:
+            self.car.steer(-action[0])
+            self.car.gas(action[1])
+            self.car.brake(action[2])
+
+        self.car.step(1.0/FPS)
+        self.world.Step(1.0/FPS, 6*30, 2*30)
+        self.t += 1.0/FPS
+
+        self.state = self.render("state_pixels")
+
+        step_reward = 0
+        done = False
+        if action is not None: # First step without action, called from reset()
+            self.reward -= 0.1
+            # We actually don't want to count fuel spent, we want car to be faster.
+            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
+            self.car.fuel_spent = 0.0
+            step_reward = self.reward - self.prev_reward
+            self.prev_reward = self.reward
+            if self.tile_visited_count==len(self.track):
+                done = True
+            x, y = self.car.hull.position
+            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
+                done = True
+                step_reward = -100
+
+        self.step_cnt += 1
+
+        return self.state, step_reward, done, {}
+
+    def render(self, mode='human'):
+        assert mode in ['human', 'state_pixels', 'rgb_array']
+        if self.viewer is None:
+            from gym.envs.classic_control import rendering
+            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
+            self.score_label = pyglet.text.Label('0000', font_size=36,
+                x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center',
+                color=(255,255,255,255))
+            self.transform = rendering.Transform()
+
+        if "t" not in self.__dict__: return  # reset() not called yet
+
+        zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
+        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
+        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
+        scroll_x = self.car.hull.position[0]
+        scroll_y = self.car.hull.position[1]
+        angle = -self.car.hull.angle
+        vel = self.car.hull.linearVelocity
+        if np.linalg.norm(vel) > 0.5:
+            angle = math.atan2(vel[0], vel[1])
+        self.transform.set_scale(zoom, zoom)
+        self.transform.set_translation(
+            WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
+            WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
+        self.transform.set_rotation(angle)
+
+        self.car.draw(self.viewer, mode!="state_pixels")
+
+        arr = None
+        win = self.viewer.window
+        win.switch_to()
+        win.dispatch_events()
+
+        win.clear()
+        t = self.transform
+        if mode=='rgb_array':
+            VP_W = VIDEO_W
+            VP_H = VIDEO_H
+        elif mode == 'state_pixels':
+            VP_W = STATE_W
+            VP_H = STATE_H
+        else:
+            pixel_scale = 1
+            if hasattr(win.context, '_nscontext'):
+                pixel_scale = win.context._nscontext.view().backingScaleFactor()  # pylint: disable=protected-access
+            VP_W = int(pixel_scale * WINDOW_W)
+            VP_H = int(pixel_scale * WINDOW_H)
+
+        gl.glViewport(0, 0, VP_W, VP_H)
+        t.enable()
+        self.render_road()
+        for geom in self.viewer.onetime_geoms:
+            geom.render()
+        self.viewer.onetime_geoms = []
+        t.disable()
+        self.render_indicators(WINDOW_W, WINDOW_H)
+
+        if mode == 'human':
+            win.flip()
+            return self.viewer.isopen
+
+        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
+        arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='')
+        arr = arr.reshape(VP_H, VP_W, 4)
+        arr = arr[::-1, :, 0:3]
+
+        return arr
+
+    def close(self):
+        if self.viewer is not None:
+            self.viewer.close()
+            self.viewer = None
+
+    def render_road(self):
+        if self._modification_type in ['noise', 'video']:
+            H = W = int(PLAYFIELD * 2)
+            if self._modification_type == 'noise':
+                background = np.random.randint(0, 256, H * W * 3).reshape([H, W, 3])
+                img_path = '/tmp/screen.png'
+                cv2.imwrite(img_path, background)
+            else:
+                video_img_dir = os.environ.get('CARRACING_VIDEO_DIR', '/tmp/car_racing_video')
+                max_steps = len(glob.glob(os.path.join(video_img_dir, '*.png')))
+                img_path = os.path.join(video_img_dir, '{}.png'.format((self.step_cnt + 1) % max_steps))
+            image = pyglet.image.load(img_path)
+            image.anchor_x = image.width // 2
+            image.anchor_y = image.height // 2
+            s = pyglet.sprite.Sprite(image)
+            s.draw()
+        else:
+            gl.glBegin(gl.GL_QUADS)
+            gl.glColor4f(*self.grass_color)
+            gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
+            gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
+            gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
+            gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
+            gl.glColor4f(0.4, 0.9, 0.4, 1.0)
+            k = PLAYFIELD/20.0
+            for x in range(-20, 20, 2):
+                for y in range(-20, 20, 2):
+                    gl.glVertex3f(k*x + k, k*y + 0, 0)
+                    gl.glVertex3f(k*x + 0, k*y + 0, 0)
+                    gl.glVertex3f(k*x + 0, k*y + k, 0)
+                    gl.glVertex3f(k*x + k, k*y + k, 0)
+            gl.glEnd()
+
+        gl.glBegin(gl.GL_QUADS)
+        for poly, color in self.road_poly:
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            for p in poly:
+                gl.glVertex3f(p[0], p[1], 0)
+        gl.glEnd()
+
+    def render_indicators(self, W, H):
+        gl.glBegin(gl.GL_QUADS)
+        s = W/40.0
+        h = H/40.0
+        gl.glColor4f(0,0,0,1)
+        gl.glVertex3f(W, 0, 0)
+        gl.glVertex3f(W, 5*h, 0)
+        gl.glVertex3f(0, 5*h, 0)
+        gl.glVertex3f(0, 0, 0)
+
+        if self._modification_type == 'bar':
+            gl.glVertex3f(W, 5*h, 0)
+            gl.glVertex3f(W, H, 0)
+            gl.glVertex3f(W-3*s, H, 0)
+            gl.glVertex3f(W-3*s, 5*h, 0)
+
+            gl.glVertex3f(3*s, 5*h, 0)
+            gl.glVertex3f(3*s, H, 0)
+            gl.glVertex3f(0, H, 0)
+            gl.glVertex3f(0, 5*h, 0)
+
+        def vertical_ind(place, val, color):
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            gl.glVertex3f((place+0)*s, h + h*val, 0)
+            gl.glVertex3f((place+1)*s, h + h*val, 0)
+            gl.glVertex3f((place+1)*s, h, 0)
+            gl.glVertex3f((place+0)*s, h, 0)
+        def horiz_ind(place, val, color):
+            gl.glColor4f(color[0], color[1], color[2], 1)
+            gl.glVertex3f((place+0)*s, 4*h , 0)
+            gl.glVertex3f((place+val)*s, 4*h, 0)
+            gl.glVertex3f((place+val)*s, 2*h, 0)
+            gl.glVertex3f((place+0)*s, 2*h, 0)
+        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]))
+        vertical_ind(5, 0.02*true_speed, (1,1,1))
+        vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors
+        vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1))
+        vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1))
+        vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1))
+        horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0))
+        horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0))
+        gl.glEnd()
+        self.score_label.text = "%04i" % self.reward
+        self.score_label.draw()
+
+
+if __name__=="__main__":
+    from pyglet.window import key
+    a = np.array( [0.0, 0.0, 0.0] )
+    def key_press(k, mod):
+        global restart
+        if k==0xff0d: restart = True
+        if k==key.LEFT:  a[0] = -1.0
+        if k==key.RIGHT: a[0] = +1.0
+        if k==key.UP:    a[1] = +1.0
+        if k==key.DOWN:  a[2] = +0.8   # set 1.0 for wheels to block to zero rotation
+    def key_release(k, mod):
+        if k==key.LEFT  and a[0]==-1.0: a[0] = 0
+        if k==key.RIGHT and a[0]==+1.0: a[0] = 0
+        if k==key.UP:    a[1] = 0
+        if k==key.DOWN:  a[2] = 0
+    env = CarRacing()
+    env.render()
+    env.viewer.window.on_key_press = key_press
+    env.viewer.window.on_key_release = key_release
+    record_video = False
+    if record_video:
+        from gym.wrappers.monitor import Monitor
+        env = Monitor(env, '/tmp/video-test', force=True)
+    isopen = True
+    while isopen:
+        env.reset()
+        total_reward = 0.0
+        steps = 0
+        restart = False
+        while True:
+            s, r, done, info = env.step(a)
+            total_reward += r
+            if steps % 200 == 0 or done:
+                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
+                print("step {} total_reward {:+0.2f}".format(steps, total_reward))
+                #import matplotlib.pyplot as plt
+                #plt.imshow(s)
+                #plt.savefig("test.jpeg")
+            steps += 1
+            isopen = env.render()
+            if done or restart or isopen == False:
+                break
+    env.close()
diff --git a/environments/domain_adaption/car_racing_variants/version.py b/environments/domain_adaption/car_racing_variants/version.py
new file mode 100644
index 0000000..055ed57
--- /dev/null
+++ b/environments/domain_adaption/car_racing_variants/version.py
@@ -0,0 +1 @@
+VERSION='0.0.1'
diff --git a/environments/domain_adaption/natural_rl_environment/imgsource.py b/environments/domain_adaption/natural_rl_environment/imgsource.py
new file mode 100644
index 0000000..37ff12f
--- /dev/null
+++ b/environments/domain_adaption/natural_rl_environment/imgsource.py
@@ -0,0 +1,120 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import cv2
+import skvideo.io
+
+
+class ImageSource(object):
+    """
+    Source of natural images to be added to a simulated environment.
+    """
+    def get_image(self):
+        """
+        Returns:
+            an RGB image of [h, w, 3] with a fixed shape.
+        """
+        pass
+
+    def reset(self):
+        """ Called when an episode ends. """
+        pass
+
+
+class FixedColorSource(ImageSource):
+    def __init__(self, shape, color):
+        """
+        Args:
+            shape: [h, w]
+            color: a 3-tuple
+        """
+        self.arr = np.zeros((shape[0], shape[1], 3))
+        self.arr[:, :] = color
+
+    def get_image(self):
+        return np.copy(self.arr)
+
+
+class RandomColorSource(ImageSource):
+    def __init__(self, shape):
+        """
+        Args:
+            shape: [h, w]
+        """
+        self.shape = shape
+        self.reset()
+
+    def reset(self):
+        self._color = np.random.randint(0, 256, size=(3,))
+
+    def get_image(self):
+        arr = np.zeros((self.shape[0], self.shape[1], 3))
+        arr[:, :] = self._color
+        return arr
+
+
+class NoiseSource(ImageSource):
+    def __init__(self, shape, strength=50):
+        """
+        Args:
+            shape: [h, w]
+            strength (int): the strength of noise, in range [0, 255]
+        """
+        self.shape = shape
+        self.strength = strength
+
+    def get_image(self):
+        return np.maximum(np.random.randn(
+            self.shape[0], self.shape[1], 3) * self.strength, 0)
+
+
+class RandomImageSource(ImageSource):
+    def __init__(self, shape, filelist):
+        """
+        Args:
+            shape: [h, w]
+            filelist: a list of image files
+        """
+        self.shape_wh = shape[::-1]
+        self.filelist = filelist
+        self.reset()
+
+    def reset(self):
+        fname = np.random.choice(self.filelist)
+        im = cv2.imread(fname, cv2.IMREAD_COLOR)
+        im = im[:, :, ::-1]
+        im = cv2.resize(im, self.shape_wh)
+        self._im = im
+
+    def get_image(self):
+        return self._im
+
+
+class RandomVideoSource(ImageSource):
+    def __init__(self, shape, filelist):
+        """
+        Args:
+            shape: [h, w]
+            filelist: a list of video files
+        """
+        self.shape_wh = shape[::-1]
+        self.filelist = filelist
+        self.reset()
+
+    def reset(self):
+        fname = np.random.choice(self.filelist)
+        self.frames = skvideo.io.vread(fname)
+        self.frame_idx = 0
+
+    def get_image(self):
+        if self.frame_idx >= self.frames.shape[0]:
+            self.reset()
+        im = self.frames[self.frame_idx][:, :, ::-1]
+        self.frame_idx += 1
+        im = im[:, :, ::-1]
+        im = cv2.resize(im, self.shape_wh)
+        return im
diff --git a/environments/domain_adaption/natural_rl_environment/matting.py b/environments/domain_adaption/natural_rl_environment/matting.py
new file mode 100644
index 0000000..10acde4
--- /dev/null
+++ b/environments/domain_adaption/natural_rl_environment/matting.py
@@ -0,0 +1,32 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+class BackgroundMatting(object):
+    """
+    Produce a mask of a given image which will be replaced by natural signals.
+    """
+    def get_mask(self, img):
+        """
+        Take an image of [H, W, 3]. Returns a mask of [H, W]
+        """
+        raise NotImplementedError()
+
+
+class BackgroundMattingWithColor(BackgroundMatting):
+    """
+    Produce a mask by masking the given color. This is a simple strategy
+    but effective for many games.
+    """
+    def __init__(self, color):
+        """
+        Args:
+            color: a (r, g, b) tuple
+        """
+        self._color = color
+
+    def get_mask(self, img):
+        return img == self._color
diff --git a/environments/domain_adaption/natural_rl_environment/natural_env.py b/environments/domain_adaption/natural_rl_environment/natural_env.py
new file mode 100755
index 0000000..7017e88
--- /dev/null
+++ b/environments/domain_adaption/natural_rl_environment/natural_env.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import argparse
+import glob
+import gym
+from gym.utils import play
+
+from matting import BackgroundMattingWithColor
+from imgsource import (
+    RandomImageSource,
+    RandomColorSource,
+    NoiseSource,
+    RandomVideoSource,
+)
+
+
+class ReplaceBackgroundEnv(gym.ObservationWrapper):
+
+    viewer = None
+
+    def __init__(self, env, bg_matting, natural_source):
+        """
+        The source must produce a image with a shape that's compatible to
+        `env.observation_space`.
+        """
+        super(ReplaceBackgroundEnv, self).__init__(env)
+        self._bg_matting = bg_matting
+        self._natural_source = natural_source
+
+    def observation(self, obs):
+        mask = self._bg_matting.get_mask(obs)
+        img = self._natural_source.get_image()
+        obs[mask] = img[mask]
+        self._last_ob = obs
+        return obs
+
+    def reset(self):
+        self._natural_source.reset()
+        return super(ReplaceBackgroundEnv, self).reset()
+
+    # modified from gym/envs/atari/atari_env.py
+    # This makes the monitor work
+    def render(self, mode="human"):
+        img = self._last_ob
+        if mode == "rgb_array":
+            return img
+        elif mode == "human":
+            from gym.envs.classic_control import rendering
+
+            if self.viewer is None:
+                self.viewer = rendering.SimpleImageViewer()
+            self.viewer.imshow(img)
+            return env.viewer.isopen
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--env", help="The gym environment to base on")
+    parser.add_argument("--imgsource", choices=["color", "noise", "images", "videos"])
+    parser.add_argument(
+        "--resource-files", help="A glob pattern to obtain images or videos"
+    )
+    parser.add_argument("--dump-video", help="If given, a directory to dump video")
+    args = parser.parse_args()
+
+    env = gym.make(args.env)
+    shape2d = env.observation_space.shape[:2]
+
+    if args.imgsource:
+        if args.imgsource == "color":
+            imgsource = RandomColorSource(shape2d)
+        elif args.imgsource == "noise":
+            imgsource = NoiseSource(shape2d)
+        else:
+            files = glob.glob(os.path.expanduser(args.resource_files))
+            assert len(files), "Pattern {} does not match any files".format(
+                args.resource_files
+            )
+            if args.imgsource == "images":
+                imgsource = RandomImageSource(shape2d, files)
+            else:
+                imgsource = RandomVideoSource(shape2d, files)
+
+        wrapped_env = ReplaceBackgroundEnv(
+            env, BackgroundMattingWithColor((0, 0, 0)), imgsource
+        )
+    else:
+        wrapped_env = env
+
+    if args.dump_video:
+        assert os.path.isdir(args.dump_video)
+        wrapped_env = gym.wrappers.Monitor(wrapped_env, args.dump_video)
+    play.play(wrapped_env, zoom=4)
diff --git a/environments/domain_adaption/test.py b/environments/domain_adaption/test.py
new file mode 100644
index 0000000..5f150a9
--- /dev/null
+++ b/environments/domain_adaption/test.py
@@ -0,0 +1,24 @@
+import gym
+
+env_dict = gym.envs.registration.registry.env_specs.copy()
+
+
+for env in env_dict:
+    if 'CarRacingColor3-v0' in env:
+        print("Remove {} from registry".format(env))
+        del gym.envs.registration.registry.env_specs[env]
+#from environments.domain_adaption.natural_rl_environment import natural_env
+import environments.domain_adaption.car_racing_variants
+
+
+env = gym.make('CarRacingColor3-v0')
+env.seed(666)
+
+while True:
+    ob = env.reset()
+    done = False
+    step = 0
+    while not done and 0 <= step <= 500:
+        ob, reward, done, _ = env.step(env.action_space.sample())
+        step += 1
+        env.render()
\ No newline at end of file
diff --git a/reload_agent.py b/reload_agent.py
index 9973452..cb88e19 100644
--- a/reload_agent.py
+++ b/reload_agent.py
@@ -14,20 +14,21 @@ warnings.filterwarnings('ignore', category=UserWarning)
 
 if __name__ == '__main__':
 
-    out_path = Path(r'C:\Users\steff\projects\f_iks\debug_out\A2C_1622558379')
-    with (out_path / f'env_{out_path.name}.pick').open('rb') as f:
+    model_name = 'A2C_1622571986'
+    run_id = 0
+    out_path = Path(__file__).parent / 'debug_out'
+    model_path = out_path / model_name
+
+    with (model_path / f'env_{model_name}.pick').open('rb') as f:
         env_kwargs = pickle.load(f)
-    env = SimpleFactory(allow_no_op=False, allow_diagonal_movement=False, allow_square_movement=True, **env_kwargs)
+    env = SimpleFactory( **env_kwargs)
 
     # Edit THIS:
-    model_path = out_path / '1_A2C_1622558379'
-
-    model_files = list(natsorted(out_path.rglob('*.zip')))
+    model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('*.zip')))
     this_model = model_files[0]
 
     model = PPO.load(this_model)
-    evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False,
-                                        render=True)
+    evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True)
     print(evaluation_result)
 
     env.close()