From 38a3ef76875e35655a9a11efee1cb155b5a78450 Mon Sep 17 00:00:00 2001 From: romue Date: Wed, 2 Jun 2021 09:38:15 +0200 Subject: [PATCH] added domain shift envs and adjusted reload_agent.py --- environments/domain_adaption/__init__.py | 0 .../car_racing_variants/__init__.py | 64 ++ .../car_racing_variants/car_dynamics.py | 289 +++++++++ .../car_racing_variants/car_racing.py | 608 ++++++++++++++++++ .../car_racing_variants/version.py | 1 + .../natural_rl_environment/imgsource.py | 120 ++++ .../natural_rl_environment/matting.py | 32 + .../natural_rl_environment/natural_env.py | 99 +++ environments/domain_adaption/test.py | 24 + reload_agent.py | 17 +- 10 files changed, 1246 insertions(+), 8 deletions(-) create mode 100644 environments/domain_adaption/__init__.py create mode 100644 environments/domain_adaption/car_racing_variants/__init__.py create mode 100644 environments/domain_adaption/car_racing_variants/car_dynamics.py create mode 100644 environments/domain_adaption/car_racing_variants/car_racing.py create mode 100644 environments/domain_adaption/car_racing_variants/version.py create mode 100644 environments/domain_adaption/natural_rl_environment/imgsource.py create mode 100644 environments/domain_adaption/natural_rl_environment/matting.py create mode 100755 environments/domain_adaption/natural_rl_environment/natural_env.py create mode 100644 environments/domain_adaption/test.py diff --git a/environments/domain_adaption/__init__.py b/environments/domain_adaption/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/environments/domain_adaption/car_racing_variants/__init__.py b/environments/domain_adaption/car_racing_variants/__init__.py new file mode 100644 index 0000000..ae0a495 --- /dev/null +++ b/environments/domain_adaption/car_racing_variants/__init__.py @@ -0,0 +1,64 @@ +from gym.envs.registration import register + +# Env registration +# ========================== + +register( + id='CarRacingColor-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'color', + }, +) + +register( + id='CarRacingColor3-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'color3', + }, +) + +register( + id='CarRacingBar-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'bar', + }, +) + +register( + id='CarRacingBlob-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'blob', + }, +) + +register( + id='CarRacingNoise-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'noise', + }, +) + +register( + id='CarRacingVideo-v0', + entry_point='car_racing_variants.car_racing:CarRacing', + max_episode_steps=1000, + reward_threshold=900.0, + kwargs={ + 'modification': 'video', + }, +) diff --git a/environments/domain_adaption/car_racing_variants/car_dynamics.py b/environments/domain_adaption/car_racing_variants/car_dynamics.py new file mode 100644 index 0000000..e8d96fa --- /dev/null +++ b/environments/domain_adaption/car_racing_variants/car_dynamics.py @@ -0,0 +1,289 @@ +"""This file is based on the car_dynamics.py in OpenAI's gym. + +Description + We provide some modified CarRacing environments here: + * CarRacingColor-v0 + Both the lane and the grass colors are perturbed at the env.reset() with scalar noises. + * CarRacingColor3-v0 + Both the lane and the grass colors are perturbed at the env.reset() with 3d noises. + * CarRacingBar-v0 + We add vertical bars on the left and right side of the screen. + * CarRacingBlob-v0 + A red blob that follows the car at a fixed position in the car's frame. + * CarRacingNoise-v0 + We replace the green background with noise. + * CarRacingVideo-v0 + We replace the green background with frames from a video, the user is + responsible for creating these frames and pass the directory. + These environments were first used in the paper "Neuroevolution of Self-Interpretable Agent" + https://attentionagent.github.io/ + +Author + Yujin Tang (yujintang@google.com) +""" + +import numpy as np +import math +import Box2D +from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener, shape) + +# Top-down car dynamics simulation. +# +# Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell. +# This simulation is a bit more detailed, with wheels rotation. +# +# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym. + +SIZE = 0.02 +ENGINE_POWER = 100000000*SIZE*SIZE +WHEEL_MOMENT_OF_INERTIA = 4000*SIZE*SIZE +FRICTION_LIMIT = 1000000*SIZE*SIZE # friction ~= mass ~= size^2 (calculated implicitly using density) +WHEEL_R = 27 +WHEEL_W = 14 +WHEELPOS = [ + (-55,+80), (+55,+80), + (-55,-82), (+55,-82) + ] +HULL_POLY1 =[ + (-60,+130), (+60,+130), + (+60,+110), (-60,+110) + ] +HULL_POLY2 =[ + (-15,+120), (+15,+120), + (+20, +20), (-20, 20) + ] +HULL_POLY3 =[ + (+25, +20), + (+50, -10), + (+50, -40), + (+20, -90), + (-20, -90), + (-50, -40), + (-50, -10), + (-25, +20) + ] +HULL_POLY4 =[ + (-50,-120), (+50,-120), + (+50,-90), (-50,-90) + ] + +WHEEL_COLOR = (0.0,0.0,0.0) +WHEEL_WHITE = (0.3,0.3,0.3) +MUD_COLOR = (0.4,0.4,0.0) + +class Car: + def __init__(self, world, init_angle, init_x, init_y, add_blob=False): + self.world = world + + self.add_blob = add_blob + + fixtures = [ + fixtureDef(shape=polygonShape( + vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY1 ]), density=1.0), + fixtureDef(shape=polygonShape( + vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY2 ]), density=1.0), + fixtureDef(shape=polygonShape( + vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY3 ]), density=1.0), + fixtureDef(shape=polygonShape( + vertices=[ (x*SIZE,y*SIZE) for x,y in HULL_POLY4 ]), density=1.0), + ] + self.hull = self.world.CreateDynamicBody( + position = (init_x, init_y), + angle = init_angle, + fixtures = fixtures, + ) + self.hull.color = (0.8,0.0,0.0) + self.wheels = [] + self.fuel_spent = 0.0 + WHEEL_POLY = [ + (-WHEEL_W,+WHEEL_R), (+WHEEL_W,+WHEEL_R), + (+WHEEL_W,-WHEEL_R), (-WHEEL_W,-WHEEL_R) + ] + for wx,wy in WHEELPOS: + front_k = 1.0 if wy > 0 else 1.0 + w = self.world.CreateDynamicBody( + position = (init_x+wx*SIZE, init_y+wy*SIZE), + angle = init_angle, + fixtures = fixtureDef( + shape=polygonShape(vertices=[ (x*front_k*SIZE,y*front_k*SIZE) for x,y in WHEEL_POLY ]), + density=0.1, + categoryBits=0x0020, + maskBits=0x001, + restitution=0.0) + ) + w.wheel_rad = front_k*WHEEL_R*SIZE + w.color = WHEEL_COLOR + w.gas = 0.0 + w.brake = 0.0 + w.steer = 0.0 + w.phase = 0.0 # wheel angle + w.omega = 0.0 # angular velocity + w.skid_start = None + w.skid_particle = None + rjd = revoluteJointDef( + bodyA=self.hull, + bodyB=w, + localAnchorA=(wx*SIZE,wy*SIZE), + localAnchorB=(0,0), + enableMotor=True, + enableLimit=True, + maxMotorTorque=180*900*SIZE*SIZE, + motorSpeed = 0, + lowerAngle = -0.4, + upperAngle = +0.4, + ) + w.joint = self.world.CreateJoint(rjd) + w.tiles = set() + w.userData = w + self.wheels.append(w) + self.drawlist = self.wheels + [self.hull] + self.particles = [] + + def gas(self, gas): + 'control: rear wheel drive' + gas = np.clip(gas, 0, 1) + for w in self.wheels[2:4]: + diff = gas - w.gas + if diff > 0.1: diff = 0.1 # gradually increase, but stop immediately + w.gas += diff + + def brake(self, b): + 'control: brake b=0..1, more than 0.9 blocks wheels to zero rotation' + for w in self.wheels: + w.brake = b + + def steer(self, s): + 'control: steer s=-1..1, it takes time to rotate steering wheel from side to side, s is target position' + self.wheels[0].steer = s + self.wheels[1].steer = s + + def step(self, dt): + for w in self.wheels: + # Steer each wheel + dir = np.sign(w.steer - w.joint.angle) + val = abs(w.steer - w.joint.angle) + w.joint.motorSpeed = dir*min(50.0*val, 3.0) + + # Position => friction_limit + grass = True + friction_limit = FRICTION_LIMIT*0.6 # Grass friction if no tile + for tile in w.tiles: + friction_limit = max(friction_limit, FRICTION_LIMIT*tile.road_friction) + grass = False + + # Force + forw = w.GetWorldVector( (0,1) ) + side = w.GetWorldVector( (1,0) ) + v = w.linearVelocity + vf = forw[0]*v[0] + forw[1]*v[1] # forward speed + vs = side[0]*v[0] + side[1]*v[1] # side speed + + # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy + # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power + # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega + w.omega += dt*ENGINE_POWER*w.gas/WHEEL_MOMENT_OF_INERTIA/(abs(w.omega)+5.0) # small coef not to divide by zero + self.fuel_spent += dt*ENGINE_POWER*w.gas + + if w.brake >= 0.9: + w.omega = 0 + elif w.brake > 0: + BRAKE_FORCE = 15 # radians per second + dir = -np.sign(w.omega) + val = BRAKE_FORCE*w.brake + if abs(val) > abs(w.omega): val = abs(w.omega) # low speed => same as = 0 + w.omega += dir*val + w.phase += w.omega*dt + + vr = w.omega*w.wheel_rad # rotating wheel speed + f_force = -vf + vr # force direction is direction of speed difference + p_force = -vs + + # Physically correct is to always apply friction_limit until speed is equal. + # But dt is finite, that will lead to oscillations if difference is already near zero. + f_force *= 205000*SIZE*SIZE # Random coefficient to cut oscillations in few steps (have no effect on friction_limit) + p_force *= 205000*SIZE*SIZE + force = np.sqrt(np.square(f_force) + np.square(p_force)) + + # Skid trace + if abs(force) > 2.0*friction_limit: + if w.skid_particle and w.skid_particle.grass==grass and len(w.skid_particle.poly) < 30: + w.skid_particle.poly.append( (w.position[0], w.position[1]) ) + elif w.skid_start is None: + w.skid_start = w.position + else: + w.skid_particle = self._create_particle( w.skid_start, w.position, grass ) + w.skid_start = None + else: + w.skid_start = None + w.skid_particle = None + + if abs(force) > friction_limit: + f_force /= force + p_force /= force + force = friction_limit # Correct physics here + f_force *= force + p_force *= force + + w.omega -= dt*f_force*w.wheel_rad/WHEEL_MOMENT_OF_INERTIA + + w.ApplyForceToCenter( ( + p_force*side[0] + f_force*forw[0], + p_force*side[1] + f_force*forw[1]), True ) + + def draw(self, viewer, draw_particles=True): + if draw_particles: + for p in self.particles: + viewer.draw_polyline(p.poly, color=p.color, linewidth=5) + for obj in self.drawlist: + for i, f in enumerate(obj.fixtures): + trans = f.body.transform + path = [trans*v for v in f.shape.vertices] + if self.add_blob and i == 3: + obj_color = (0.8, 0.0, 0.) + offset_x = f.shape.vertices[0][0] + 20 + offset_y = f.shape.vertices[0][1] + 10 + width = height = 8 + blob =[ + trans * (+offset_x, +offset_y+height), + trans * (+offset_x+width, +offset_y+height), + trans * (+offset_x+width, +offset_y), + trans * (+offset_x, +offset_y), + ] + viewer.draw_polygon(blob, color=obj_color) + viewer.draw_polygon(path, color=obj.color) + if "phase" not in obj.__dict__: continue + a1 = obj.phase + a2 = obj.phase + 1.2 # radians + s1 = math.sin(a1) + s2 = math.sin(a2) + c1 = math.cos(a1) + c2 = math.cos(a2) + if s1>0 and s2>0: continue + if s1>0: c1 = np.sign(c1) + if s2>0: c2 = np.sign(c2) + white_poly = [ + (-WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), (+WHEEL_W*SIZE, +WHEEL_R*c1*SIZE), + (+WHEEL_W*SIZE, +WHEEL_R*c2*SIZE), (-WHEEL_W*SIZE, +WHEEL_R*c2*SIZE) + ] + viewer.draw_polygon([trans*v for v in white_poly], color=WHEEL_WHITE) + + def _create_particle(self, point1, point2, grass): + class Particle: + pass + p = Particle() + p.color = WHEEL_COLOR if not grass else MUD_COLOR + p.ttl = 1 + p.poly = [(point1[0],point1[1]), (point2[0],point2[1])] + p.grass = grass + self.particles.append(p) + while len(self.particles) > 30: + self.particles.pop(0) + return p + + def destroy(self): + self.world.DestroyBody(self.hull) + self.hull = None + for w in self.wheels: + self.world.DestroyBody(w) + self.wheels = [] + diff --git a/environments/domain_adaption/car_racing_variants/car_racing.py b/environments/domain_adaption/car_racing_variants/car_racing.py new file mode 100644 index 0000000..de86a21 --- /dev/null +++ b/environments/domain_adaption/car_racing_variants/car_racing.py @@ -0,0 +1,608 @@ +"""This file is based on the car_racing.py in OpenAI's gym. + +Description + We provide some modified CarRacing environments here: + * CarRacingColor-v0 + Both the lane and the grass colors are perturbed at the env.reset() with scalar noises. + * CarRacingColor3-v0 + Both the lane and the grass colors are perturbed at the env.reset() with 3d noises. + * CarRacingBar-v0 + We add vertical bars on the left and right side of the screen. + * CarRacingBlob-v0 + A red blob that follows the car at a fixed position in the car's frame. + * CarRacingNoise-v0 + We replace the green background with noise. + *. CarRacingVideo-v0 + We replace the green background with frames from a video, the user is + responsible for creating these frames. + These environments were first used in the paper "Neuroevolution of Self-Interpretable Agent" + https://attentionagent.github.io/ + +Author + Yujin Tang (yujintang@google.com) +""" + +import cv2 +import glob +import os +import sys, math +import numpy as np + +import Box2D +from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener) + +import gym +from gym import spaces +from .car_dynamics import Car +from gym.utils import colorize, seeding, EzPickle + +import pyglet +from pyglet import gl + +# Easiest continuous control task to learn from pixels, a top-down racing environment. +# Discrete control is reasonable in this environment as well, on/off discretization is +# fine. +# +# State consists of STATE_W x STATE_H pixels. +# +# Reward is -0.1 every frame and +1000/N for every track tile visited, where N is +# the total number of tiles visited in the track. For example, if you have finished in 732 frames, +# your reward is 1000 - 0.1*732 = 926.8 points. +# +# Game is solved when agent consistently gets 900+ points. Track generated is random every episode. +# +# Episode finishes when all tiles are visited. Car also can go outside of PLAYFIELD, that +# is far off the track, then it will get -100 and die. +# +# Some indicators shown at the bottom of the window and the state RGB buffer. From +# left to right: true speed, four ABS sensors, steering wheel position and gyroscope. +# +# To play yourself (it's rather fast for humans), type: +# +# python gym/envs/box2d/car_racing.py +# +# Remember it's powerful rear-wheel drive car, don't press accelerator and turn at the +# same time. +# +# Created by Oleg Klimov. Licensed on the same terms as the rest of OpenAI Gym. + +STATE_W = 96 # less than Atari 160x192 +STATE_H = 96 +VIDEO_W = 600 +VIDEO_H = 400 +WINDOW_W = 1000 +WINDOW_H = 800 + +SCALE = 6.0 # Track scale +TRACK_RAD = 900/SCALE # Track is heavily morphed circle with this radius +PLAYFIELD = 2000/SCALE # Game over boundary +FPS = 50 # Frames per second +ZOOM = 2.7 # Camera zoom +ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom) + + +TRACK_DETAIL_STEP = 21/SCALE +TRACK_TURN_RATE = 0.31 +TRACK_WIDTH = 40/SCALE +BORDER = 8/SCALE +BORDER_MIN_COUNT = 4 + +ROAD_COLOR = [0.4, 0.4, 0.4] + +class FrictionDetector(contactListener): + def __init__(self, env): + contactListener.__init__(self) + self.env = env + def BeginContact(self, contact): + self._contact(contact, True) + def EndContact(self, contact): + self._contact(contact, False) + def _contact(self, contact, begin): + tile = None + obj = None + u1 = contact.fixtureA.body.userData + u2 = contact.fixtureB.body.userData + if u1 and "road_friction" in u1.__dict__: + tile = u1 + obj = u2 + if u2 and "road_friction" in u2.__dict__: + tile = u2 + obj = u1 + if not tile: + return + + tile.color[0] = self.env.road_color[0] + tile.color[1] = self.env.road_color[1] + tile.color[2] = self.env.road_color[2] + if not obj or "tiles" not in obj.__dict__: + return + if begin: + obj.tiles.add(tile) + # print tile.road_friction, "ADD", len(obj.tiles) + if not tile.road_visited: + tile.road_visited = True + self.env.reward += 1000.0/len(self.env.track) + self.env.tile_visited_count += 1 + else: + obj.tiles.remove(tile) + # print tile.road_friction, "DEL", len(obj.tiles) -- should delete to zero when on grass (this works) + +class CarRacing(gym.Env, EzPickle): + metadata = { + 'render.modes': ['human', 'rgb_array', 'state_pixels'], + 'video.frames_per_second' : FPS + } + + def __init__(self, verbose=1, **kwargs): + EzPickle.__init__(self) + self.seed() + + self.road_color = ROAD_COLOR[:] + self.grass_color = [0.4, 0.8, 0.4, 1] + if 'modification' in kwargs: + self._modification_type = kwargs['modification'] + else: + self._modification_type = '' + + self.contactListener_keepref = FrictionDetector(self) + self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref) + self.viewer = None + self.invisible_state_window = None + self.invisible_video_window = None + self.road = None + self.car = None + self.reward = 0.0 + self.prev_reward = 0.0 + self.verbose = verbose + self.fd_tile = fixtureDef( + shape = polygonShape(vertices= + [(0, 0),(1, 0),(1, -1),(0, -1)])) + + self.action_space = spaces.Box( np.array([-1,0,0]), np.array([+1,+1,+1]), dtype=np.float32) # steer, gas, brake + self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) + + self.step_cnt = 0 + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def _destroy(self): + if not self.road: + return + for t in self.road: + self.world.DestroyBody(t) + self.road = [] + self.car.destroy() + + def _create_track(self): + CHECKPOINTS = 12 + + # Create checkpoints + checkpoints = [] + for c in range(CHECKPOINTS): + alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS) + rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD) + if c==0: + alpha = 0 + rad = 1.5*TRACK_RAD + if c==CHECKPOINTS-1: + alpha = 2*math.pi*c/CHECKPOINTS + self.start_alpha = 2*math.pi*(-0.5)/CHECKPOINTS + rad = 1.5*TRACK_RAD + checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) ) + + # print "\n".join(str(h) for h in checkpoints) + # self.road_poly = [ ( # uncomment this to see checkpoints + # [ (tx,ty) for a,tx,ty in checkpoints ], + # (0.7,0.7,0.9) ) ] + self.road = [] + + # Go from one checkpoint to another to create track + x, y, beta = 1.5*TRACK_RAD, 0, 0 + dest_i = 0 + laps = 0 + track = [] + no_freeze = 2500 + visited_other_side = False + while True: + alpha = math.atan2(y, x) + if visited_other_side and alpha > 0: + laps += 1 + visited_other_side = False + if alpha < 0: + visited_other_side = True + alpha += 2*math.pi + while True: # Find destination from checkpoints + failed = True + while True: + dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] + if alpha <= dest_alpha: + failed = False + break + dest_i += 1 + if dest_i % len(checkpoints) == 0: + break + if not failed: + break + alpha -= 2*math.pi + continue + r1x = math.cos(beta) + r1y = math.sin(beta) + p1x = -r1y + p1y = r1x + dest_dx = dest_x - x # vector towards destination + dest_dy = dest_y - y + proj = r1x*dest_dx + r1y*dest_dy # destination vector projected on rad + while beta - alpha > 1.5*math.pi: + beta -= 2*math.pi + while beta - alpha < -1.5*math.pi: + beta += 2*math.pi + prev_beta = beta + proj *= SCALE + if proj > 0.3: + beta -= min(TRACK_TURN_RATE, abs(0.001*proj)) + if proj < -0.3: + beta += min(TRACK_TURN_RATE, abs(0.001*proj)) + x += p1x*TRACK_DETAIL_STEP + y += p1y*TRACK_DETAIL_STEP + track.append( (alpha,prev_beta*0.5 + beta*0.5,x,y) ) + if laps > 4: + break + no_freeze -= 1 + if no_freeze==0: + break + # print "\n".join([str(t) for t in enumerate(track)]) + + # Find closed loop range i1..i2, first loop should be ignored, second is OK + i1, i2 = -1, -1 + i = len(track) + while True: + i -= 1 + if i==0: + return False # Failed + pass_through_start = track[i][0] > self.start_alpha and track[i-1][0] <= self.start_alpha + if pass_through_start and i2==-1: + i2 = i + elif pass_through_start and i1==-1: + i1 = i + break + if self.verbose == 1: + print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1)) + assert i1!=-1 + assert i2!=-1 + + track = track[i1:i2-1] + + first_beta = track[0][1] + first_perp_x = math.cos(first_beta) + first_perp_y = math.sin(first_beta) + # Length of perpendicular jump to put together head and tail + well_glued_together = np.sqrt( + np.square( first_perp_x*(track[0][2] - track[-1][2]) ) + + np.square( first_perp_y*(track[0][3] - track[-1][3]) )) + if well_glued_together > TRACK_DETAIL_STEP: + return False + + # Red-white border on hard turns + border = [False]*len(track) + for i in range(len(track)): + good = True + oneside = 0 + for neg in range(BORDER_MIN_COUNT): + beta1 = track[i-neg-0][1] + beta2 = track[i-neg-1][1] + good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2 + oneside += np.sign(beta1 - beta2) + good &= abs(oneside) == BORDER_MIN_COUNT + border[i] = good + for i in range(len(track)): + for neg in range(BORDER_MIN_COUNT): + border[i-neg] |= border[i] + + # Create tiles + for i in range(len(track)): + alpha1, beta1, x1, y1 = track[i] + alpha2, beta2, x2, y2 = track[i-1] + road1_l = (x1 - TRACK_WIDTH*math.cos(beta1), y1 - TRACK_WIDTH*math.sin(beta1)) + road1_r = (x1 + TRACK_WIDTH*math.cos(beta1), y1 + TRACK_WIDTH*math.sin(beta1)) + road2_l = (x2 - TRACK_WIDTH*math.cos(beta2), y2 - TRACK_WIDTH*math.sin(beta2)) + road2_r = (x2 + TRACK_WIDTH*math.cos(beta2), y2 + TRACK_WIDTH*math.sin(beta2)) + vertices = [road1_l, road1_r, road2_r, road2_l] + self.fd_tile.shape.vertices = vertices + t = self.world.CreateStaticBody(fixtures=self.fd_tile) + t.userData = t + c = 0.01*(i%3) + t.color = [self.road_color[0] + c, + self.road_color[1] + c, + self.road_color[2] + c] + t.road_visited = False + t.road_friction = 1.0 + t.fixtures[0].sensor = True + self.road_poly.append(( [road1_l, road1_r, road2_r, road2_l], t.color )) + + self.road.append(t) + if border[i]: + side = np.sign(beta2 - beta1) + b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1)) + b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1)) + b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2)) + b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2)) + self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) )) + self.track = track + return True + + def reset(self): + self._destroy() + self.reward = 0.0 + self.prev_reward = 0.0 + self.tile_visited_count = 0 + self.t = 0.0 + self.road_poly = [] + self.froad_poly = [] + self.step_cnt = 0 + + # Color modification. + self.road_color = np.array([0.4, 0.4, 0.4]) # Original road color. + self.grass_color = np.array([0.4, 0.8, 0.4, 1]) # Original grass color. + if self._modification_type == 'color': + noise1 = np.random.uniform(-0.2, 0.2) + noise2 = np.random.uniform(-0.2, 0.2) + print('noise1={}'.format(noise1)) + print('noise2={}'.format(noise2)) + self.road_color += noise1 + self.grass_color[:3] += noise2 + if self._modification_type == 'color3': + noise1 = np.random.uniform(-0.2, 0.2, 3) + noise2 = np.random.uniform(-0.2, 0.2, 3) + print('noise1={}'.format(noise1)) + print('noise2={}'.format(noise2)) + self.road_color += noise1 + self.grass_color[:3] += noise2 + + while True: + success = self._create_track() + if success: + break + if self.verbose == 1: + print("retry to generate track (normal if there are not many of this messages)") + add_blob = self._modification_type == 'blob' + self.car = Car(self.world, *self.track[0][1:4], add_blob=add_blob) + + return self.step(None)[0] + + def step(self, action): + if action is not None: + self.car.steer(-action[0]) + self.car.gas(action[1]) + self.car.brake(action[2]) + + self.car.step(1.0/FPS) + self.world.Step(1.0/FPS, 6*30, 2*30) + self.t += 1.0/FPS + + self.state = self.render("state_pixels") + + step_reward = 0 + done = False + if action is not None: # First step without action, called from reset() + self.reward -= 0.1 + # We actually don't want to count fuel spent, we want car to be faster. + # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER + self.car.fuel_spent = 0.0 + step_reward = self.reward - self.prev_reward + self.prev_reward = self.reward + if self.tile_visited_count==len(self.track): + done = True + x, y = self.car.hull.position + if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: + done = True + step_reward = -100 + + self.step_cnt += 1 + + return self.state, step_reward, done, {} + + def render(self, mode='human'): + assert mode in ['human', 'state_pixels', 'rgb_array'] + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) + self.score_label = pyglet.text.Label('0000', font_size=36, + x=20, y=WINDOW_H*2.5/40.00, anchor_x='left', anchor_y='center', + color=(255,255,255,255)) + self.transform = rendering.Transform() + + if "t" not in self.__dict__: return # reset() not called yet + + zoom = 0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second + zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W + zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W + scroll_x = self.car.hull.position[0] + scroll_y = self.car.hull.position[1] + angle = -self.car.hull.angle + vel = self.car.hull.linearVelocity + if np.linalg.norm(vel) > 0.5: + angle = math.atan2(vel[0], vel[1]) + self.transform.set_scale(zoom, zoom) + self.transform.set_translation( + WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)), + WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) ) + self.transform.set_rotation(angle) + + self.car.draw(self.viewer, mode!="state_pixels") + + arr = None + win = self.viewer.window + win.switch_to() + win.dispatch_events() + + win.clear() + t = self.transform + if mode=='rgb_array': + VP_W = VIDEO_W + VP_H = VIDEO_H + elif mode == 'state_pixels': + VP_W = STATE_W + VP_H = STATE_H + else: + pixel_scale = 1 + if hasattr(win.context, '_nscontext'): + pixel_scale = win.context._nscontext.view().backingScaleFactor() # pylint: disable=protected-access + VP_W = int(pixel_scale * WINDOW_W) + VP_H = int(pixel_scale * WINDOW_H) + + gl.glViewport(0, 0, VP_W, VP_H) + t.enable() + self.render_road() + for geom in self.viewer.onetime_geoms: + geom.render() + self.viewer.onetime_geoms = [] + t.disable() + self.render_indicators(WINDOW_W, WINDOW_H) + + if mode == 'human': + win.flip() + return self.viewer.isopen + + image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() + arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='') + arr = arr.reshape(VP_H, VP_W, 4) + arr = arr[::-1, :, 0:3] + + return arr + + def close(self): + if self.viewer is not None: + self.viewer.close() + self.viewer = None + + def render_road(self): + if self._modification_type in ['noise', 'video']: + H = W = int(PLAYFIELD * 2) + if self._modification_type == 'noise': + background = np.random.randint(0, 256, H * W * 3).reshape([H, W, 3]) + img_path = '/tmp/screen.png' + cv2.imwrite(img_path, background) + else: + video_img_dir = os.environ.get('CARRACING_VIDEO_DIR', '/tmp/car_racing_video') + max_steps = len(glob.glob(os.path.join(video_img_dir, '*.png'))) + img_path = os.path.join(video_img_dir, '{}.png'.format((self.step_cnt + 1) % max_steps)) + image = pyglet.image.load(img_path) + image.anchor_x = image.width // 2 + image.anchor_y = image.height // 2 + s = pyglet.sprite.Sprite(image) + s.draw() + else: + gl.glBegin(gl.GL_QUADS) + gl.glColor4f(*self.grass_color) + gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0) + gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0) + gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0) + gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0) + gl.glColor4f(0.4, 0.9, 0.4, 1.0) + k = PLAYFIELD/20.0 + for x in range(-20, 20, 2): + for y in range(-20, 20, 2): + gl.glVertex3f(k*x + k, k*y + 0, 0) + gl.glVertex3f(k*x + 0, k*y + 0, 0) + gl.glVertex3f(k*x + 0, k*y + k, 0) + gl.glVertex3f(k*x + k, k*y + k, 0) + gl.glEnd() + + gl.glBegin(gl.GL_QUADS) + for poly, color in self.road_poly: + gl.glColor4f(color[0], color[1], color[2], 1) + for p in poly: + gl.glVertex3f(p[0], p[1], 0) + gl.glEnd() + + def render_indicators(self, W, H): + gl.glBegin(gl.GL_QUADS) + s = W/40.0 + h = H/40.0 + gl.glColor4f(0,0,0,1) + gl.glVertex3f(W, 0, 0) + gl.glVertex3f(W, 5*h, 0) + gl.glVertex3f(0, 5*h, 0) + gl.glVertex3f(0, 0, 0) + + if self._modification_type == 'bar': + gl.glVertex3f(W, 5*h, 0) + gl.glVertex3f(W, H, 0) + gl.glVertex3f(W-3*s, H, 0) + gl.glVertex3f(W-3*s, 5*h, 0) + + gl.glVertex3f(3*s, 5*h, 0) + gl.glVertex3f(3*s, H, 0) + gl.glVertex3f(0, H, 0) + gl.glVertex3f(0, 5*h, 0) + + def vertical_ind(place, val, color): + gl.glColor4f(color[0], color[1], color[2], 1) + gl.glVertex3f((place+0)*s, h + h*val, 0) + gl.glVertex3f((place+1)*s, h + h*val, 0) + gl.glVertex3f((place+1)*s, h, 0) + gl.glVertex3f((place+0)*s, h, 0) + def horiz_ind(place, val, color): + gl.glColor4f(color[0], color[1], color[2], 1) + gl.glVertex3f((place+0)*s, 4*h , 0) + gl.glVertex3f((place+val)*s, 4*h, 0) + gl.glVertex3f((place+val)*s, 2*h, 0) + gl.glVertex3f((place+0)*s, 2*h, 0) + true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1])) + vertical_ind(5, 0.02*true_speed, (1,1,1)) + vertical_ind(7, 0.01*self.car.wheels[0].omega, (0.0,0,1)) # ABS sensors + vertical_ind(8, 0.01*self.car.wheels[1].omega, (0.0,0,1)) + vertical_ind(9, 0.01*self.car.wheels[2].omega, (0.2,0,1)) + vertical_ind(10,0.01*self.car.wheels[3].omega, (0.2,0,1)) + horiz_ind(20, -10.0*self.car.wheels[0].joint.angle, (0,1,0)) + horiz_ind(30, -0.8*self.car.hull.angularVelocity, (1,0,0)) + gl.glEnd() + self.score_label.text = "%04i" % self.reward + self.score_label.draw() + + +if __name__=="__main__": + from pyglet.window import key + a = np.array( [0.0, 0.0, 0.0] ) + def key_press(k, mod): + global restart + if k==0xff0d: restart = True + if k==key.LEFT: a[0] = -1.0 + if k==key.RIGHT: a[0] = +1.0 + if k==key.UP: a[1] = +1.0 + if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation + def key_release(k, mod): + if k==key.LEFT and a[0]==-1.0: a[0] = 0 + if k==key.RIGHT and a[0]==+1.0: a[0] = 0 + if k==key.UP: a[1] = 0 + if k==key.DOWN: a[2] = 0 + env = CarRacing() + env.render() + env.viewer.window.on_key_press = key_press + env.viewer.window.on_key_release = key_release + record_video = False + if record_video: + from gym.wrappers.monitor import Monitor + env = Monitor(env, '/tmp/video-test', force=True) + isopen = True + while isopen: + env.reset() + total_reward = 0.0 + steps = 0 + restart = False + while True: + s, r, done, info = env.step(a) + total_reward += r + if steps % 200 == 0 or done: + print("\naction " + str(["{:+0.2f}".format(x) for x in a])) + print("step {} total_reward {:+0.2f}".format(steps, total_reward)) + #import matplotlib.pyplot as plt + #plt.imshow(s) + #plt.savefig("test.jpeg") + steps += 1 + isopen = env.render() + if done or restart or isopen == False: + break + env.close() diff --git a/environments/domain_adaption/car_racing_variants/version.py b/environments/domain_adaption/car_racing_variants/version.py new file mode 100644 index 0000000..055ed57 --- /dev/null +++ b/environments/domain_adaption/car_racing_variants/version.py @@ -0,0 +1 @@ +VERSION='0.0.1' diff --git a/environments/domain_adaption/natural_rl_environment/imgsource.py b/environments/domain_adaption/natural_rl_environment/imgsource.py new file mode 100644 index 0000000..37ff12f --- /dev/null +++ b/environments/domain_adaption/natural_rl_environment/imgsource.py @@ -0,0 +1,120 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import numpy as np +import cv2 +import skvideo.io + + +class ImageSource(object): + """ + Source of natural images to be added to a simulated environment. + """ + def get_image(self): + """ + Returns: + an RGB image of [h, w, 3] with a fixed shape. + """ + pass + + def reset(self): + """ Called when an episode ends. """ + pass + + +class FixedColorSource(ImageSource): + def __init__(self, shape, color): + """ + Args: + shape: [h, w] + color: a 3-tuple + """ + self.arr = np.zeros((shape[0], shape[1], 3)) + self.arr[:, :] = color + + def get_image(self): + return np.copy(self.arr) + + +class RandomColorSource(ImageSource): + def __init__(self, shape): + """ + Args: + shape: [h, w] + """ + self.shape = shape + self.reset() + + def reset(self): + self._color = np.random.randint(0, 256, size=(3,)) + + def get_image(self): + arr = np.zeros((self.shape[0], self.shape[1], 3)) + arr[:, :] = self._color + return arr + + +class NoiseSource(ImageSource): + def __init__(self, shape, strength=50): + """ + Args: + shape: [h, w] + strength (int): the strength of noise, in range [0, 255] + """ + self.shape = shape + self.strength = strength + + def get_image(self): + return np.maximum(np.random.randn( + self.shape[0], self.shape[1], 3) * self.strength, 0) + + +class RandomImageSource(ImageSource): + def __init__(self, shape, filelist): + """ + Args: + shape: [h, w] + filelist: a list of image files + """ + self.shape_wh = shape[::-1] + self.filelist = filelist + self.reset() + + def reset(self): + fname = np.random.choice(self.filelist) + im = cv2.imread(fname, cv2.IMREAD_COLOR) + im = im[:, :, ::-1] + im = cv2.resize(im, self.shape_wh) + self._im = im + + def get_image(self): + return self._im + + +class RandomVideoSource(ImageSource): + def __init__(self, shape, filelist): + """ + Args: + shape: [h, w] + filelist: a list of video files + """ + self.shape_wh = shape[::-1] + self.filelist = filelist + self.reset() + + def reset(self): + fname = np.random.choice(self.filelist) + self.frames = skvideo.io.vread(fname) + self.frame_idx = 0 + + def get_image(self): + if self.frame_idx >= self.frames.shape[0]: + self.reset() + im = self.frames[self.frame_idx][:, :, ::-1] + self.frame_idx += 1 + im = im[:, :, ::-1] + im = cv2.resize(im, self.shape_wh) + return im diff --git a/environments/domain_adaption/natural_rl_environment/matting.py b/environments/domain_adaption/natural_rl_environment/matting.py new file mode 100644 index 0000000..10acde4 --- /dev/null +++ b/environments/domain_adaption/natural_rl_environment/matting.py @@ -0,0 +1,32 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + + +class BackgroundMatting(object): + """ + Produce a mask of a given image which will be replaced by natural signals. + """ + def get_mask(self, img): + """ + Take an image of [H, W, 3]. Returns a mask of [H, W] + """ + raise NotImplementedError() + + +class BackgroundMattingWithColor(BackgroundMatting): + """ + Produce a mask by masking the given color. This is a simple strategy + but effective for many games. + """ + def __init__(self, color): + """ + Args: + color: a (r, g, b) tuple + """ + self._color = color + + def get_mask(self, img): + return img == self._color diff --git a/environments/domain_adaption/natural_rl_environment/natural_env.py b/environments/domain_adaption/natural_rl_environment/natural_env.py new file mode 100755 index 0000000..7017e88 --- /dev/null +++ b/environments/domain_adaption/natural_rl_environment/natural_env.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# Copyright (c) Facebook, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import os +import argparse +import glob +import gym +from gym.utils import play + +from matting import BackgroundMattingWithColor +from imgsource import ( + RandomImageSource, + RandomColorSource, + NoiseSource, + RandomVideoSource, +) + + +class ReplaceBackgroundEnv(gym.ObservationWrapper): + + viewer = None + + def __init__(self, env, bg_matting, natural_source): + """ + The source must produce a image with a shape that's compatible to + `env.observation_space`. + """ + super(ReplaceBackgroundEnv, self).__init__(env) + self._bg_matting = bg_matting + self._natural_source = natural_source + + def observation(self, obs): + mask = self._bg_matting.get_mask(obs) + img = self._natural_source.get_image() + obs[mask] = img[mask] + self._last_ob = obs + return obs + + def reset(self): + self._natural_source.reset() + return super(ReplaceBackgroundEnv, self).reset() + + # modified from gym/envs/atari/atari_env.py + # This makes the monitor work + def render(self, mode="human"): + img = self._last_ob + if mode == "rgb_array": + return img + elif mode == "human": + from gym.envs.classic_control import rendering + + if self.viewer is None: + self.viewer = rendering.SimpleImageViewer() + self.viewer.imshow(img) + return env.viewer.isopen + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--env", help="The gym environment to base on") + parser.add_argument("--imgsource", choices=["color", "noise", "images", "videos"]) + parser.add_argument( + "--resource-files", help="A glob pattern to obtain images or videos" + ) + parser.add_argument("--dump-video", help="If given, a directory to dump video") + args = parser.parse_args() + + env = gym.make(args.env) + shape2d = env.observation_space.shape[:2] + + if args.imgsource: + if args.imgsource == "color": + imgsource = RandomColorSource(shape2d) + elif args.imgsource == "noise": + imgsource = NoiseSource(shape2d) + else: + files = glob.glob(os.path.expanduser(args.resource_files)) + assert len(files), "Pattern {} does not match any files".format( + args.resource_files + ) + if args.imgsource == "images": + imgsource = RandomImageSource(shape2d, files) + else: + imgsource = RandomVideoSource(shape2d, files) + + wrapped_env = ReplaceBackgroundEnv( + env, BackgroundMattingWithColor((0, 0, 0)), imgsource + ) + else: + wrapped_env = env + + if args.dump_video: + assert os.path.isdir(args.dump_video) + wrapped_env = gym.wrappers.Monitor(wrapped_env, args.dump_video) + play.play(wrapped_env, zoom=4) diff --git a/environments/domain_adaption/test.py b/environments/domain_adaption/test.py new file mode 100644 index 0000000..5f150a9 --- /dev/null +++ b/environments/domain_adaption/test.py @@ -0,0 +1,24 @@ +import gym + +env_dict = gym.envs.registration.registry.env_specs.copy() + + +for env in env_dict: + if 'CarRacingColor3-v0' in env: + print("Remove {} from registry".format(env)) + del gym.envs.registration.registry.env_specs[env] +#from environments.domain_adaption.natural_rl_environment import natural_env +import environments.domain_adaption.car_racing_variants + + +env = gym.make('CarRacingColor3-v0') +env.seed(666) + +while True: + ob = env.reset() + done = False + step = 0 + while not done and 0 <= step <= 500: + ob, reward, done, _ = env.step(env.action_space.sample()) + step += 1 + env.render() \ No newline at end of file diff --git a/reload_agent.py b/reload_agent.py index 9973452..cb88e19 100644 --- a/reload_agent.py +++ b/reload_agent.py @@ -14,20 +14,21 @@ warnings.filterwarnings('ignore', category=UserWarning) if __name__ == '__main__': - out_path = Path(r'C:\Users\steff\projects\f_iks\debug_out\A2C_1622558379') - with (out_path / f'env_{out_path.name}.pick').open('rb') as f: + model_name = 'A2C_1622571986' + run_id = 0 + out_path = Path(__file__).parent / 'debug_out' + model_path = out_path / model_name + + with (model_path / f'env_{model_name}.pick').open('rb') as f: env_kwargs = pickle.load(f) - env = SimpleFactory(allow_no_op=False, allow_diagonal_movement=False, allow_square_movement=True, **env_kwargs) + env = SimpleFactory( **env_kwargs) # Edit THIS: - model_path = out_path / '1_A2C_1622558379' - - model_files = list(natsorted(out_path.rglob('*.zip'))) + model_files = list(natsorted((model_path / f'{run_id}_{model_name}').rglob('*.zip'))) this_model = model_files[0] model = PPO.load(this_model) - evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, - render=True) + evaluation_result = evaluate_policy(model, env, n_eval_episodes=100, deterministic=False, render=True) print(evaluation_result) env.close()