Added new Environment, that is highy customizable via args

This commit is contained in:
Dominik Moritz Roth 2022-08-17 19:31:15 +02:00
parent 5db17f7bc9
commit 9a40ee07b7
3 changed files with 118 additions and 34 deletions

View File

@ -25,10 +25,15 @@ class Entity(object):
ax, ay = self.acc ax, ay = self.acc
vx, vy = vx+ax*self.env.acc_fac, vy+ay*self.env.acc_fac vx, vy = vx+ax*self.env.acc_fac, vy+ay*self.env.acc_fac
x, y = x+vx*self.env.speed_fac, y+vy*self.env.speed_fac x, y = x+vx*self.env.speed_fac, y+vy*self.env.speed_fac
if not self.env.torus_topology:
if x > 1 or x < 0: if x > 1 or x < 0:
x, y, vx, vy = self.calc_void_collision(x < 0, x, y, vx, vy) x, y, vx, vy = self.calc_void_collision(x < 0, x, y, vx, vy)
if y > 1 or y < 0: if y > 1 or y < 0:
x, y, vx, vy = self.calc_void_collision(2 + (x < 0), x, y, vx, vy) x, y, vx, vy = self.calc_void_collision(
2 + (x < 0), x, y, vx, vy)
else:
x = x % 1
y = y % 1
self.speed = vx/(1+self.drag), vy/(1+self.drag) self.speed = vx/(1+self.drag), vy/(1+self.drag)
self.pos = x, y self.pos = x, y
@ -59,6 +64,7 @@ class Entity(object):
if force_dir_len == 0: if force_dir_len == 0:
return return
force_dir = force_dir[0]/force_dir_len, force_dir[1]/force_dir_len force_dir = force_dir[0]/force_dir_len, force_dir[1]/force_dir_len
if not self.env.torus_topology:
if self.env.agent.pos[0] > 0.99 or self.env.agent.pos[0] < 0.01: if self.env.agent.pos[0] > 0.99 or self.env.agent.pos[0] < 0.01:
force_dir = force_dir[0], force_dir[1] * 2 force_dir = force_dir[0], force_dir[1] * 2
if self.env.agent.pos[1] > 0.99 or self.env.agent.pos[1] < 0.01: if self.env.agent.pos[1] > 0.99 or self.env.agent.pos[1] < 0.01:

View File

@ -13,10 +13,12 @@ import torch as th
class ColumbusEnv(gym.Env): class ColumbusEnv(gym.Env):
metadata = {'render.modes': ['human']} metadata = {'render.modes': ['human']}
def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, aux_reward_max=0, aux_penalty_max=0, void_damage=100): def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, start_pos=(0.5, 0.5), start_score=0, speed_fac=0.01, acc_fac=0.02, die_on_zero=False, return_on_score=-1, reward_mult=1, agent_drag=0, controll_type='SPEED', aux_reward_max=1, aux_penalty_max=0, aux_reward_discretize=0, void_is_type_barrier=True, void_damage=1, torus_topology=False):
super(ColumbusEnv, self).__init__() super(ColumbusEnv, self).__init__()
self.action_space = spaces.Box( self.action_space = spaces.Box(
low=-1, high=1, shape=(2,), dtype=np.float32) low=-1, high=1, shape=(2,), dtype=np.float32)
if not isinstance(observable, observables.Observable):
observable = parseObs(observable)
observable._set_env(self) observable._set_env(self)
self.observable = observable self.observable = observable
self.title = 'Untitled' self.title = 'Untitled'
@ -28,18 +30,21 @@ class ColumbusEnv(gym.Env):
self.width = 720 self.width = 720
self.height = 720 self.height = 720
self.visible = False self.visible = False
self.start_pos = (0.5, 0.5) self.start_pos = start_pos
self.speed_fac = 0.01/fps*60 self.speed_fac = speed_fac/fps*60
self.acc_fac = 0.03/fps*60 self.acc_fac = acc_fac/fps*60
self.die_on_zero = False # return (/die) when score hist zero self.die_on_zero = die_on_zero # return (/die) when score hist zero
self.return_on_score = -1 # -1 = never; return, when this score is reached self.return_on_score = return_on_score # -1 = Never
self.reward_mult = 1 self.reward_mult = reward_mult
self.start_score = start_score
# 0.01 is a good value, drag with the environment (air / ground) # 0.01 is a good value, drag with the environment (air / ground)
self.agent_drag = 0 self.agent_drag = agent_drag
self.controll_type = 'SPEED' # one of SPEED, ACC assert controll_type == 'SPEED' or controll_type == 'ACC'
self.limit_inp_to_unit_circle = True self.limit_inp_to_unit_circle = True
self.controll_type = controll_type # one of SPEED, ACC
self.aux_reward_max = aux_reward_max # 0 = off self.aux_reward_max = aux_reward_max # 0 = off
self.aux_penalty_max = aux_penalty_max # 0 = off self.aux_penalty_max = aux_penalty_max # 0 = off
self.aux_reward_discretize = aux_reward_discretize
# 0 = dont discretize; how many steps (along diagonal) # 0 = dont discretize; how many steps (along diagonal)
self.aux_reward_discretize = 0 self.aux_reward_discretize = 0
self.draw_observable = True self.draw_observable = True
@ -47,8 +52,9 @@ class ColumbusEnv(gym.Env):
self.draw_entities = True self.draw_entities = True
self.draw_confidence_ellipse = True self.draw_confidence_ellipse = True
# If the Void should be of type Barrier (else it is just of type Void and Entity) # If the Void should be of type Barrier (else it is just of type Void and Entity)
self.void_barrier = True self.void_barrier = void_is_type_barrier
self.void_damage = void_damage self.void_damage = void_damage
self.torus_topology = torus_topology
self.paused = False self.paused = False
self.keypress_timeout = 0 self.keypress_timeout = 0
@ -146,10 +152,11 @@ class ColumbusEnv(gym.Env):
observation = self.observable.get_observation() observation = self.observable.get_observation()
reward, self.new_reward, self.new_abs_reward = self.new_reward / \ reward, self.new_reward, self.new_abs_reward = self.new_reward / \
self.fps + self.new_abs_reward, 0, 0 self.fps + self.new_abs_reward, 0, 0
self.score += reward # aux_reward does not count towards the score if not self.torus_topology:
if self.agent.pos[0] < 0.001 or self.agent.pos[0] > 0.999 \ if self.agent.pos[0] < 0.001 or self.agent.pos[0] > 0.999 \
or self.agent.pos[1] < 0.001 or self.agent.pos[1] > 0.999: or self.agent.pos[1] < 0.001 or self.agent.pos[1] > 0.999:
reward -= self.void_damage/self.fps reward -= self.void_damage/self.fps
self.score += reward # aux_reward does not count towards the score
if self.aux_reward_max: if self.aux_reward_max:
reward += self._get_aux_reward() reward += self._get_aux_reward()
done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \ done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \
@ -200,7 +207,7 @@ class ColumbusEnv(gym.Env):
# will get rescaled acording to fps (=reward per second) # will get rescaled acording to fps (=reward per second)
self.new_reward = 0 self.new_reward = 0
self.new_abs_reward = 0 # will not get rescaled. should be used for one-time rewards self.new_abs_reward = 0 # will not get rescaled. should be used for one-time rewards
self.score = 0 self.score = self.start_score
self.entities = [] self.entities = []
self.timers = [] self.timers = []
self.agent = entities.Agent(self) self.agent = entities.Agent(self)
@ -456,7 +463,7 @@ class ColumbusComp(ColumbusEnv):
class ColumbusSingle(ColumbusEnv): class ColumbusSingle(ColumbusEnv):
def __init__(self, observable=observables.CompositionalObservable([observables.RayObservable(num_rays=6, chans=[entities.Enemy]), observables.StateObservable(coordsAgent=False, speedAgent=False, coordsRelativeToAgent=True, coordsRewards=True, rewardsWhitelist=None, coordsEnemys=False, enemysWhitelist=None, enemysNoBarriers=True, rewardsTimeouts=False, include_rand=True)]), hide_map=False, fps=30, env_seed=None, aux_reward_max=10, enemy_damage=1, reward_reward=25, void_damage=1, **kw): def __init__(self, observable=observables.CompositionalObservable([observables.RayObservable(num_rays=6, chans=[entities.Enemy]), observables.StateObservable(coordsAgent=False, speedAgent=False, coordsRelativeToAgent=True, coordsRewards=True, rewardsWhitelist=None, coordsEnemys=False, enemysWhitelist=None, enemysNoBarriers=True, rewardsTimeouts=False, include_rand=True)]), hide_map=False, fps=30, env_seed=None, aux_reward_max=1, enemy_damage=1, reward_reward=25, void_damage=1, **kw):
super().__init__( super().__init__(
observable=observable, fps=fps, env_seed=env_seed, aux_reward_max=aux_reward_max, void_damage=void_damage, **kw) observable=observable, fps=fps, env_seed=env_seed, aux_reward_max=aux_reward_max, void_damage=void_damage, **kw)
self.draw_entities = not hide_map self.draw_entities = not hide_map
@ -551,6 +558,63 @@ class ColumbusFootball(ColumbusEnv):
self.entities.append(entities.FlyingFootballPlayer(self, ball)) self.entities.append(entities.FlyingFootballPlayer(self, ball))
def parseObs(obsConf):
if type(obsConf) == list:
obs = []
for i, c in enumerate(obsConf):
obs.append(parseObs(c))
return observables.CompositionalObservable(obs)
if obsConf['type'] == 'State':
conf = {k: v for k, v in obsConf.items() if k not in ['type']}
return observables.StateObservable(conf)
elif obsConf['type'] == 'RayCast':
chans = []
for chan in obsConf.get('chans', []):
chans.append(getattr(entities, chan))
conf = {k: v for k, v in obsConf.items() if k not in ['type', 'chans']}
return observables.RayObservable(chans=chans, **conf)
elif obsConf['type'] == 'CNN':
conf = {k: v for k, v in obsConf.items() if k not in ['type']}
return observables.CnnObservable(**conf)
else:
raise Exception('Unknown Observable selected')
class ColumbusConfigDefined(ColumbusEnv):
def __init__(self, observable={}, env_seed=None, entities=[], fps=30, **kw):
super().__init__(
observable=observable, fps=fps, env_seed=env_seed, **kw)
self.entities_definitions = entities
def setup(self):
self.agent.pos = self.start_pos
for i, e in enumerate(self.entities_definitions):
Entity = getattr(entities, e['type'])
for i in range(e.get('num', 1) + int(self.random()*(0.99+e.get('num_rand', 0)))):
entity = Entity(self)
conf = {k: v for k, v in e.items() if str(
k) not in ['num', 'num_rand', 'type']}
for k, v in conf.items():
if k.endswith('_rand'):
n = k.replace('_rand', '')
cur = getattr(
entity, n)
inc = int((v+0.99)*self.random())
setattr(entity, n, cur + inc)
elif k.endswith('_randf'):
n = k.replace('_randf', '')
cur = getattr(
entity, n)
inc = v*self.random()
setattr(entity, n, cur + inc)
else:
setattr(entity, k, v)
self.entities.append(entity)
### ###
register( register(
id='ColumbusTestCnn-v0', id='ColumbusTestCnn-v0',
@ -629,3 +693,9 @@ register(
entry_point=ColumbusSingle, entry_point=ColumbusSingle,
max_episode_steps=30*60*2, max_episode_steps=30*60*2,
) )
register(
id='ColumbusConfigDefined-v0',
entry_point=ColumbusConfigDefined,
max_episode_steps=30*60*2,
)

View File

@ -3,6 +3,7 @@ import numpy as np
import pygame import pygame
import math import math
from columbus import entities from columbus import entities
import torch as th
class Observable(): class Observable():
@ -41,7 +42,7 @@ class CnnObservable(Observable):
def get_observation_space(self): def get_observation_space(self):
return spaces.Box(low=0, high=255, return spaces.Box(low=0, high=255,
shape=(self.out_width, self.out_height, 3), dtype=np.uint8) shape=(self.out_width, self.out_height, 3), dtype=np.float32)
def get_observation(self): def get_observation(self):
if not self.env._rendered: if not self.env._rendered:
@ -109,7 +110,7 @@ class RayObservable(Observable):
for entity in entities_l: for entity in entities_l:
if isinstance(entity, entity_type) or (self.env.void_barrier and isinstance(entity, entities.Void) and entity_type == entities.Enemy): if isinstance(entity, entity_type) or (self.env.void_barrier and isinstance(entity, entities.Void) and entity_type == entities.Enemy):
if isinstance(entity, entities.Void): if isinstance(entity, entities.Void):
if 0 >= pos[0] or pos[0] >= self.env.width or 0 >= pos[1] or pos[1] >= self.env.width: if not self.env.torus_topology and (0 >= pos[0] or pos[0] >= self.env.width or 0 >= pos[1] or pos[1] >= self.env.width):
return True return True
else: else:
if entity.shape != 'circle': if entity.shape != 'circle':
@ -147,6 +148,8 @@ class RayObservable(Observable):
rx, ry = sx + \ rx, ry = sx + \
self.env.agent.pos[0]*self.env.width, sy + \ self.env.agent.pos[0]*self.env.width, sy + \
self.env.agent.pos[1]*self.env.height self.env.agent.pos[1]*self.env.height
if self.env.torus_topology:
rx, ry = rx % self.env.width, ry % self.env.height
if self._check_collision((rx, ry), entity_type, entities): if self._check_collision((rx, ry), entity_type, entities):
self.rays[r, c] = (self.num_steps-s)/self.num_steps self.rays[r, c] = (self.num_steps-s)/self.num_steps
if self.occlusion: if self.occlusion:
@ -162,6 +165,8 @@ class RayObservable(Observable):
rx, ry = sx + \ rx, ry = sx + \
self.env.agent.pos[0]*self.env.width, sy + \ self.env.agent.pos[0]*self.env.width, sy + \
self.env.agent.pos[1]*self.env.height self.env.agent.pos[1]*self.env.height
if self.env.torus_topology:
rx, ry = rx % self.env.width, ry % self.env.height
# TODO: How stupid do I want to code? # TODO: How stupid do I want to code?
# This instanciates an Object for every Ray-hit, # This instanciates an Object for every Ray-hit,
# just to get the color for the visual. # just to get the color for the visual.
@ -273,8 +278,11 @@ class CompositionalObservable(Observable):
for i, obs in enumerate(self.observables): for i, obs in enumerate(self.observables):
space = obs.get_observation_space() space = obs.get_observation_space()
num += math.prod(space.shape) num += math.prod(space.shape)
try: # TODO: lol
low = min(low, float(space.low[0].item())) low = min(low, float(space.low[0].item()))
high = max(high, float(space.high[0].item())) high = max(high, float(space.high[0].item()))
except:
pass
if False: if False:
if not i: if not i:
low = space.low low = space.low
@ -286,9 +294,9 @@ class CompositionalObservable(Observable):
shape=(num,), dtype=np.float32) shape=(num,), dtype=np.float32)
def get_observation(self): def get_observation(self):
o = [float(point) o = [th.reshape(th.Tensor(obs.get_observation()), (-1,))
for obs in self.observables for point in obs.get_observation()] for obs in self.observables]
o = np.array(o) o = th.hstack(o)
return o return o
def draw(self): def draw(self):