Compare commits

...

3 Commits

3 changed files with 98 additions and 11 deletions

View File

@ -3,10 +3,16 @@ import math
class Entity(object): class Entity(object):
# Construction hook: builds the object through type.__call__ and then runs
# its __post_init__() before returning it to the caller.
# NOTE(review): a __call__ that takes `cls` only intercepts `SomeEntity(...)`
# when it is defined on the metaclass; this diff view shows it directly under
# `class Entity(object)` -- confirm where it actually lives.
def __call__(cls, *args, **kwargs):
# Regular instantiation (allocation plus __init__ chain) of `cls`.
obj = type.__call__(cls, *args, **kwargs)
# Give the finished object a chance to do setup that needs a complete __init__.
obj.__post_init__()
return obj
def __init__(self, env): def __init__(self, env):
self.shape = None self.shape = None
self.env = env self.env = env
self.pos = (env.random(), env.random()) self.pos = (env.random(), env.random())
self.last_pos = None
self.speed = (0, 0) self.speed = (0, 0)
self.acc = (0, 0) self.acc = (0, 0)
self.drag = 0 self.drag = 0
@ -14,13 +20,20 @@ class Entity(object):
self.solid = False self.solid = False
self.movable = False # False = Non movable, True = Movable, x>1: lighter movable self.movable = False # False = Non movable, True = Movable, x>1: lighter movable
self.elasticity = 1 self.elasticity = 1
#self.collision_changes_speed = True
self.collision_changes_speed = self.env.controll_type == 'ACC' self.collision_changes_speed = self.env.controll_type == 'ACC'
self.collision_elasticity = self.env.default_collision_elasticity self.collision_elasticity = self.env.default_collision_elasticity
self._crash_list = [] self._crash_list = []
self._coll_add_pushback = 0 self._coll_add_pushback = 0
self.crash_conservation_of_energy = True
self.draw_path = False
self.draw_path_col = (55, 55, 55)
self.draw_path_width = 2
# Hook invoked on a freshly built object by __call__ (obj.__post_init__()).
# Does nothing by default; presumably meant to be overridden by subclasses
# that need setup after __init__ has finished -- confirm against subclasses.
def __post_init__(self):
pass
def physics_step(self): def physics_step(self):
self.last_pos = self.pos[0], self.pos[1]
x, y = self.pos x, y = self.pos
vx, vy = self.speed vx, vy = self.speed
ax, ay = self.acc ax, ay = self.acc
@ -47,8 +60,9 @@ class Entity(object):
self._crash_list = [] self._crash_list = []
def draw(self): def draw(self):
raise Exception( if self.draw_path and self.last_pos:
'[!] draw not implemented for shape "'+str(self.shape)+'"') pygame.draw.line(self.env.path_overlay, self.draw_path_col,
(self.last_pos[0]*self.env.width, self.last_pos[1]*self.env.height), (self.pos[0]*self.env.width, self.pos[1]*self.env.height), self.draw_path_width)
def on_collision(self, other, depth): def on_collision(self, other, depth):
if self.solid and other.solid: if self.solid and other.solid:
@ -86,7 +100,7 @@ class Entity(object):
force_vec[0]*self.collision_elasticity/self.env.speed_fac, self.speed[1] + \ force_vec[0]*self.collision_elasticity/self.env.speed_fac, self.speed[1] + \
force_vec[1]*self.collision_elasticity/self.env.speed_fac force_vec[1]*self.collision_elasticity/self.env.speed_fac
newspeed = math.sqrt(self.speed[0]**2+self.speed[1]**2) newspeed = math.sqrt(self.speed[0]**2+self.speed[1]**2)
if newspeed > oldspeed*1.1: if self.crash_conservation_of_energy and newspeed > oldspeed*1.1:
self.speed = self.speed[0]/newspeed*1.1 * \ self.speed = self.speed[0]/newspeed*1.1 * \
oldspeed, self.speed[1]/newspeed*oldspeed*1.1 oldspeed, self.speed[1]/newspeed*oldspeed*1.1
@ -121,6 +135,7 @@ class CircularEntity(Entity):
self.radius = 10 self.radius = 10
def draw(self): def draw(self):
super().draw()
x, y = self.pos x, y = self.pos
pygame.draw.circle(self.env.surface, self.col, pygame.draw.circle(self.env.surface, self.col,
(x*self.env.width, y*self.env.height), self.radius, width=0) (x*self.env.width, y*self.env.height), self.radius, width=0)
@ -190,6 +205,7 @@ class RectangularEntity(Entity):
self.height = 10 self.height = 10
def draw(self): def draw(self):
super().draw()
x, y = self.pos x, y = self.pos
rect = pygame.Rect(x*self.env.width, y * rect = pygame.Rect(x*self.env.width, y *
self.env.width, self.width, self.height) self.env.width, self.width, self.height)
@ -351,6 +367,39 @@ class TeleportingReward(OnceReward):
self.env.check_collisions_for(self) self.env.check_collisions_for(self)
class LoopReward(OnceReward):
    """Reward that walks through a fixed cycle of positions.

    ``loop`` holds the positions to visit. Each entry is either
    ``[x, y]`` (used as-is) or a 4-element ``[x, y, x_spread, y_spread]``,
    in which case the position is offset by ``spread * (env.random() - 0.5)``
    per axis. Every time the reward is collected it moves on to the next
    entry, wrapping around at the end of the list.
    """

    def __init__(self, env):
        super().__init__(env)
        self.loop = [[0.25, 0.5], [0.75, 0.5]]
        self.state = 0
        # Place the reward on the first loop entry right away.
        self.jump_to_state()
        # When True, physics_step additionally runs the env collision check.
        self.barrier_physics = False

    def jump_to_state(self):
        """Move the reward to the loop entry selected by ``self.state``."""
        # Copy so the stored loop entry is never aliased by self.pos.
        target = list(self.loop[self.state])
        if len(target) == 4:
            base_x, base_y, spread_x, spread_y = target
            # x offset is drawn before y, one env.random() call each.
            new_x = base_x + spread_x * (self.env.random() - 0.5)
            new_y = base_y + spread_y * (self.env.random() - 0.5)
            target = (new_x, new_y)
        self.pos = target

    def next_state(self):
        """Advance ``self.state`` by one, wrapping at the end of ``loop``."""
        self.state = (self.state + 1) % len(self.loop)

    def jump_next(self):
        """Advance to the next loop entry and move the reward there."""
        self.next_state()
        self.jump_to_state()

    def on_collected(self):
        """Credit ``self.reward`` as absolute reward and move to the next entry."""
        self.env.new_abs_reward += self.reward
        self.jump_next()

    def physics_step(self):
        """Run the optional collision check, then the inherited physics step."""
        if self.barrier_physics:
            self.env.check_collisions_for(self)
        super().physics_step()
class TimeoutReward(OnceReward): class TimeoutReward(OnceReward):
def __init__(self, env): def __init__(self, env):
super(TimeoutReward, self).__init__(env) super(TimeoutReward, self).__init__(env)

View File

@ -45,7 +45,7 @@ def parseObs(obsConf):
class ColumbusEnv(gym.Env): class ColumbusEnv(gym.Env):
metadata = {'render.modes': ['human']} metadata = {'render.modes': ['human']}
def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, master_seed=None, start_pos=(0.5, 0.5), start_score=0, speed_fac=0.01, acc_fac=0.04, die_on_zero=False, return_on_score=-1, reward_mult=1, agent_drag=0, controll_type='SPEED', aux_reward_max=1, aux_penalty_max=0, aux_reward_discretize=0, void_is_type_barrier=True, void_damage=1, torus_topology=False, default_collision_elasticity=1): def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, master_seed=None, start_pos=(0.5, 0.5), start_score=0, speed_fac=0.01, acc_fac=0.04, die_on_zero=False, return_on_score=-1, reward_mult=1, agent_drag=0, controll_type='SPEED', aux_reward_max=1, aux_penalty_max=0, aux_reward_discretize=0, void_is_type_barrier=True, void_damage=1, torus_topology=False, default_collision_elasticity=1, terminate_on_reward=False, agent_draw_path=False):
super(ColumbusEnv, self).__init__() super(ColumbusEnv, self).__init__()
self.action_space = spaces.Box( self.action_space = spaces.Box(
low=-1, high=1, shape=(2,), dtype=np.float32) low=-1, high=1, shape=(2,), dtype=np.float32)
@ -89,6 +89,8 @@ class ColumbusEnv(gym.Env):
self.void_damage = void_damage self.void_damage = void_damage
self.torus_topology = torus_topology self.torus_topology = torus_topology
self.default_collision_elasticity = default_collision_elasticity self.default_collision_elasticity = default_collision_elasticity
self.terminate_on_reward = terminate_on_reward
self.agent_draw_path = agent_draw_path
self.paused = False self.paused = False
self.keypress_timeout = 0 self.keypress_timeout = 0
@ -121,6 +123,8 @@ class ColumbusEnv(gym.Env):
def _ensure_surface(self): def _ensure_surface(self):
if not self.surface or not self.screen: if not self.surface or not self.screen:
self.surface = pygame.Surface((self.width, self.height)) self.surface = pygame.Surface((self.width, self.height))
self.path_overlay = pygame.Surface(
(self.width, self.height), pygame.SRCALPHA, 32)
if self.visible: if self.visible:
self.screen = pygame.display.set_mode( self.screen = pygame.display.set_mode(
(self.width, self.height)) (self.width, self.height))
@ -200,6 +204,7 @@ class ColumbusEnv(gym.Env):
self._step_timers() self._step_timers()
self._step_entities() self._step_entities()
observation = self.observable.get_observation() observation = self.observable.get_observation()
gotRew = self.new_reward > 0 or self.new_abs_reward > 0
reward, self.new_reward, self.new_abs_reward = self.new_reward / \ reward, self.new_reward, self.new_abs_reward = self.new_reward / \
self.fps + self.new_abs_reward, 0, 0 self.fps + self.new_abs_reward, 0, 0
if not self.torus_topology: if not self.torus_topology:
@ -210,7 +215,7 @@ class ColumbusEnv(gym.Env):
if self.aux_reward_max: if self.aux_reward_max:
reward += self._get_aux_reward() reward += self._get_aux_reward()
done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \ done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \
1 and self.score > self.return_on_score 1 and self.score > self.return_on_score or self.terminate_on_reward and gotRew
info = {'score': self.score, 'reward': reward} info = {'score': self.score, 'reward': reward}
self._rendered = False self._rendered = False
if done: if done:
@ -268,6 +273,7 @@ class ColumbusEnv(gym.Env):
self.entities = [] self.entities = []
self.timers = [] self.timers = []
self.agent = entities.Agent(self) self.agent = entities.Agent(self)
self.agent.draw_path = self.agent_draw_path
self.setup() self.setup()
self.entities.append(self.agent) # add it last, will be drawn on top self.entities.append(self.agent) # add it last, will be drawn on top
self.observable.reset() self.observable.reset()
@ -384,6 +390,7 @@ class ColumbusEnv(gym.Env):
if mode == 'human' and dont_show: if mode == 'human' and dont_show:
return return
self.screen.blit(self.surface, (0, 0)) self.screen.blit(self.surface, (0, 0))
self.screen.blit(self.path_overlay, (0, 0))
self._draw_observable(forceDraw=mode != 'human') self._draw_observable(forceDraw=mode != 'human')
self._draw_joystick(forceDraw=mode != 'human') self._draw_joystick(forceDraw=mode != 'human')
if chol != None: if chol != None:

View File

@ -1,16 +1,17 @@
from time import sleep, time from time import sleep, time
import numpy as np import numpy as np
import pygame import pygame
import yaml
from columbus import env from columbus import env
from columbus.observables import Observable, CnnObservable from columbus.observables import Observable, CnnObservable
def main(): def main():
Env = chooseEnv() env = chooseEnv()
env = Env(fps=30) while True:
env.start_pos = [0.6, 0.3] playEnv(env)
playEnv(env) input('<again?>')
env.close() env.close()
@ -22,6 +23,33 @@ def getAvaibleEnvs():
yield getattr(env, s) yield getattr(env, s)
def loadConfigDefinedEnv(EnvClass):
    """Interactively build ``EnvClass`` from a multi-document YAML config.

    Asks for a config file path, lists every named document in it (documents
    named ``SLURM`` are hidden), lets the user pick one by index (default 0)
    and then descends along the dotted path ``params.task.env_args`` inside
    the chosen document. If a key along the path cannot be resolved, the user
    is asked for a replacement dotted path, which is then resolved from the
    root of the chosen document again.

    Args:
        EnvClass: callable accepting ``fps`` plus the keyword arguments found
            at the resolved path.

    Returns:
        ``EnvClass(fps=30, **env_args)``.

    Raises:
        OSError: the config file cannot be opened.
        ValueError: the entered document index is not an integer.
        IndexError: the entered document index is out of range.
    """
    config_path = input('[Path to config> ')
    with open(config_path, 'r') as f:
        # safe_load_all yields None for empty documents; keep only mappings
        # so the 'name' lookup cannot blow up on them.
        docs = [d for d in yaml.safe_load_all(f)
                if isinstance(d, dict) and 'name' in d
                and d['name'] not in ('SLURM',)]
    for i, doc in enumerate(docs):
        name = doc['name']
        print('['+str(i)+'] '+name)
    ds = int(input('[0]> ') or '0')
    doc = docs[ds]

    path = 'params.task.env_args'
    while True:
        # Always restart from the document root: a replacement path entered
        # by the user is a full path, not a continuation of the failed one.
        cur = doc
        failed_key = None
        for key in path.split('.'):
            print(key)
            try:
                cur = cur[key]
            except (KeyError, IndexError, TypeError):
                failed_key = key
                break
        if failed_key is None:
            break
        print('Unable to find key "'+failed_key+'"')
        path = input('[Path> ')
    print(cur)
    return EnvClass(fps=30, **cur)
def chooseEnv(): def chooseEnv():
envs = list(getAvaibleEnvs()) envs = list(getAvaibleEnvs())
for i, Env in enumerate(envs): for i, Env in enumerate(envs):
@ -35,7 +63,10 @@ def chooseEnv():
if i < 0 or i >= len(envs): if i < 0 or i >= len(envs):
print( print(
'[!] That is a number, but not one that makes sense in this context...') '[!] That is a number, but not one that makes sense in this context...')
return envs[i] if envs[i] in [env.ColumbusConfigDefined]:
return loadConfigDefinedEnv(envs[i])
Env = envs[i]
return Env(fps=30)
def playEnv(env): def playEnv(env):