initial commit
This commit is contained in:
commit
65cd0516cd
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
3
README.md
Normal file
3
README.md
Normal file
@ -0,0 +1,3 @@
|
||||
# Project Columbus
|
||||
|
||||
Project Columbus is a framework for trivial 2D OpenAI Gym environments that are supposed to test an agent's ability to solve tasks that require different forms of exploration effectively and efficiently.
|
195
entities.py
Normal file
195
entities.py
Normal file
@ -0,0 +1,195 @@
|
||||
import pygame
|
||||
import math
|
||||
|
||||
|
||||
class Entity(object):
    """Base class for every object that lives inside the environment.

    Positions are stored as fractions of the playing field:
    ``physics_step`` clamps each coordinate to [0, 1], and ``draw`` scales
    them by the environment's pixel width/height.
    """

    def __init__(self, env):
        """Place the entity at a random position with neutral defaults."""
        self.env = env
        start_x = env.random()
        start_y = env.random()
        self.pos = (start_x, start_y)
        self.speed = (0, 0)
        self.acc = (0, 0)
        self.drag = 0
        self.radius = 10
        self.col = (255, 255, 255)
        self.shape = 'circle'

    def physics_step(self):
        """Integrate acceleration and speed once and keep the entity in bounds."""
        pos_x, pos_y = self.pos
        vel_x, vel_y = self.speed
        acc_x, acc_y = self.acc

        # Acceleration feeds the velocity first, then velocity moves the entity.
        vel_x = vel_x + acc_x * self.env.acc_fac
        vel_y = vel_y + acc_y * self.env.acc_fac
        pos_x = pos_x + vel_x * self.env.speed_fac
        pos_y = pos_y + vel_y * self.env.speed_fac

        # Leaving the field on an axis clamps the coordinate and stops the
        # motion along that axis.
        if pos_x > 1 or pos_x < 0:
            pos_x = min(max(pos_x, 0), 1)
            vel_x = 0
        if pos_y > 1 or pos_y < 0:
            pos_y = min(max(pos_y, 0), 1)
            vel_y = 0

        damping = 1 + self.drag
        self.speed = vel_x / damping, vel_y / damping
        self.pos = pos_x, pos_y

    def controll_step(self):
        """Hook for subclasses to steer the entity; does nothing by default."""
        pass

    def step(self):
        """Advance the entity by one tick: control first, physics second."""
        self.controll_step()
        self.physics_step()

    def draw(self):
        """Render the entity as a filled circle on the environment surface."""
        frac_x, frac_y = self.pos
        centre = (frac_x * self.env.width, frac_y * self.env.height)
        pygame.draw.circle(self.env.surface, self.col, centre,
                           self.radius, width=0)

    def on_collision(self, other):
        """Hook invoked when this entity overlaps ``other``; no-op by default."""
        pass

    def kill(self):
        """Ask the environment to remove this entity."""
        self.env.kill_entity(self)
|
||||
|
||||
|
||||
class Agent(Entity):
    """The controllable entity, steered by the environment's current input."""

    def __init__(self, env):
        """Start in the centre of the field using the env's control settings."""
        super().__init__(env)
        self.pos = (0.5, 0.5)
        self.col = (0, 0, 255)
        self.drag = self.env.agent_drag
        self.controll_type = self.env.controll_type

    def controll_step(self):
        """Apply the current input, then let the env resolve collisions."""
        self._read_input()
        self.env.check_collisions_for(self)

    def _read_input(self):
        """Map the env input (centred on 0.5) onto speed or acceleration.

        Raises:
            Exception: if ``controll_type`` is neither 'SPEED' nor 'ACC'.
        """
        mode = self.controll_type
        if mode not in ('SPEED', 'ACC'):
            raise Exception('Unsupported controll_type')
        # Shift the input so that 0.5 means "no movement".
        steer = self.env.inp[0] - 0.5, self.env.inp[1] - 0.5
        if mode == 'SPEED':
            self.speed = steer
        else:
            self.acc = steer
|
||||
|
||||
|
||||
class Enemy(Entity):
    """Red entity that subtracts reward while it touches the agent."""

    def __init__(self, env):
        """Create the enemy with its default colour and damage."""
        super().__init__(env)
        self.col = (255, 0, 0)
        self.damage = 10

    def on_collision(self, other):
        """Deduct ``damage`` from the env's pending reward when hitting an Agent."""
        if not isinstance(other, Agent):
            return
        self.env.new_reward -= self.damage
|
||||
|
||||
|
||||
class Barrier(Enemy):
    """Enemy subtype used as a marker class for barriers (adds no behaviour)."""

    def __init__(self, env):
        """Initialise exactly like a plain Enemy."""
        super().__init__(env)
|
||||
|
||||
|
||||
class CircleBarrier(Barrier):
    """Barrier subtype for circular barriers (adds no behaviour of its own)."""

    def __init__(self, env):
        """Initialise exactly like a plain Barrier."""
        super().__init__(env)
|
||||
|
||||
|
||||
class Chaser(Enemy):
    """Enemy that computes a steering direction towards the env's agent."""

    def __init__(self, env):
        """Target the env's agent with a stiff, non-predictive arrow."""
        super().__init__(env)
        self.target = self.env.agent
        self.arrow_fak = 100
        self.lookahead = 0

    def _get_arrow(self):
        """Return the direction towards the target, limited to the unit circle.

        The chaser's own position is first extrapolated ``lookahead`` steps
        along its current speed; the offset from that point to the target
        is scaled by ``arrow_fak`` before the env limits its length.
        """
        target_x, target_y = self.target.pos
        own_x, own_y = self.pos
        future_x = own_x + self.speed[0] * self.lookahead * self.env.speed_fac
        future_y = own_y + self.speed[1] * self.lookahead * self.env.speed_fac
        pull = ((target_x - future_x) * self.arrow_fak,
                (target_y - future_y) * self.arrow_fak)
        return self.env._limit_to_unit_circle(pull)
|
||||
|
||||
|
||||
class WalkingChaser(Chaser):
    """Chaser that sets its speed directly towards the target every tick."""

    def __init__(self, env):
        """Create the chaser with its default colour and chase speed."""
        super().__init__(env)
        self.col = (255, 0, 0)
        self.chase_speed = 0.45

    def controll_step(self):
        """Point the speed vector along the arrow, scaled by ``chase_speed``."""
        heading = self._get_arrow()
        pace = self.chase_speed
        self.speed = (heading[0] * pace, heading[1] * pace)
|
||||
|
||||
|
||||
class FlyingChaser(Chaser):
    """Chaser that accelerates towards the target instead of setting speed."""

    def __init__(self, env):
        """Create the chaser with a softer arrow and a randomised lookahead."""
        super().__init__(env)
        self.col = (255, 0, 0)
        self.chase_acc = 0.5
        self.arrow_fak = 5
        # Lookahead lies in [8, 10), drawn from the env's random source.
        self.lookahead = 8 + env.random()*2

    def controll_step(self):
        """Point the acceleration along the arrow, scaled by ``chase_acc``."""
        heading = self._get_arrow()
        thrust = self.chase_acc
        self.acc = (heading[0] * thrust, heading[1] * thrust)
|
||||
|
||||
|
||||
class Reward(Entity):
    """Green entity that adds to the env's pending reward on agent contact."""

    def __init__(self, env):
        """Create an available reward worth 1 that may overlap barriers."""
        super().__init__(env)
        self.col = (0, 255, 0)
        self.avaible = True
        self.enforce_not_on_barrier = False
        self.reward = 1

    def on_collision(self, other):
        """Dispatch on the collider: agents collect, barriers may relocate us."""
        if isinstance(other, Agent):
            self.on_collect()
            return
        if isinstance(other, Barrier):
            self.on_barrier_collision()

    def on_collect(self):
        """Add this reward's value to the env's pending (fps-scaled) reward."""
        self.env.new_reward += self.reward

    def on_barrier_collision(self):
        """Jump to a new random spot if barriers are not allowed underneath.

        The collision check is repeated for the new position, so the reward
        keeps moving until it no longer overlaps a barrier.
        """
        if not self.enforce_not_on_barrier:
            return
        self.pos = (self.env.random(), self.env.random())
        self.env.check_collisions_for(self)
|
||||
|
||||
|
||||
class OnceReward(Reward):
    """Reward that pays out a single time and then removes itself."""

    def __init__(self, env):
        """Create the reward with a value of 100."""
        super().__init__(env)
        self.reward = 100

    def on_collect(self):
        """Add the value to the env's absolute reward and remove this entity."""
        self.env.new_abs_reward += self.reward
        self.kill()
|
||||
|
||||
|
||||
class TeleportingReward(OnceReward):
    """Reward that relocates to a random position each time it is collected."""

    def __init__(self, env):
        """Create the reward and immediately resolve overlaps with barriers."""
        super().__init__(env)
        self.enforce_not_on_barrier = True
        self.env.check_collisions_for(self)

    def on_collect(self):
        """Pay out the absolute reward, then teleport and re-check collisions."""
        self.env.new_abs_reward += self.reward
        new_x = self.env.random()
        new_y = self.env.random()
        self.pos = (new_x, new_y)
        self.env.check_collisions_for(self)
|
||||
|
||||
|
||||
class TimeoutReward(OnceReward):
    """Reward that becomes unavailable for ``timeout`` after being collected."""

    def __init__(self, env):
        """Create the reward and resolve overlaps with barriers."""
        super(TimeoutReward, self).__init__(env)
        self.enforce_not_on_barrier = True
        # Set the timeout before the collision check: the check can reach
        # on_collect(), which reads self.timeout.
        self.timeout = 10
        self.env.check_collisions_for(self)

    def set_avaible(self, value):
        """Switch availability and recolour accordingly.

        Args:
            value: truthy to make the reward collectable again (bright
                green), falsy to disable it (dim green).
        """
        self.avaible = value
        if self.avaible:
            self.col = (0, 255, 0)
        else:
            self.col = (50, 100, 50)

    def on_collect(self):
        """Pay out once, then disable the reward until the timer re-enables it."""
        if self.avaible:
            self.env.new_abs_reward += self.reward
            self.set_avaible(False)
            # Timer entry: (time left, callback, callback argument).
            self.env.timers.append((self.timeout, self.set_avaible, True))
|
192
env.py
Normal file
192
env.py
Normal file
@ -0,0 +1,192 @@
|
||||
import gym
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
import pygame
|
||||
import random as random_dont_use
|
||||
import math
|
||||
import entities
|
||||
import observables
|
||||
|
||||
|
||||
class Base2DExpEnv(gym.Env):
    """Gym environment hosting a 2D field of entities steered by a 2D input.

    Actions are two floats in [0, 1]; (0.5, 0.5) is the neutral joystick
    position.  ``step`` returns ``(observation, reward, 0, score)``: the
    third element is always 0 and the fourth carries the accumulated score
    rather than an info dict.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, observable=None, fps=60, env_seed=3.1):
        """Configure the environment and perform an initial reset.

        Args:
            observable: object providing the observation space and the
                observations; a fresh ``observables.Observable`` is created
                when omitted.
            fps: simulated frames per second; scales physics factors,
                timers and per-second rewards.
            env_seed: seed handed to the environment's private RNG.
        """
        super(Base2DExpEnv, self).__init__()
        # Build the default per instance; a default created in the signature
        # would be one object shared (and re-bound) by every environment.
        if observable is None:
            observable = observables.Observable()
        self.action_space = spaces.Box(
            low=0, high=1, shape=(2,), dtype=np.float32)
        observable._set_env(self)
        self.observable = observable
        self.observation_space = self.observable.get_observation_space()
        self.title = 'Untitled'
        self.fps = fps
        self.env_seed = env_seed
        self.joystick_offset = (10, 10)
        self.surface = None
        self.screen = None
        self.width = 720
        self.height = 720
        self.speed_fac = 0.01/fps*60
        self.acc_fac = 0.03/fps*60
        self.agent_drag = 0  # 0.01 is a good value
        self.controll_type = 'SPEED'  # one of SPEED, ACC
        self.limit_inp_to_unit_circle = True
        self.aux_reward_max = 0  # 0 = off
        self.aux_reward_discretize = 0  # 0 = dont discretize
        self.draw_observable = True
        self.draw_joystick = True

        self.rng = random_dont_use.Random()
        self.reset()

    def _seed(self, seed):
        """Seed the environment's private random number generator."""
        self.rng.seed(seed)

    def random(self):
        """Return the next float in [0, 1) from the environment's RNG."""
        return self.rng.random()

    def _ensure_surface(self):
        """Create the drawing surface and the display window on first use."""
        if not self.surface:
            self.surface = pygame.Surface((self.width, self.height))
            self.screen = pygame.display.set_mode((self.width, self.height))
            pygame.display.set_caption(self.title)

    def _limit_to_unit_circle(self, coords):
        """Shrink ``coords`` onto the unit circle if it lies outside of it."""
        l_sq = coords[0]**2 + coords[1]**2
        if l_sq > 1:
            length = math.sqrt(l_sq)
            coords = coords[0] / length, coords[1] / length
        return coords

    def _step_entities(self):
        """Advance every entity by one tick."""
        for entity in self.entities:
            entity.step()

    def _step_timers(self):
        """Count all timers down by one frame and fire the expired ones."""
        new_timers = []
        for time_left, func, arg in self.timers:
            time_left -= 1/self.fps
            if time_left < 0:
                func(arg)
            else:
                new_timers.append((time_left, func, arg))
        self.timers = new_timers

    def sq_dist(self, entity1, entity2):
        """Return the squared distance between two entities (field units)."""
        return (entity1.pos[0] - entity2.pos[0])**2 + (entity1.pos[1] - entity2.pos[1])**2

    def dist(self, entity1, entity2):
        """Return the distance between two entities (field units)."""
        # Fixed: this used to call the nonexistent self._sq_dist.
        return math.sqrt(self.sq_dist(entity1, entity2))

    def _get_aux_reward(self):
        """Sum a proximity bonus over all available rewards.

        Each available reward contributes ``aux_reward_max / (1 + d^2)``
        where ``d`` is its distance to the agent, optionally quantised via
        ``aux_reward_discretize``.
        """
        aux_reward = 0
        for entity in self.entities:
            if isinstance(entity, entities.Reward):
                if entity.avaible:
                    reward = self.aux_reward_max / \
                        (1 + self.sq_dist(entity, self.agent))

                    if self.aux_reward_discretize:
                        reward = int(reward*self.aux_reward_discretize*2) / \
                            self.aux_reward_discretize / 2

                    aux_reward += reward
        return aux_reward

    def step(self, action):
        """Apply ``action``, advance the simulation and collect the reward.

        Args:
            action: indexable with two values in [0, 1].

        Returns:
            Tuple ``(observation, reward, 0, score)``.  The third element is
            always 0 (episodes never end here) and the fourth is the
            accumulated score, not an info dict.
        """
        inp = action[0], action[1]
        if self.limit_inp_to_unit_circle:
            # Map [0, 1] to [-1, 1], limit the length, then map back.
            inp = self._limit_to_unit_circle(((inp[0]-0.5)*2, (inp[1]-0.5)*2))
            inp = (inp[0]+1)/2, (inp[1]+1)/2
        self.inp = inp
        self._step_timers()
        self._step_entities()
        observation = self.observable.get_observation()
        # new_reward is a per-second rate and is scaled by fps;
        # new_abs_reward is paid out in full.
        reward = self.new_reward / self.fps + self.new_abs_reward
        self.new_reward = 0
        self.new_abs_reward = 0
        self.score += reward  # aux_reward does not count towards the score
        if self.aux_reward_max:
            reward += self._get_aux_reward()
        return observation, reward, 0, self.score

    def check_collisions_for(self, entity):
        """Notify ``entity`` and every entity overlapping it of the collision.

        Distances are measured in pixels so that they can be compared with
        the entities' radii.
        """
        for other in self.entities:
            if other != entity:
                sq_dist = ((other.pos[0]-entity.pos[0])*self.width) ** 2 \
                    + ((other.pos[1]-entity.pos[1])*self.height)**2
                if sq_dist < (entity.radius + other.radius)**2:
                    entity.on_collision(other)
                    other.on_collision(entity)

    def kill_entity(self, target):
        """Remove ``target`` from the entity list, keeping all others."""
        # Fixed: the old loop stopped at the target and thereby also dropped
        # every entity listed after it.
        self.entities = [
            entity for entity in self.entities if entity != target]

    def setup(self):
        """Populate the field with barriers, chasers and rewards."""
        for i in range(16):
            enemy = entities.CircleBarrier(self)
            enemy.radius = self.random()*40+50
            self.entities.append(enemy)
        for i in range(3):
            enemy = entities.FlyingChaser(self)
            enemy.chase_acc = self.random()*0.4*0.3  # *0.6+0.5
            self.entities.append(enemy)
        for i in range(0):
            reward = entities.TimeoutReward(self)
            self.entities.append(reward)
        for i in range(1):
            reward = entities.TeleportingReward(self)
            self.entities.append(reward)

    def reset(self):
        """Reset input, rewards, score, timers and entities.

        Returns:
            0.  NOTE(review): Gym expects the initial observation here;
            confirm how it should be produced before changing this.
        """
        pygame.init()
        self.inp = (0.5, 0.5)
        # will get rescaled acording to fps (=reward per second)
        self.new_reward = 0
        self.new_abs_reward = 0  # will not get rescaled. should be used for one-time rewards
        self.score = 0
        self.entities = []
        self.timers = []
        self.agent = entities.Agent(self)
        self.setup()
        self.entities.append(self.agent)  # add it last, will be drawn on top
        # NOTE(review): the RNG is seeded only after setup() has already
        # drawn from it - confirm whether seeding should come first.
        self._seed(self.env_seed)
        return 0

    def _draw_entities(self):
        """Draw every entity onto the off-screen surface."""
        for entity in self.entities:
            entity.draw()

    def _draw_observable(self, forceDraw=False):
        """Draw the observable preview if enabled or forced."""
        if self.draw_observable or forceDraw:
            self.observable.draw()

    def _draw_joystick(self, forceDraw=False):
        """Draw the joystick overlay if enabled or forced."""
        # Fixed: forceDraw used to be ignored here.
        if self.draw_joystick or forceDraw:
            x, y = self.inp
            pygame.draw.circle(self.screen, (100, 100, 100), (50 +
                               self.joystick_offset[0], 50+self.joystick_offset[1]), 50, width=1)
            pygame.draw.circle(self.screen, (100, 100, 100), (20+int(60*x) +
                               self.joystick_offset[0], 20+int(60*y)+self.joystick_offset[1]), 20, width=0)

    def render(self, mode='human'):
        """Draw the current state to the display window."""
        self._ensure_surface()
        pygame.draw.rect(self.surface, (0, 0, 0),
                         pygame.Rect(0, 0, self.width, self.height))
        self._draw_entities()
        self.screen.blit(self.surface, (0, 0))
        self._draw_observable()
        self._draw_joystick()
        pygame.display.update()

    def close(self):
        """Close the display window and shut pygame down."""
        pygame.display.quit()
        pygame.quit()
|
43
humanPlayer.py
Normal file
43
humanPlayer.py
Normal file
@ -0,0 +1,43 @@
|
||||
from time import sleep, time
|
||||
from env import Base2DExpEnv
|
||||
import numpy as np
|
||||
import pygame
|
||||
|
||||
from observables import Observable, CnnObservable
|
||||
|
||||
|
||||
def main():
    """Create the default environment and let a human play it."""
    env = Base2DExpEnv(fps=60, observable=CnnObservable())
    try:
        playEnv(env)
    finally:
        # Always release the pygame window, even if playing raised.
        env.close()
|
||||
|
||||
|
||||
def playEnv(env):
    """Run ``env`` interactively, steering with the mouse over the joystick.

    The loop renders, converts the mouse position to an action in [0, 1]^2,
    steps the environment and prints the reward and score.  It ends when
    the environment reports done or the window is closed.

    Args:
        env: environment providing ``reset``, ``render``, ``step``, ``fps``
            and ``joystick_offset``.
    """
    env.reset()
    done = False
    while not done:
        t1 = time()
        env.render()
        # Draining the queue keeps the window responsive; a close request
        # ends the session instead of being discarded.
        if any(event.type == pygame.QUIT for event in pygame.event.get()):
            return
        pos = pygame.mouse.get_pos()
        # Map the mouse position inside the joystick area to [0, 1].
        pos = (min(max((pos[0]-env.joystick_offset[0]-20)/60, 0), 1),
               min(max((pos[1]-env.joystick_offset[1]-20)/60, 0), 1))
        _obs, rew, done, info = env.step(np.array(pos, dtype=np.float32))
        print('Reward: '+str(rew))
        print('Score: '+str(info))
        t2 = time()
        dt = t2 - t1
        # Sleep for whatever is left of this frame's time budget.
        delay = (1/env.fps - dt)
        if delay < 0:
            print("[!] Can't keep framerate!")
        else:
            sleep(delay)
|
||||
|
||||
|
||||
# Start the interactive session only when this file is run as a script.
if __name__ == '__main__':
    main()
|
69
observables.py
Normal file
69
observables.py
Normal file
@ -0,0 +1,69 @@
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
import pygame
|
||||
|
||||
|
||||
class Observable():
    """Dummy observable: provides the observable interface without content.

    It implements every method the environment calls on an observable
    (``_set_env``, ``get_observation_space``, ``get_observation``, ``draw``)
    so that it can serve as a default.
    """

    def __init__(self):
        self.obs = None
        self.env = None

    def _set_env(self, env):
        """Remember the environment this observable belongs to."""
        self.env = env

    def get_observation_space(self):
        """Return a minimal one-element uint8 space and warn about its use."""
        print("[!] Using dummyObservable. Env won't output anything")
        return spaces.Box(low=0, high=255,
                          shape=(1,), dtype=np.uint8)

    def get_observation(self):
        """Return an all-zero observation matching the declared space."""
        return np.zeros((1,), dtype=np.uint8)

    def draw(self):
        """Draw nothing; the dummy has no visual representation."""
        pass
|
||||
|
||||
|
||||
class CnnObservable(Observable):
    """Observable that yields a down-scaled crop centred on the agent."""

    def __init__(self, in_width=256, in_height=256, out_width=32, out_height=32, draw_width=128, draw_height=128, smooth_scaling=True):
        """Store the crop, output and preview sizes and pick the scaler.

        Args:
            in_width, in_height: pixel size of the crop taken around the agent.
            out_width, out_height: pixel size of the returned observation.
            draw_width, draw_height: pixel size of the on-screen preview.
            smooth_scaling: use ``pygame.transform.smoothscale`` for the
                observation instead of ``pygame.transform.scale``.
        """
        super().__init__()
        self.in_width, self.in_height = in_width, in_height
        self.out_width, self.out_height = out_width, out_height
        self.draw_width, self.draw_height = draw_width, draw_height
        self.scaler = (pygame.transform.smoothscale if smooth_scaling
                       else pygame.transform.scale)

    def _set_env(self, env):
        """Remember the environment this observable reads from."""
        self.env = env

    def get_observation_space(self):
        """Return a uint8 Box of shape ``(out_width, out_height)``."""
        dims = (self.out_width, self.out_height)
        return spaces.Box(low=0, high=255, shape=dims, dtype=np.uint8)

    def get_observation(self):
        """Crop the env surface around the agent and scale it down.

        The part of the crop window that falls outside the surface is
        filled with dark grey.  The scaled pygame surface is stored in
        ``self.obs`` and returned.
        """
        env = self.env
        # Top-left corner of the crop window; may lie outside the surface.
        left = env.agent.pos[0]*env.width - self.in_width / 2
        top = env.agent.pos[1]*env.height - self.in_height/2

        # Restrict the window to the part that exists on the surface.
        clip_left = _clip(left, 0, env.width)
        clip_top = _clip(top, 0, env.height)
        clip_w = _clip(self.in_width, 0, env.width - clip_left)
        clip_h = _clip(self.in_height, 0, env.height - clip_top)
        visible = env.surface.subsurface(
            pygame.Rect(clip_left, clip_top, clip_w, clip_h))

        # Paste the visible part onto a grey canvas of the full crop size.
        self.snap = pygame.Surface((self.in_width, self.in_height))
        backdrop = pygame.Rect(0, 0, self.in_width, self.in_height)
        pygame.draw.rect(self.snap, (50, 50, 50), backdrop)
        self.snap.blit(visible, (clip_left - left, clip_top - top))

        self.obs = self.scaler(self.snap, (self.out_width, self.out_height))
        return self.obs

    def draw(self):
        """Draw an enlarged preview of the observation in the top-right corner."""
        if not self.obs:
            self.get_observation()
        preview = pygame.transform.scale(
            self.obs, (self.draw_width, self.draw_height))
        left = self.env.width - self.draw_width - 10
        top = 10
        # A slightly larger grey rectangle behind the preview acts as a border.
        frame = pygame.Rect(left - 1, top - 1,
                            self.draw_width + 2, self.draw_height + 2)
        pygame.draw.rect(self.env.screen, (50, 50, 50), frame)
        self.env.screen.blit(preview, (left, top))
|
||||
|
||||
|
||||
def _clip(num, lower, upper):
|
||||
return min(max(num, lower), upper)
|
Loading…
Reference in New Issue
Block a user