allow unseeded envs; void acting as barrier (enemy); and registering

gym-envs
This commit is contained in:
Dominik Moritz Roth 2022-06-20 23:10:14 +02:00
parent 1c21da5513
commit 1eb86bef06

View File

@ -1,3 +1,4 @@
from gym.envs.registration import register
import gym import gym
from gym import spaces from gym import spaces
import numpy as np import numpy as np
@ -16,7 +17,6 @@ class ColumbusEnv(gym.Env):
low=0, high=1, shape=(2,), dtype=np.float32) low=0, high=1, shape=(2,), dtype=np.float32)
observable._set_env(self) observable._set_env(self)
self.observable = observable self.observable = observable
self.observation_space = self.observable.get_observation_space()
self.title = 'Untitled' self.title = 'Untitled'
self.fps = fps self.fps = fps
self.env_seed = env_seed self.env_seed = env_seed
@ -40,11 +40,17 @@ class ColumbusEnv(gym.Env):
self.draw_observable = True self.draw_observable = True
self.draw_joystick = True self.draw_joystick = True
self.draw_entities = True self.draw_entities = True
self.void_barrier = True
self.void_damage = 100
self.rng = random_dont_use.Random() self.rng = random_dont_use.Random()
self.reset() self.reset()
self.observation_space = self.observable.get_observation_space()
def _seed(self, seed): def _seed(self, seed):
if seed == None:
seed = random_dont_use.random()
self.rng.seed(seed) self.rng.seed(seed)
def random(self): def random(self):
@ -112,6 +118,9 @@ class ColumbusEnv(gym.Env):
reward, self.new_reward, self.new_abs_reward = self.new_reward / \ reward, self.new_reward, self.new_abs_reward = self.new_reward / \
self.fps + self.new_abs_reward, 0, 0 self.fps + self.new_abs_reward, 0, 0
self.score += reward # aux_reward does not count towards the score self.score += reward # aux_reward does not count towards the score
if self.agent.pos[0] < 0.001 or self.agent.pos[0] > 0.999 \
or self.agent.pos[1] < 0.001 or self.agent.pos[1] > 0.999:
reward -= self.void_damage/self.fps
if self.aux_reward_max: if self.aux_reward_max:
reward += self._get_aux_reward() reward += self._get_aux_reward()
done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \ done = self.die_on_zero and self.score <= 0 or self.return_on_score != - \
@ -150,20 +159,7 @@ class ColumbusEnv(gym.Env):
def setup(self): def setup(self):
self.agent.pos = self.start_pos self.agent.pos = self.start_pos
for i in range(18): # Expand this function
enemy = entities.CircleBarrier(self)
enemy.radius = self.random()*40+50
self.entities.append(enemy)
for i in range(3):
enemy = entities.FlyingChaser(self)
enemy.chase_acc = self.random()*0.4*0.3 # *0.6+0.5
self.entities.append(enemy)
for i in range(0):
reward = entities.TimeoutReward(self)
self.entities.append(reward)
for i in range(1):
reward = entities.TeleportingReward(self)
self.entities.append(reward)
def reset(self): def reset(self):
pygame.init() pygame.init()
@ -221,23 +217,158 @@ class ColumbusEnv(gym.Env):
class ColumbusTest3_1(ColumbusEnv): class ColumbusTest3_1(ColumbusEnv):
def __init__(self): def __init__(self, observable=observables.CnnObservable(out_width=48, out_height=48), fps=30):
super(ColumbusTest3_1, self).__init__( super(ColumbusTest3_1, self).__init__(
observable=observables.CnnObservable(out_width=48, out_height=48)) observable=observable, fps=fps, env_seed=3.1)
self.start_pos = [0.6, 0.3] self.start_pos = [0.6, 0.3]
self.fps = 30
self.score = 0 self.score = 0
self.reward_mult = 0.001
self.aux_reward_max = 1 self.aux_reward_max = 1
def setup(self):
self.agent.pos = self.start_pos
for i in range(18):
enemy = entities.CircleBarrier(self)
enemy.radius = self.random()*40+50
self.entities.append(enemy)
for i in range(3):
enemy = entities.FlyingChaser(self)
enemy.chase_acc = self.random()*0.4*0.3 # *0.6+0.5
self.entities.append(enemy)
for i in range(0):
reward = entities.TimeoutReward(self)
self.entities.append(reward)
for i in range(1):
reward = entities.TeleportingReward(self)
self.entities.append(reward)
class ColumbusTestRay(ColumbusEnv):
def __init__(self, hide_map=False): class ColumbusTestRay(ColumbusTest3_1):
def __init__(self, observable=observables.RayObservable(), hide_map=False, fps=30):
super(ColumbusTestRay, self).__init__( super(ColumbusTestRay, self).__init__(
observable=observables.RayObservable()) observable=observable, fps=fps)
self.start_pos = [0.6, 0.3]
self.fps = 30
self.score = 0
self.reward_mult = 0.001
self.aux_reward_max = 1
self.draw_entities = not hide_map self.draw_entities = not hide_map
class ColumbusRayDrone(ColumbusTestRay):
def __init__(self, observable=observables.RayObservable(), hide_map=False, fps=30):
super(ColumbusRayDrone, self).__init__(
observable=observable, hide_map=hide_map, fps=fps)
self.controll_type = 'ACC'
self.agent_drag = 0.02
class ColumbusCandyland(ColumbusEnv):
def __init__(self, observable=observables.RayObservable(chans=[entities.Reward, entities.Void], num_rays=16, include_rand=True), hide_map=False, fps=30):
super(ColumbusCandyland, self).__init__(
observable=observable, fps=fps)
self.draw_entities = not hide_map
def setup(self):
self.agent.pos = self.start_pos
for i in range(0):
reward = entities.TimeoutReward(self)
reward.radius = 30
self.entities.append(reward)
for i in range(2):
reward = entities.TeleportingReward(self)
reward.radius = 30
self.entities.append(reward)
class ColumbusEasyObstacles(ColumbusEnv):
def __init__(self, observable=observables.RayObservable(num_rays=16), hide_map=False, fps=30, env_seed=None):
super(ColumbusEasyObstacles, self).__init__(
observable=observable, fps=fps)
self.draw_entities = not hide_map
self.aux_reward_max = 0.1
def setup(self):
self.agent.pos = self.start_pos
for i in range(5):
enemy = entities.CircleBarrier(self)
enemy.radius = 30 + self.random()*70
self.entities.append(enemy)
for i in range(2):
reward = entities.TeleportingReward(self)
reward.radius = 30
self.entities.append(reward)
for i in range(1):
enemy = entities.WalkingChaser(self)
enemy.chase_speed = 0.1
self.entities.append(enemy)
class ColumbusRewardEnemyPID(ColumbusEnv):
def __init__(self, observable=observables.StateObservable(), fps=30, env_seed=None):
super(ColumbusRewardEnemyPID, self).__init__(
observable=observable, fps=fps)
self.aux_reward_max = 0.1
def setup(self):
self.agent.pos = self.start_pos
# for i in range(2):
# enemy = entities.WalkingChaser(self)
# self.entities.append(enemy)
for i in range(3):
enemy = entities.FlyingChaser(self)
enemy.chase_acc = self.random()*0.4+0.3 # *0.6+0.5
self.entities.append(enemy)
for i in range(1):
reward = entities.TeleportingReward(self)
reward.radius = 30
self.entities.append(reward)
class ColumbusRewardEnemyPIDWithBarriers(ColumbusEnv):
def __init__(self, observable=observables.StateObservable(), fps=30, env_seed=3.1):
super(ColumbusRewardEnemyPIDWithBarriers, self).__init__(
observable=observable, fps=fps)
self.aux_reward_max = 0.01
self.start_pos = (0.5, 0.5)
def setup(self):
self.agent.pos = self.start_pos
for i in range(3):
enemy = entities.CircleBarrier(self)
enemy.radius = self.random()*25+75
self.entities.append(enemy)
for i in range(3):
enemy = entities.FlyingChaser(self)
enemy.chase_acc = self.random()*0.4+0.3 # *0.6+0.5
self.entities.append(enemy)
for i in range(1):
reward = entities.TeleportingReward(self)
reward.radius = 30
self.entities.append(reward)
###
register(
id='ColumbusTestCnn-v0',
entry_point=ColumbusTest3_1,
max_episode_steps=30*60*5,
)
register(
id='ColumbusTestRay-v0',
entry_point=ColumbusTestRay,
max_episode_steps=30*60*5,
)
register(
id='ColumbusRayDrone-v0',
entry_point=ColumbusRayDrone,
max_episode_steps=30*60*5,
)
register(
id='ColumbusCandyland-v0',
entry_point=ColumbusCandyland,
max_episode_steps=30*60*5,
)
register(
id='ColumbusEasyObstacles-v0',
entry_point=ColumbusEasyObstacles,
max_episode_steps=30*60*2,
)