Compare commits
4 Commits: 962bc4ff32 ... 29854b2b5c

| Author | SHA1 | Date |
|---|---|---|
| | 29854b2b5c | |
| | d998d816a1 | |
| | d92758c4fd | |
| | db407b0819 | |

@@ -135,7 +135,7 @@ class Reward(Entity):
         self.col = (0, 255, 0)
         self.avaible = True
         self.enforce_not_on_barrier = False
-        self.reward = 1
+        self.reward = 10

     def on_collision(self, other):
         if isinstance(other, Agent):

@@ -155,7 +155,7 @@ class Reward(Entity):
 class OnceReward(Reward):
     def __init__(self, env):
         super(OnceReward, self).__init__(env)
-        self.reward = 100
+        self.reward = 500

     def on_collect(self):
         self.env.new_abs_reward += self.reward

@@ -15,7 +15,7 @@ class ColumbusEnv(gym.Env):
     def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1):
         super(ColumbusEnv, self).__init__()
         self.action_space = spaces.Box(
-            low=0, high=1, shape=(2,), dtype=np.float32)
+            low=-1, high=1, shape=(2,), dtype=np.float32)
         observable._set_env(self)
         self.observable = observable
         self.title = 'Untitled'

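The action space is widened here from `[0, 1]` to the symmetric `[-1, 1]` on both axes. A minimal sketch of what callers can now expect from the changed `Box` (assumes `gym` and `numpy` are installed; not taken from the repo):

```python
import numpy as np
from gym import spaces

# Same Box as in the diff above: two axes, now spanning [-1, 1].
action_space = spaces.Box(low=-1, high=1, shape=(2,), dtype=np.float32)
a = action_space.sample()
assert np.all(a >= -1.0) and np.all(a <= 1.0)
```
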
@@ -112,7 +112,8 @@ class ColumbusEnv(gym.Env):
         return aux_reward

     def step(self, action):
-        inp = action[0], action[1]
+        # TODO: Just make the range consistent...
+        inp = (action[0]+1)/2, (action[1]+1)/2
         if self._disturb_next:
             inp = self._disturb_next
             self._disturb_next = False

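Inside `step()` the symmetric action is mapped back to the `[0, 1]` range the rest of the engine still uses. A standalone illustration of that remapping (sketch, not repo code):

```python
def to_internal(action):
    # Shift [-1, 1] into [0, 1] per axis, mirroring `inp = (action[0]+1)/2, (action[1]+1)/2`.
    return (action[0] + 1) / 2, (action[1] + 1) / 2

assert to_internal((-1.0, 1.0)) == (0.0, 1.0)   # the new extremes map to the old extremes
assert to_internal((0.0, 0.0)) == (0.5, 0.5)    # the new neutral action is the old center
```
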
@@ -322,8 +323,8 @@ class ColumbusCandyland(ColumbusEnv):


 class ColumbusCandyland_Aux10(ColumbusCandyland):
-    def __init__(self):
-        super(ColumbusCandyland_Aux10, self).__init__()
+    def __init__(self, fps=30):
+        super(ColumbusCandyland_Aux10, self).__init__(fps=fps)
         self.aux_reward_max = 10


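`ColumbusCandyland_Aux10` now exposes the frame rate and forwards it to the parent environment. Hypothetical usage (class name from the diff, everything else assumed):

```python
# Previously the frame rate was fixed by the parent class; now it can be overridden.
env = ColumbusCandyland_Aux10(fps=60)
```
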
@@ -375,41 +376,41 @@ class ColumbusEasierObstacles(ColumbusEnv):


 class ColumbusJustState(ColumbusEnv):
-    def __init__(self, observable=observables.StateObservable(), fps=30, env_seed=None):
+    def __init__(self, observable=observables.StateObservable(), fps=30, num_enemies=0, num_rewards=1, env_seed=None):
         super(ColumbusJustState, self).__init__(
             observable=observable, fps=fps)
-        self.aux_reward_max = 0.1
+        self.aux_reward_max = 1
+        self.num_enemies = num_enemies
+        self.num_rewards = num_rewards

     def setup(self):
         self.agent.pos = self.start_pos
-        # for i in range(2):
-        #     enemy = entities.WalkingChaser(self)
-        #     self.entities.append(enemy)
-        for i in range(3):
+        for i in range(self.num_enemies):
             enemy = entities.FlyingChaser(self)
             enemy.chase_acc = self.random()*0.4+0.3  # *0.6+0.5
             self.entities.append(enemy)
-        for i in range(1):
+        for i in range(self.num_rewards):
             reward = entities.TeleportingReward(self)
             reward.radius = 30
             self.entities.append(reward)


 class ColumbusStateWithBarriers(ColumbusEnv):
-    def __init__(self, observable=observables.StateObservable(coordsAgent=True, speedAgent=False, coordsRelativeToAgent=False, coordsRewards=True, rewardsWhitelist=None, coordsEnemys=True, enemysWhitelist=None, enemysNoBarriers=True, rewardsTimeouts=False, include_rand=True), fps=30, env_seed=3.141, num_chasers=1):
+    def __init__(self, observable=observables.StateObservable(coordsAgent=True, speedAgent=False, coordsRelativeToAgent=False, coordsRewards=True, rewardsWhitelist=None, coordsEnemys=True, enemysWhitelist=None, enemysNoBarriers=True, rewardsTimeouts=False, include_rand=True), fps=30, env_seed=3.141, num_enemys=0, num_barriers=3):
         super(ColumbusStateWithBarriers, self).__init__(
             observable=observable, fps=fps, env_seed=env_seed)
-        self.aux_reward_max = 10
+        self.aux_reward_max = 1
         self.start_pos = (0.5, 0.5)
-        self.num_chasers = num_chasers
+        self.num_barriers = num_barriers
+        self.num_enemys = num_enemys

     def setup(self):
         self.agent.pos = self.start_pos
-        for i in range(3):
+        for i in range(self.num_barriers):
             enemy = entities.CircleBarrier(self)
             enemy.radius = self.random()*25+75
             self.entities.append(enemy)
-        for i in range(self.num_chasers):
+        for i in range(self.num_enemys):
             enemy = entities.FlyingChaser(self)
             enemy.chase_acc = 0.55  # *0.6+0.5
             self.entities.append(enemy)

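Both environments trade hard-coded entity counts for constructor parameters, so the number of chasers, rewards, and barriers can be chosen per experiment. Hypothetical instantiations (parameter names taken from the diff, import path assumed):

```python
env_a = ColumbusJustState(num_enemies=3, num_rewards=2)          # 3 FlyingChasers, 2 TeleportingRewards
env_b = ColumbusStateWithBarriers(num_barriers=5, num_enemys=1)  # 5 CircleBarriers, 1 FlyingChaser
```
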
@@ -469,6 +470,12 @@ register(
     max_episode_steps=30*60*2,
 )

+register(
+    id='ColumbusJustState-v0',
+    entry_point=ColumbusJustState,
+    max_episode_steps=30*60*2,
+)
+
 register(
     id='ColumbusStateWithBarriers-v0',
     entry_point=ColumbusStateWithBarriers,

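With the added `register()` call the environment can also be constructed through the Gym registry. A sketch, assuming the module containing the registrations has been imported:

```python
import gym

env = gym.make('ColumbusJustState-v0')  # id added by this commit
obs = env.reset()
```
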
@@ -48,6 +48,7 @@ def playEnv(env):
         pos = pygame.mouse.get_pos()
         pos = (min(max((pos[0]-env.joystick_offset[0]-20)/60, 0), 1),
                min(max((pos[1]-env.joystick_offset[1]-20)/60, 0), 1))
+        pos = pos[0]*2-1, pos[1]*2-1
         obs, rew, done, info = env.step(np.array(pos, dtype=np.float32))
         print('Reward: '+str(rew))
         print('Score: '+str(info))

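The human-play loop still clamps the mouse position to `[0, 1]` per axis and then stretches it to `[-1, 1]` so it matches the new action space. Standalone illustration of the added line:

```python
def to_action(p):
    # `pos = pos[0]*2-1, pos[1]*2-1` from the diff: [0, 1] -> [-1, 1] per axis.
    return p[0] * 2 - 1, p[1] * 2 - 1

assert to_action((0.0, 1.0)) == (-1.0, 1.0)
assert to_action((0.5, 0.5)) == (0.0, 0.0)
```
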
@@ -209,7 +209,7 @@ class StateObservable(Observable):
         return self._entities

     def get_observation_space(self):
-        self.env.setup()
+        self.env.reset()
         num = len(self.entities)*2+len(self._timeoutEntities) + \
             self.speedAgent + self.include_rand
         return spaces.Box(low=0-1*self.coordsRelativeToAgent, high=1,

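`get_observation_space()` now runs a full `reset()` instead of only `setup()`, so the entity lists are in the same state as during an episode before the vector length is counted. A rough sketch of how that length is composed (illustrative values, not taken from the repo):

```python
entities = ['agent', 'reward', 'enemy']  # each tracked entity contributes an (x, y) pair
timeout_entities = []                    # entities that additionally report a timeout value
speed_agent, include_rand = False, True  # optional extra features, counted as 0/1

num = len(entities) * 2 + len(timeout_entities) + speed_agent + include_rand
assert num == 7
```
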
@@ -241,10 +241,17 @@ class StateObservable(Observable):
         return np.array(obs)

     def draw(self):
+        ofs = (0 + self.env.height/2*self.coordsRelativeToAgent,
+               0 + self.env.width/2*self.coordsRelativeToAgent)
+        if self.coordsRelativeToAgent:
+            pygame.draw.circle(self.env.screen, self.env.agent.col,
+                               (0, self.env.height/2), 3, width=0)
+            pygame.draw.circle(self.env.screen, self.env.agent.col,
+                               (self.env.width/2, 0), 3, width=0)
         for i in range(int(len(self.obs)/2)):
             x, y = self.obs[i*2], self.obs[i*2+1]
             col = self.entities[i].col
             pygame.draw.circle(self.env.screen, col,
-                               (0, y*self.env.height), 1, width=0)
+                               (0, y*self.env.height+ofs[0]), 1, width=0)
             pygame.draw.circle(self.env.screen, col,
-                               (x*self.env.width, 0), 1, width=0)
+                               (x*self.env.width+ofs[1], 0), 1, width=0)

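When the observable reports agent-relative coordinates, the debug dots drawn along the screen edges are now shifted by half the screen size so their origin lines up with the newly drawn agent markers. A standalone sketch of the offset computation (function name and screen sizes assumed):

```python
def offsets(width, height, coords_relative_to_agent):
    # Mirrors `ofs` in the diff: zero when absolute, half the screen when agent-relative.
    return (height / 2 * coords_relative_to_agent,
            width / 2 * coords_relative_to_agent)

assert offsets(640, 480, False) == (0, 0)
assert offsets(640, 480, True) == (240.0, 320.0)
```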