From d8856e7dc99edb918085290af11b509a8a15a68c Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 5 Nov 2022 17:39:13 +0100 Subject: [PATCH] Added predefined color mappers (based on tanh and atan) --- columbus/env.py | 10 +++++++--- columbus/humanPlayer.py | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/columbus/env.py b/columbus/env.py index 6475b9c..1b67f76 100644 --- a/columbus/env.py +++ b/columbus/env.py @@ -45,7 +45,7 @@ def parseObs(obsConf): class ColumbusEnv(gym.Env): metadata = {'render.modes': ['human']} - def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, master_seed=None, start_pos=(0.5, 0.5), start_score=0, speed_fac=0.01, acc_fac=0.04, die_on_zero=False, return_on_score=-1, reward_mult=1, agent_drag=0, controll_type='SPEED', aux_reward_max=1, aux_penalty_max=0, aux_reward_discretize=0, void_is_type_barrier=True, void_damage=1, torus_topology=False, default_collision_elasticity=1, terminate_on_reward=False, agent_draw_path=False, clear_path_on_reset=True, max_steps=-1, value_color_mapper=None): + def __init__(self, observable=observables.Observable(), fps=60, env_seed=3.1, master_seed=None, start_pos=(0.5, 0.5), start_score=0, speed_fac=0.01, acc_fac=0.04, die_on_zero=False, return_on_score=-1, reward_mult=1, agent_drag=0, controll_type='SPEED', aux_reward_max=1, aux_penalty_max=0, aux_reward_discretize=0, void_is_type_barrier=True, void_damage=1, torus_topology=False, default_collision_elasticity=1, terminate_on_reward=False, agent_draw_path=False, clear_path_on_reset=True, max_steps=-1, value_color_mapper='tanh'): super(ColumbusEnv, self).__init__() self.action_space = spaces.Box( low=-1, high=1, shape=(2,), dtype=np.float32) @@ -91,6 +91,11 @@ class ColumbusEnv(gym.Env): self.terminate_on_reward = terminate_on_reward self.agent_draw_path = agent_draw_path self.clear_path_on_reset = clear_path_on_reset + if value_color_mapper == 'atan': + def value_color_mapper(x): return th.atan(x*2)/0.786/2 + elif value_color_mapper == 'tanh': + def value_color_mapper(x): return th.tanh(x*2)/0.762/2 + self.value_color_mapper = value_color_mapper self.max_steps = max_steps @@ -344,10 +349,9 @@ class ColumbusEnv(gym.Env): V = value_func(th.Tensor(np.array(obs))) V /= max(V.max(), -1*V.min())*2 - V += 0.5 - if color_mapper != None: V = color_mapper(V) + V += 0.5 c = 0 for i in range(resolution): diff --git a/columbus/humanPlayer.py b/columbus/humanPlayer.py index d2d2519..a369a13 100644 --- a/columbus/humanPlayer.py +++ b/columbus/humanPlayer.py @@ -71,7 +71,8 @@ def chooseEnv(): def value_func(obs): - return th.rand(obs.shape[0])-0.5 + return obs[:, 0] + # return th.rand(obs.shape[0])-0.5 def playEnv(env):