Compare commits

...

2 Commits

2 changed files with 27 additions and 16 deletions

View File

@ -368,6 +368,9 @@ class TeleportingReward(OnceReward):
self.env.check_collisions_for(self) self.env.check_collisions_for(self)
def on_collected(self): def on_collected(self):
# Force rerender of value func (even in static envs)
self.env._invalidate_value_map()
self.env.new_abs_reward += self.reward self.env.new_abs_reward += self.reward
self.pos = (self.env.random(), self.env.random()) self.pos = (self.env.random(), self.env.random())
self.env.check_collisions_for(self) self.env.check_collisions_for(self)
@ -382,6 +385,9 @@ class LoopReward(OnceReward):
self.barrier_physics = False self.barrier_physics = False
def jump_to_state(self): def jump_to_state(self):
# Force rerender of value func (even in static envs)
self.env._invalidate_value_map()
pos_vec = [v for v in self.loop[self.state]] pos_vec = [v for v in self.loop[self.state]]
if len(pos_vec) == 4: if len(pos_vec) == 4:
pos_vec = pos_vec[0] + pos_vec[2] * \ pos_vec = pos_vec[0] + pos_vec[2] * \
@ -422,6 +428,9 @@ class TimeoutReward(OnceReward):
def on_collected(self): def on_collected(self):
if self.avaible: if self.avaible:
# Force rerender of value func (even in static envs)
self.env._invalidate_value_map()
self.env.new_abs_reward += self.reward self.env.new_abs_reward += self.reward
self.set_avaible(False) self.set_avaible(False)
self.env.timers.append((self.timeout, self.set_avaible, True)) self.env.timers.append((self.timeout, self.set_avaible, True))
@ -468,6 +477,9 @@ class TeleportingGoal(Goal):
self.env.check_collisions_for(self) self.env.check_collisions_for(self)
def on_collected(self): def on_collected(self):
# Force rerender of value func (even in static envs)
self.env._invalidate_value_map()
self.env.new_abs_reward += self.reward self.env.new_abs_reward += self.reward
self.pos = (self.env.random(), self.env.random()) self.pos = (self.env.random(), self.env.random())
self.env.check_collisions_for(self) self.env.check_collisions_for(self)

View File

@ -381,13 +381,13 @@ class ColumbusEnv(gym.Env):
pygame.draw.circle(self.screen, smolcol, (20+int(60*x) + pygame.draw.circle(self.screen, smolcol, (20+int(60*x) +
self.joystick_offset[0], 20+int(60*y)+self.joystick_offset[1]), 20, width=0) self.joystick_offset[0], 20+int(60*y)+self.joystick_offset[1]), 20, width=0)
def _draw_confidence_ellipse(self, chol, forceDraw=False, seconds=1): def _draw_confidence_ellipse(self, chol, forceDraw=False, seconds=0.5):
# The 'seconds'-parameter only really makes sense, when using control_type='SPEED', # The 'seconds'-parameter only really makes sense, when using control_type='SPEED',
# you can still use it to scale the cov-ellipse when using control_type='ACC', # you can still use it to scale the cov-ellipse when using control_type='ACC',
# but it's relation to 'seconds' is no longer there... # but it's relation to 'seconds' is no longer there...
if self.draw_confidence_ellipse and (self.visible or forceDraw): if self.draw_confidence_ellipse and (self.visible or forceDraw):
col = (255, 255, 255) col = (255, 255, 255)
f = seconds/self.speed_fac f = seconds*self.speed_fac*self.fps*max(self.width, self.height)
while len(chol.shape) > 2: while len(chol.shape) > 2:
chol = chol[0] chol = chol[0]
@ -399,21 +399,20 @@ class ColumbusEnv(gym.Env):
L, V = th.linalg.eig(cov) L, V = th.linalg.eig(cov)
L, V = L.real, V.real L, V = L.real, V.real
w, h = int(abs(L[0].item()*f))+1, int(abs(L[1].item()*f))+1 # 5.911 is the magic-number to get a 95%-confidence interval
# In theory we would have to solve: l1, l2 = int(abs(math.sqrt(L[0].item()*5.911)*f)) + \
# R = [[cos, -sin],[sin, cos]] 1, int(abs(math.sqrt(L[1].item()*5.911)*f))+1
# But we only use the -sin term.
# Because of this our calculated angle might be wrong
# by periods of 180°
# But since an ellipsoid does not change under such an 'error',
# we don't care
# ang1 = int(math.acos(V[0, 0])/math.pi*360)
ang2 = int(math.asin(-V[0, 1])/math.pi*360)
# ang3 = int(math.asin(V[1, 0])/math.pi*360)
ang = ang2
# print(cov) if l1 >= l2:
# print(w, h, (ang1, ang2, ang3)) w, h = l1, l2
run, rise = V[0][0], V[0][1]
else:
w, h = l2, l1
run, rise = V[1][0], V[1][1]
ang = (math.atan(rise/run))/(2*math.pi)*360
# print(w, h, (run, rise, ang))
x, y = self.agent.pos x, y = self.agent.pos
x, y = x*self.width, y*self.height x, y = x*self.width, y*self.height