2021-05-11 06:19:30 +02:00
|
|
|
from typing import Tuple, Union
|
|
|
|
|
2021-03-26 14:05:16 +01:00
|
|
|
from gym import Env
|
|
|
|
|
2021-05-11 06:19:30 +02:00
|
|
|
from alr_envs.utils.positional_env import PositionalEnv
|
2021-02-05 17:10:03 +01:00
|
|
|
|
2021-02-11 10:49:57 +01:00
|
|
|
|
|
|
|
class BaseController:
    """
    Common base for controllers that turn a desired position/velocity
    into an action for an environment.

    :param env: The environment the controller is attached to
    :param kwargs: Accepted for signature compatibility; not used here
    """

    def __init__(self, env: Env, **kwargs):
        # Only the environment handle is kept; extra keyword arguments are dropped.
        self.env = env

    def get_action(self, des_pos, des_vel):
        """
        Compute the action for the given desired position and velocity.

        The base class provides no implementation and always raises.

        :param des_pos: Desired position
        :param des_vel: Desired velocity
        :raises NotImplementedError: always; subclasses supply the behaviour
        """
        raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
|
|
class PosController(BaseController):
    """
    Controller whose action is the desired position itself.
    """

    def get_action(self, des_pos, des_vel):
        """
        Return the desired position unchanged as the action.

        :param des_pos: Desired position; returned as-is
        :param des_vel: Desired velocity; not used by this controller
        :return: ``des_pos``
        """
        action = des_pos
        return action
|
|
|
|
|
|
|
|
|
|
|
|
class VelController(BaseController):
    """
    Controller whose action is the desired velocity itself.
    """

    def get_action(self, des_pos, des_vel):
        """
        Return the desired velocity unchanged as the action.

        :param des_pos: Desired position; not used by this controller
        :param des_vel: Desired velocity; returned as-is
        :return: ``des_vel``
        """
        action = des_vel
        return action
|
|
|
|
|
|
|
|
|
|
|
|
class PDController(BaseController):
    """
    A PD-Controller. Using position and velocity information from a provided positional environment,
    the controller calculates a response based on the desired position and velocity

    :param env: A position environment
    :param p_gains: Factors for the proportional gains
    :param d_gains: Factors for the differential gains
    """

    def __init__(self,
                 env: PositionalEnv,
                 p_gains: Union[float, Tuple],
                 d_gains: Union[float, Tuple]):
        self.p_gains = p_gains
        self.d_gains = d_gains
        super().__init__(env)

    def get_action(self, des_pos, des_vel):
        """
        Compute the PD response for the desired position and velocity.

        The current state is read from ``self.env.current_pos`` and
        ``self.env.current_vel``. The result is
        ``p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)``.

        :param des_pos: Desired position; must expose ``.shape`` equal to that of the current position
        :param des_vel: Desired velocity; must expose ``.shape`` equal to that of the current velocity
        :return: The control signal (named ``trq`` in the code, presumably a torque)
        :raises AssertionError: if a desired quantity's shape differs from the current one
        """
        cur_pos = self.env.current_pos
        cur_vel = self.env.current_vel

        # The shapes have to MATCH. The previous `!=` comparison was inverted:
        # it rejected every valid input and let mismatched shapes through.
        assert des_pos.shape == cur_pos.shape, \
            "Mismatch in dimension between desired position {} and current position {}".format(des_pos.shape,
                                                                                                cur_pos.shape)
        assert des_vel.shape == cur_vel.shape, \
            "Mismatch in dimension between desired velocity {} and current velocity {}".format(des_vel.shape,
                                                                                                cur_vel.shape)

        # Proportional term on the position error plus differential term on the velocity error.
        trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
        return trq
|
2021-02-15 16:31:34 +01:00
|
|
|
|
|
|
|
|
2021-05-11 06:19:30 +02:00
|
|
|
def get_policy_class(policy_type, env, mp_kwargs, **kwargs):
    """
    Build the controller that matches ``policy_type``.

    :param policy_type: One of ``"motor"``, ``"velocity"`` or ``"position"``
    :param env: Environment handed to the controller's constructor
    :param mp_kwargs: Mapping that must contain ``'p_gains'`` and ``'d_gains'`` when
        ``policy_type`` is ``"motor"``; not read for the other types
    :param kwargs: Accepted for signature compatibility; not used
    :return: A ``PDController`` for ``"motor"``, a ``VelController`` for ``"velocity"``,
        or a ``PosController`` for ``"position"``
    :raises ValueError: if ``policy_type`` is none of the supported values
    :raises KeyError: if ``policy_type`` is ``"motor"`` and a gain entry is missing from ``mp_kwargs``
    """
    if policy_type == "motor":
        return PDController(env, p_gains=mp_kwargs['p_gains'], d_gains=mp_kwargs['d_gains'])
    elif policy_type == "velocity":
        return VelController(env)
    elif policy_type == "position":
        return PosController(env)

    # Fail loudly instead of implicitly returning None, which would only
    # surface later as an unrelated AttributeError at the first get_action call.
    raise ValueError(
        "Invalid controller type {!r} provided. Only 'motor', 'velocity', or 'position' "
        "are currently supported controllers.".format(policy_type)
    )
|