updated examples to new API

Fabian 2023-01-12 17:21:56 +01:00
parent 0c7ac838bf
commit fbe3ef4a4b
11 changed files with 71 additions and 58 deletions

View File

@@ -26,10 +26,10 @@ def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True):
ac = env.action_space.sample()
if render:
env.render(mode="human")
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if done:
if terminated or truncated:
print(env_id, rewards)
rewards = 0
obs = env.reset()
@@ -102,10 +102,10 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if done:
if terminated or truncated:
print(base_env_id, rewards)
rewards = 0
obs = env.reset()
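
The change above is the core of the migration: Gymnasium's step() returns a 5-tuple, splitting the old boolean done into terminated (the MDP reached a terminal state) and truncated (the episode was cut short, e.g. by a time limit). A minimal sketch of the new loop against a plain Gymnasium env (reset() now also returns an info dict, which the examples in this commit do not use yet):

import gymnasium as gym

env = gym.make("Pendulum-v1")
obs, info = env.reset(seed=1)  # reset() returns (obs, info) in Gymnasium
for _ in range(1000):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    if terminated or truncated:  # episode ended for either reason
        obs, info = env.reset()
env.close()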

View File

@@ -1,6 +1,6 @@
from collections import defaultdict
import gym
import gymnasium as gym
import numpy as np
import fancy_gym
@@ -29,13 +29,13 @@ def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
# number of environment steps
for i in range(iterations):
obs, reward, done, info = env.step(env.action_space.sample())
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
rewards += reward
if render:
env.render()
if done:
if terminated or truncated:
print(rewards)
rewards = 0
obs = env.reset()
@@ -69,12 +69,15 @@ def example_async(env_id="HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samp
# this would generate more samples than requested if n_samples % num_envs != 0
repeat = int(np.ceil(n_samples / env.num_envs))
for i in range(repeat):
obs, reward, done, info = env.step(env.action_space.sample())
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
buffer['obs'].append(obs)
buffer['reward'].append(reward)
buffer['done'].append(done)
buffer['terminated'].append(terminated)
buffer['truncated'].append(truncated)
buffer['info'].append(info)
rewards += reward
done = np.logical_or(terminated, truncated)  # element-wise; plain `or` fails on arrays
if np.any(done):
print(f"Reward at iteration {i}: {rewards[done]}")
rewards[done] = 0
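
In this vectorized example, terminated and truncated are arrays with one flag per sub-environment, which is why combining them needs an element-wise operation instead of Python's or. A small sketch of the same bookkeeping, using Gymnasium's built-in vector API as a stand-in for fancy_gym's own async helper:

import numpy as np
import gymnasium as gym

# four sub-environments stepped in parallel processes
envs = gym.vector.AsyncVectorEnv([lambda: gym.make("Pendulum-v1") for _ in range(4)])
obs, info = envs.reset(seed=1)
rewards = np.zeros(envs.num_envs)
for i in range(100):
    obs, reward, terminated, truncated, info = envs.step(envs.action_space.sample())
    rewards += reward
    done = np.logical_or(terminated, truncated)  # one flag per sub-env
    if np.any(done):
        print(f"Returns at iteration {i}: {rewards[done]}")
        rewards[done] = 0  # vector envs auto-reset finished sub-environments
envs.close()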

View File

@@ -29,9 +29,9 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
# THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
# TODO: Remove this, when Metaworld fixes its interface.
env.render(False)
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if done:
if terminated or truncated:
print(env_id, rewards)
rewards = 0
obs = env.reset()
@@ -103,10 +103,10 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if done:
if terminated or truncated:
print(base_env_id, rewards)
rewards = 0
obs = env.reset()
@@ -131,4 +131,3 @@ if __name__ == '__main__':
#
# # Custom MetaWorld task
example_custom_dmc_and_mp(seed=10, iterations=1, render=render)

View File

@@ -41,11 +41,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
# This executes a full trajectory and gives back the context (obs) of the last step in the trajectory, or the
# full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
# to the return of a trajectory. Default is the sum over the step-wise rewards.
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
# Aggregated returns
returns += reward
if done:
if terminated or truncated:
print(reward)
obs = env.reset()
@@ -79,10 +79,10 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
returns += reward
if done:
if terminated or truncated:
print(i, reward)
obs = env.reset()
@@ -145,10 +145,10 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
rewards += reward
if done:
if terminated or truncated:
print(rewards)
rewards = 0
obs = env.reset()
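
As the comment in example_mp notes, a single step() on a movement-primitive env executes an entire trajectory, so the returned reward is already the trajectory return. A hedged usage sketch, assuming fancy_gym.make keeps the seed keyword used in test/utils.py below, and reusing the HoleReacherProMP-v0 id from these examples:

import fancy_gym

env = fancy_gym.make("HoleReacherProMP-v0", seed=1)
obs = env.reset()
for i in range(10):
    # one step() call = one full trajectory; reward is the trajectory return
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    print(i, reward)
    if terminated or truncated:
        obs = env.reset()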

View File

@@ -24,10 +24,10 @@ def example_mp(env_name, seed=1, render=True):
else:
env.render(mode=None)
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
returns += reward
if done:
if terminated or truncated:
print(returns)
obs = env.reset()

View File

@@ -34,7 +34,7 @@ fig.show()
for t, pos_vel in enumerate(zip(pos, vel)):
actions = env.tracking_controller.get_action(pos_vel[0], pos_vel[1], env.current_vel, env.current_pos)
actions = np.clip(actions, env.env.action_space.low, env.env.action_space.high)
_, _, _, _ = env.env.step(actions)
env.env.step(actions)
if t % 15 == 0:
img.set_data(env.env.render(mode="rgb_array"))
fig.canvas.draw()
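
The replay snippet above still calls render(mode="rgb_array") per frame, which is the legacy Gym convention. Gymnasium itself moved the render mode to construction time; a minimal sketch of that convention, in case these wrappers adopt it later:

import gymnasium as gym

# the render mode is fixed once, when the env is created
env = gym.make("Pendulum-v1", render_mode="rgb_array")
obs, info = env.reset(seed=1)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
frame = env.render()  # returns an RGB array because of render_mode above
env.close()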

View File

@@ -7,8 +7,10 @@ extras = {
"dmc": ["dm_control>=1.0.1"],
"metaworld": ["metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld",
'mujoco-py<2.2,>=2.1',
'scipy'
'scipy',
'gym>=0.15.4',
],
"mujoco": ["gymnasium[mujoco]"],
}
# All dependencies
@@ -18,7 +20,7 @@ extras["all"] = list(set(itertools.chain.from_iterable(map(lambda group: extras[
setup(
author='Fabian Otto, Onur Celik',
name='fancy_gym',
version='0.2',
version='0.3',
classifiers=[
# Python 3.7 is minimally supported
"Programming Language :: Python :: 3",
@@ -29,7 +31,7 @@ setup(
],
extras_require=extras,
install_requires=[
'gym[mujoco]<0.25.0,>=0.24.0',
'gymnasium',
'mp_pytorch @ git+https://github.com/ALRhub/MP_PyTorch.git@main'
],
packages=[package for package in find_packages() if package.startswith("fancy_gym")],

View File

@@ -1,39 +1,43 @@
from itertools import chain
from typing import Callable
import gymnasium as gym
import pytest
from dm_control import suite, manipulation
import fancy_gym
from test.utils import run_env, run_env_determinism
SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
# SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
# MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
DM_CONTROL_IDS = [spec.id for spec in gym.envs.registry.values() if
not isinstance(spec.entry_point, Callable) and spec.entry_point.startswith('dm_control/')]
DMC_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
SEED = 1
@pytest.mark.parametrize('env_id', SUITE_IDS)
def test_step_suite_functionality(env_id: str):
@pytest.mark.parametrize('env_id', DM_CONTROL_IDS)
def test_step_dm_control_functionality(env_id: str):
"""Tests that suite step environments run without errors using random actions."""
run_env(env_id)
@pytest.mark.parametrize('env_id', SUITE_IDS)
def test_step_suite_determinism(env_id: str):
@pytest.mark.parametrize('env_id', DM_CONTROL_IDS)
def test_step_dm_control_determinism(env_id: str):
"""Tests that for step environments identical seeds produce identical trajectories."""
run_env_determinism(env_id, SEED)
@pytest.mark.parametrize('env_id', MANIPULATION_IDS)
def test_step_manipulation_functionality(env_id: str):
"""Tests that manipulation step environments run without errors using random actions."""
run_env(env_id)
@pytest.mark.parametrize('env_id', MANIPULATION_IDS)
def test_step_manipulation_determinism(env_id: str):
"""Tests that for step environments identical seeds produce identical trajectories."""
run_env_determinism(env_id, SEED)
# @pytest.mark.parametrize('env_id', MANIPULATION_IDS)
# def test_step_manipulation_functionality(env_id: str):
# """Tests that manipulation step environments run without errors using random actions."""
# run_env(env_id)
#
#
# @pytest.mark.parametrize('env_id', MANIPULATION_IDS)
# def test_step_manipulation_determinism(env_id: str):
# """Tests that for step environments identical seeds produce identical trajectories."""
# run_env_determinism(env_id, SEED)
@pytest.mark.parametrize('env_id', DMC_MP_IDS)
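
The rewritten test derives its ids from the registry because Gymnasium replaces the old registry object (and its .all() method) with a plain dict mapping id to EnvSpec, hence the registry.values() iteration and the guard against callable entry points. A quick sketch of what such a spec carries:

import gymnasium as gym

spec = gym.spec("Pendulum-v1")
print(spec.id)                 # 'Pendulum-v1'
print(spec.entry_point)        # import string (or a callable for some envs)
print(spec.max_episode_steps)  # 200; the TimeLimit wrapper uses this to set truncated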

View File

@@ -1,12 +1,14 @@
import itertools
from typing import Callable
import fancy_gym
import gym
import gymnasium as gym
import pytest
from test.utils import run_env, run_env_determinism
CUSTOM_IDS = [id for id, spec in gym.envs.registry.items() if
not isinstance(spec.entry_point, Callable) and
"fancy_gym" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point]
CUSTOM_MP_IDS = itertools.chain(*fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
SEED = 1

View File

@@ -1,12 +1,12 @@
from itertools import chain
import gym
import gymnasium as gym
import pytest
import fancy_gym
from test.utils import run_env, run_env_determinism
GYM_IDS = [spec.id for spec in gym.envs.registry.all() if
GYM_IDS = [spec.id for spec in gym.envs.registry.values() if
"fancy_gym" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point]
GYM_MP_IDS = chain(*fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
SEED = 1

View File

@@ -1,4 +1,4 @@
import gym
import gymnasium as gym
import numpy as np
from fancy_gym import make
@@ -15,16 +15,16 @@ def run_env(env_id, iterations=None, seed=0, render=False):
seed: random seeding
render: Render the episode
Returns: observations, rewards, dones, actions
Returns: observations, rewards, terminations, truncations, actions
"""
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
actions = []
dones = []
obs = env.reset()
print(obs.dtype)
terminations = []
truncations = []
obs, _ = env.reset()
verify_observations(obs, env.observation_space, "reset()")
iterations = iterations or (env.spec.max_episode_steps or 1)
@@ -36,26 +36,28 @@ def run_env(env_id, iterations=None, seed=0, render=False):
ac = env.action_space.sample()
actions.append(ac)
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
obs, reward, terminated, truncated, info = env.step(ac)
verify_observations(obs, env.observation_space, "step()")
verify_reward(reward)
verify_done(done)
verify_done(terminated)
verify_done(truncated)
rewards.append(reward)
dones.append(done)
terminations.append(terminated)
truncations.append(truncated)
if render:
env.render("human")
if done:
if terminated or truncated:
break
assert done, "Done flag is not True after end of episode."
assert terminated or truncated, "Termination or truncation flag is not True after end of episode."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
return np.array(observations), np.array(rewards), np.array(terminations), np.array(truncations), np.array(actions)
def run_env_determinism(env_id: str, seed: int):
@@ -63,11 +65,12 @@ def run_env_determinism(env_id: str, seed: int):
traj2 = run_env(env_id, seed=seed)
# Iterate over two trajectories, which should have the same state and action sequence
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = time_step
assert np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match."
assert np.array_equal(ac1, ac2), f"Actions [{i}] {ac1} and {ac2} do not match."
assert np.array_equal(rwd1, rwd2), f"Rewards [{i}] {rwd1} and {rwd2} do not match."
assert np.array_equal(done1, done2), f"Dones [{i}] {done1} and {done2} do not match."
assert np.array_equal(term1, term2), f"Terminateds [{i}] {term1} and {term2} do not match."
assert np.array_equal(trunc1, trunc2), f"Truncateds [{i}] {trunc1} and {trunc2} do not match."
def verify_observations(obs, observation_space: gym.Space, obs_type="reset()"):
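
For reference, run_env now returns five arrays, and run_env_determinism's zip(*traj1, *traj2) walks both trajectories in lockstep, yielding one 10-tuple per time step. A toy illustration of that unpacking pattern (dummy data, not real env output):

import numpy as np

# each trajectory mimics (observations, rewards, terminations, truncations, actions)
traj1 = (np.arange(3), np.ones(3), np.zeros(3, bool), np.zeros(3, bool), np.arange(3))
traj2 = tuple(np.copy(a) for a in traj1)

for i, step in enumerate(zip(*traj1, *traj2)):
    obs1, rwd1, term1, trunc1, ac1, obs2, rwd2, term2, trunc2, ac2 = step
    assert np.array_equal(obs1, obs2), f"Observations [{i}] do not match."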