import warnings
from collections import defaultdict

import gym
import numpy as np

from alr_envs.utils.make_env_helpers import make_env, make_env_rank
from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist


def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
    """
    Example for running any env in the step based setting.
    This also includes DMC environments when leveraging our custom make_env function.

    Args:
        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:

    """
    env = make_env(env_id, seed)
    rewards = 0
    obs = env.reset()
    print("Observation shape: ", env.observation_space.shape)
    print("Action shape: ", env.action_space.shape)

    # number of environment steps
    for i in range(iterations):
        obs, reward, done, info = env.step(env.action_space.sample())
        rewards += reward

        if render:
            env.render()

        if done:
            print(rewards)
            rewards = 0
            obs = env.reset()


def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
    """
    Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
    This also includes DMC and DMP environments when leveraging our custom make_env function.
    Be aware that increasing the number of environments reduces the total length of the individual episodes.

    Args:
        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
        seed: seed for deterministic behaviour
        n_cpu: Number of CPU cores to use in parallel
        n_samples: number of samples generated in total by all environments.

    Returns: Tuple of (obs, reward, done, info) with type np.ndarray

    """
    env = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
    # OR, equivalently, offset the seed manually for each worker:
    # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])

    # for plotting
    rewards = np.zeros(n_cpu)
    buffer = defaultdict(list)

    obs = env.reset()

    # this would generate more samples than requested if n_samples % num_envs != 0,
    # e.g. n_samples=800 with 3 environments yields ceil(800/3)=267 steps and hence 801 samples
    repeat = int(np.ceil(n_samples / env.num_envs))
    for i in range(repeat):
        obs, reward, done, info = env.step(env.action_space.sample())
        buffer['obs'].append(obs)
        buffer['reward'].append(reward)
        buffer['done'].append(done)
        buffer['info'].append(info)
        rewards += reward
        if np.any(done):
            print(f"Reward at iteration {i}: {rewards[done]}")
            rewards[done] = 0

    # do not return more samples than requested, truncate the surplus to exactly n_samples
    return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),


if __name__ == '__main__':
    render = False
    # Basic gym task
    example_general("Pendulum-v0", seed=10, iterations=200, render=render)

    # Basic task from the framework
    example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=render)

    # OpenAI Mujoco task
    example_general("HalfCheetah-v2", seed=10, render=render)

    # Mujoco task from the framework
    example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)
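
    # The docstring of example_general notes that DMC tasks can be addressed via
    # `domain_name-task_name`. A commented-out sketch (assumes dm_control is installed;
    # the concrete task id is only an illustration):
    # example_general("ball_in_cup-catch", seed=10, iterations=200, render=render)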

    # Vectorized multiprocessing environments
    example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
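    # Sketch of how the return value could be consumed; per the docstring of example_async,
    # it is a tuple of stacked arrays (obs, reward, done, info). Commented out so the call
    # above is not executed twice:
    # obs, reward, done, info = example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2,
    #                                         seed=int('533D', 16), n_samples=2 * 200)
    # print("Collected observations with shape:", obs.shape)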