General Usage Examples

  1from collections import defaultdict
  2
  3import gymnasium as gym
  4import numpy as np
  5
  6import fancy_gym
  7
  8
  9def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
 10    """
 11    Example for running any env in the step based setting.
 12    This also includes DMC environments when leveraging our custom make_env function.
 13
 14    Args:
 15        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
 16        seed: seed for deterministic behaviour
 17        iterations: Number of rollout steps to run
 18        render: Render the episode
 19
 20    Returns:
 21
 22    """
 23
 24    env = gym.make(env_id)
 25    rewards = 0
 26    obs = env.reset(seed=seed)
 27    print("Observation shape: ", env.observation_space.shape)
 28    print("Action shape: ", env.action_space.shape)
 29
 30    # number of environment steps
 31    for i in range(iterations):
 32        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
 33        rewards += reward
 34
 35        if render:
 36            env.render()
 37
 38        if terminated or truncated:
 39            print(rewards)
 40            rewards = 0
 41            obs = env.reset()
 42
 43
 44def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
 45    """
 46    Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
 47    This also includes DMC and DMP environments when leveraging our custom make_env function.
 48    Be aware, increasing the number of environments reduces the total length of the individual episodes.
 49
 50    Args:
 51        env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
 52        seed: seed for deterministic behaviour
 53        n_cpu: Number of cpus cores to use in parallel
 54        n_samples: number of samples generated in total by all environments.
 55
 56    Returns: Tuple of (obs, reward, done, info) with type np.ndarray
 57
 58    """
 59    env = gym.vector.AsyncVectorEnv([fancy_gym.make_rank(env_id, seed, i) for i in range(n_cpu)])
 60    # OR
 61    # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
 62
 63    # for plotting
 64    rewards = np.zeros(n_cpu)
 65    buffer = defaultdict(list)
 66
 67    obs = env.reset()
 68
 69    # this would generate more samples than requested if n_samples % num_envs != 0
 70    repeat = int(np.ceil(n_samples / env.num_envs))
 71    for i in range(repeat):
 72        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
 73        buffer['obs'].append(obs)
 74        buffer['reward'].append(reward)
 75        buffer['terminated'].append(terminated)
 76        buffer['truncated'].append(truncated)
 77        buffer['info'].append(info)
 78        rewards += reward
 79
 80        done = terminated or truncated
 81        if np.any(done):
 82            print(f"Reward at iteration {i}: {rewards[done]}")
 83            rewards[done] = 0
 84
 85    # do not return values above threshold
 86    return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
 87
 88
 89if __name__ == '__main__':
 90    render = True
 91
 92    # Basic gym task
 93    example_general("Pendulum-v1", seed=10, iterations=200, render=render)
 94
 95    # Mujoco task from framework
 96    example_general("fancy/Reacher5d-v0", seed=10, iterations=200, render=render)
 97
 98    # # OpenAI Mujoco task
 99    example_general("HalfCheetah-v2", seed=10, render=render)
100
101    # Vectorized multiprocessing environments
102    # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)