1from collections import defaultdict
2
3import gymnasium as gym
4import numpy as np
5
6import fancy_gym
7
8
def example_general(env_id="Pendulum-v1", seed=1, iterations=1000, render=True):
    """
    Example for running any env in the step based setting.

    A single environment is created with `gym.make`, stepped with randomly
    sampled actions, and reset whenever an episode terminates or is truncated.

    Args:
        env_id: id of a registered environment, passed to `gym.make`
        seed: seed passed to the first `env.reset` call
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:
        None. The accumulated reward of every finished episode is printed.
    """
    # Gymnasium fixes the render backend when the env is constructed; an env
    # built without a render_mode only emits a warning on `env.render()`.
    env = gym.make(env_id, render_mode="human" if render else None)
    try:
        episode_return = 0
        # Gymnasium's reset returns an (observation, info) pair.
        obs, info = env.reset(seed=seed)
        print("Observation shape: ", env.observation_space.shape)
        print("Action shape: ", env.action_space.shape)

        # number of environment steps
        for _ in range(iterations):
            obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
            episode_return += reward

            if render:
                env.render()

            if terminated or truncated:
                print(episode_return)
                episode_return = 0
                obs, info = env.reset()
    finally:
        # Release the viewer / simulator resources even if a step fails.
        env.close()
42
43
def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
    """
    Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
    Be aware, increasing the number of environments reduces the total length of the individual episodes.

    Args:
        env_id: id of the environment, forwarded to `fancy_gym.make_rank`
        seed: base seed, forwarded to `fancy_gym.make_rank` together with the worker index
        n_cpu: Number of environments to run in parallel
        n_samples: number of samples generated in total by all environments.

    Returns:
        Tuple of (obs, reward, terminated, truncated, info) of type np.ndarray.
        Each entry is the per-step value stacked along a leading step axis
        with `np.stack`. The tuple is empty when no step is executed.

    """
    env = gym.vector.AsyncVectorEnv([fancy_gym.make_rank(env_id, seed, i) for i in range(n_cpu)])
    # OR
    # env = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])

    try:
        # running per-environment episode return, for printing
        rewards = np.zeros(n_cpu)
        buffer = defaultdict(list)

        # Gymnasium's reset returns an (observation, info) pair.
        obs, info = env.reset()

        # this would generate more samples than requested if n_samples % num_envs != 0
        repeat = int(np.ceil(n_samples / env.num_envs))
        for i in range(repeat):
            obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
            buffer['obs'].append(obs)
            buffer['reward'].append(reward)
            buffer['terminated'].append(terminated)
            buffer['truncated'].append(truncated)
            buffer['info'].append(info)
            rewards += reward

            # `terminated` and `truncated` hold one flag per environment, so
            # they must be combined element-wise; a plain `or` raises
            # ValueError for arrays with more than one element.
            done = np.logical_or(terminated, truncated)
            if np.any(done):
                print(f"Reward at iteration {i}: {rewards[done]}")
                rewards[done] = 0
    finally:
        # Shut down the worker processes even if stepping fails.
        env.close()

    # do not return values above threshold
    # NOTE: the slice acts on the step axis, whose length `repeat` never
    # exceeds n_samples, so the stacked arrays are returned in full.
    return tuple(np.stack(values)[:n_samples] for values in buffer.values())
87
88
if __name__ == '__main__':
    # Toggle on-screen rendering for all step-based examples below.
    render = True

    # Basic gym task
    example_general("Pendulum-v1", seed=10, iterations=200, render=render)

    # Mujoco task from framework
    example_general("fancy/Reacher5d-v0", seed=10, iterations=200, render=render)

    # OpenAI Mujoco task
    # v4 uses the maintained `mujoco` bindings; the v2 variant depends on the
    # deprecated `mujoco-py` package and is not registered in Gymnasium 1.0+.
    example_general("HalfCheetah-v4", seed=10, render=render)

    # Vectorized multiprocessing environments
    # example_async(env_id="fancy/HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)