Examples

36b1c487 · Matteo Rossi · 93c01831 · 36b1c487 · 36b1c487
Commit 36b1c487 authored Jan 11, 2019 by Matteo Rossi
--- a/example_fullobs.py
+++ b/example_fullobs.py
@@ -6,7 +6,7 @@ env = StirapEnv(full_observation=True)

 reward = np.zeros(env.timesteps)
 for t in range(env.timesteps):
-    env.render()
+    #env.render()

    # Perform a random action
    action = env.action_space.sample()
@@ -14,6 +14,7 @@ for t in range(env.timesteps):

    if done:
        print("Episode finished after {} timesteps".format(t+1))
+        #print(observation)
        break

 print("Score: ", np.sum(reward))
--- a/example_stirap.py
+++ b/example_stirap.py
+import gym
+import gym_stirap
+import numpy as np
+
+# Run a single episode of the 'stirap-v2' environment with a hard-coded
+# action schedule, then print the last step's info, the summed reward and
+# the final observation.
+env = gym.make('stirap-v2')
+# History of observations, starting with the one returned by reset().
+obs = []
+observation = env.reset()
+obs.append(observation)
+        
+# Fixed action schedule, indexed by timestep in the loop below. The active
+# list has 21 + 90 + 4 + 120 + 300 = 535 entries.
+# NOTE(review): actions[t] raises IndexError if env.timesteps exceeds 535 --
+# confirm the environment's episode length against this schedule.
+#actions = [3] * 200 + [2] * 90 + [4] * 4 + [6] * 120 + [4] * 300
+actions = [4] * 21 + [2] * 90 + [4] * 4 + [6] * 120 + [4] * 300
+#actions = [4] * env.timesteps
+# One reward slot per timestep; slots not reached before an early break
+# stay at zero and therefore do not affect the summed score.
+reward = np.zeros(env.timesteps)
+for t in range(env.timesteps):
+    env.render()
+
+    action = actions[t]
+    # step() is unpacked as (observation, reward, done, info); the reward is
+    # written straight into this timestep's slot.
+    observation, reward[t], done, info = env.step(action)
+    
+    obs.append(observation)
+    # Stop as soon as the environment reports the episode as finished.
+    if done:
+        print("Episode finished after {} timesteps".format(t+1))
+        break
+
+# info holds the value returned by the last step taken.
+# NOTE(review): info is only bound inside the loop, so this line fails with
+# NameError if env.timesteps is 0 -- confirm that cannot happen.
+print(info)
+print("Score: ", np.sum(reward))
+# Last observation recorded (from the final step taken).
+print(obs[-1])
\ No newline at end of file