Skip to content
Snippets Groups Projects
Commit 36b1c487 authored by Matteo Rossi's avatar Matteo Rossi
Browse files

Examples

parent 93c01831
No related branches found
No related tags found
No related merge requests found
......@@ -6,7 +6,7 @@ env = StirapEnv(full_observation=True)
reward = np.zeros(env.timesteps)
for t in range(env.timesteps):
env.render()
#env.render()
# Perform a random action
action = env.action_space.sample()
......@@ -14,6 +14,7 @@ for t in range(env.timesteps):
if done:
print("Episode finished after {} timesteps".format(t+1))
#print(observation)
break
print("Score: ", np.sum(reward))
import gym
import gym_stirap
import numpy as np
# Replay a fixed, hand-written action schedule on the 'stirap-v2' environment,
# then print the last info dict, the summed reward and the last observation.
# NOTE(review): indentation was reconstructed for this script; the three final
# prints are assumed to run once, after the loop -- confirm.
env = gym.make('stirap-v2')

observation = env.reset()
obs = [observation]  # observation history, seeded with the reset observation

# Schedule written as (action, number of consecutive steps) pairs.
# Alternative schedules tried earlier:
#   [3] * 200 + [2] * 90 + [4] * 4 + [6] * 120 + [4] * 300
#   [4] * env.timesteps
schedule = ((4, 21), (2, 90), (4, 4), (6, 120), (4, 300))
actions = [act for act, repeats in schedule for _ in range(repeats)]

reward = np.zeros(env.timesteps)  # one reward slot per timestep

for t in range(env.timesteps):
    env.render()
    observation, step_reward, done, info = env.step(actions[t])
    reward[t] = step_reward
    obs.append(observation)
    if not done:
        continue
    print("Episode finished after {} timesteps".format(t + 1))
    break

print(info)
print("Score: ", reward.sum())
print(obs[-1])
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment