Skip to content
Snippets Groups Projects
Commit 04c078a2 authored by Florian Gawrilowicz
Browse files

DAgger is an extension of behavior_clone.py

parent 82a3e357
No related branches found
No related tags found
No related merge requests found
import roboschool
import numpy as np
import tensorflow as tf
import pickle
import os
from hw1 import tf_util
import gym
from hw1 import roboschool_agents
# Environment whose expert demonstrations are cloned.  To train on the Atlas
# walker instead, set this to 'RoboschoolAtlasForwardWalk-v1' (that name used
# to be assigned first and was immediately overwritten, so it never took
# effect).
envname = 'RoboschoolHumanoid-v1'

# Load the pre-recorded expert demonstrations for `envname`.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load expert data produced locally or by a trusted source.
with open(os.path.join('expert_data', envname + '.pkl'), 'rb') as f:
    expert_data = pickle.load(f)

# Training set: expert observations (network inputs) and the matching expert
# actions (regression targets).  Both names are rebound further down, where
# newly labelled roll-out data is concatenated onto them.
X = expert_data['observations']
expert_actions = expert_data['actions']
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
epochs = 30           # outer iterations (one fit + one roll-out each)
training_epochs = 5   # passes over the data set per outer iteration
batch_size = 256      # mini-batch size for each optimiser step
do_render = False     # NOTE(review): not read by the original script

# ---------------------------------------------------------------------------
# Policy network: observation -> Dense(128, ReLU) -> linear action output
# ---------------------------------------------------------------------------
obs_dim = expert_data['observations'].shape[1]
act_dim = expert_data['actions'].shape[1]

# Placeholders take a variable-size batch (leading dimension None).
x = tf.placeholder(tf.float32, shape=[None, obs_dim])
y_true = tf.placeholder(tf.float32, shape=[None, act_dim])

hidden = tf.layers.Dense(units=128, activation=tf.nn.relu)
# The output layer is purely linear: no activation and no bias term.
model = tf.layers.Dense(units=act_dim, use_bias=False)
hidden_out = hidden(x)
y_pred = model(hidden_out)

# Regress predicted actions onto the expert's actions with Adam on the MSE.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(loss)
# DAgger-style training: every outer epoch first fits the network to the
# current data set, then rolls out the *learned* policy in the environment
# while the expert labels each visited observation.  The labelled pairs are
# appended to the data set used by the next epoch.
with tf.Session() as sess:
    # presumably initialises the graph variables in this session -- confirm
    # against hw1/tf_util.py
    tf_util.initialize()
    env = gym.make(envname)
    # Hard-coded step cap per roll-out; the original comment points at
    # env.spec.timestep_limit as the value it stands in for.
    max_steps = 1000
    # Expert policy, queried below through pi.act(obs, env).
    pi = roboschool_agents.load_policy(envname, env)
    rollouts_per_epoch = 1

    for e in range(epochs):
        print(f'epochs {e}')
        # The final epoch is always rendered so the result can be watched.
        render_rollout = do_render or e == epochs - 1

        # --- Supervised phase: shuffled mini-batch passes over the data ---
        for _ in range(training_epochs):
            idcs = np.random.permutation(X.shape[0])
            for i in range(0, X.shape[0], batch_size):
                batch = idcs[i:i + batch_size]
                _, loss_value = sess.run(
                    (train, loss),
                    feed_dict={x: X[batch, :], y_true: expert_actions[batch, :]})
            # Loss of the last mini-batch of this pass, not a pass average.
            print(loss_value)

        # --- Roll-out phase: learned policy acts, expert labels ---
        observations = []
        actions = []
        for _ in range(rollouts_per_epoch):
            obs = env.reset()
            done = False
            totalr = 0.
            steps = 0
            while not done:
                action = sess.run(y_pred, feed_dict={x: obs[np.newaxis, :]})
                expert_act = pi.act(obs, env)
                # Store the visited observation with the EXPERT's action; the
                # environment is stepped with the learned policy's action.
                observations.append(obs)
                actions.append(expert_act)
                obs, r, done, _ = env.step(np.squeeze(action))
                totalr += r
                steps += 1
                if render_rollout:
                    env.render()
                # The step cap is checked independently of rendering, so a
                # rendered roll-out is bounded by max_steps as well.
                if steps >= max_steps:
                    break
                if steps % 100 == 0:
                    # Reward collected over the last 100 steps; the counter
                    # restarts after each report.
                    print("%i/%i: %f" % (steps, max_steps, totalr))
                    totalr = 0.

        # Aggregate the newly labelled data into the training set.
        X = np.concatenate((X, np.vstack(observations)), axis=0)
        expert_actions = np.concatenate((expert_actions, np.vstack(actions)), axis=0)

# Reference point for the printed loss: the MSE obtained by always predicting
# the mean expert action is
#   np.mean((expert_data['actions'] - np.mean(expert_data['actions'], axis=0)) ** 2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment