Skip to content
Snippets Groups Projects
Commit 3e1bdf0c authored by Florian Gawrilowicz
Browse files

linear model

parent dde95f4e
Branches
No related tags found
No related merge requests found
import roboschool
import numpy as np
import tensorflow as tf
import pickle
import os
from hw1 import tf_util
import gym
# Gym environment id; also used as the stem of the expert-data file name.
envname = 'RoboschoolAnt-v1'

# Expert demonstrations: a pickled mapping indexed below by 'observations'
# and 'actions'; shape[1] of each is used as the feature width.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load expert data that comes from a trusted source.
with open(os.path.join('expert_data', envname + '.pkl'), 'rb') as f:
    expert_data = pickle.load(f)

# assumes both arrays are laid out as (num_samples, dim) -- TODO confirm
# against the script that generated the expert data.
obs_dim = expert_data['observations'].shape[1]
act_dim = expert_data['actions'].shape[1]

# Graph inputs: a batch of observations and the matching expert actions.
x = tf.placeholder(tf.float32, shape=[None, obs_dim])
y_true = tf.placeholder(tf.float32, shape=[None, act_dim])

# A Dense layer object has to be applied to a tensor. The hidden layer is
# applied to `x` here, and the bias-free output layer (`model`) is applied to
# the hidden activations. Calling the output layer on the hidden *layer
# object* and then calling the result on `x` does not build a valid graph.
hidden = tf.layers.Dense(units=64, activation=tf.nn.relu)(x)
model = tf.layers.Dense(units=act_dim, use_bias=False)
y_pred = model(hidden)

# Regress predicted actions onto expert actions with MSE, optimised by Adam.
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
# NOTE: the triple-quoted string below is a bare expression statement, so
# nothing inside it is executed. It holds a Keras MNIST classification
# example that is unrelated to the expert data used by this script --
# presumably kept as a reference; consider removing it.
'''
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(512, activation=tf.nn.relu),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)
'''
# Train the policy on the expert data, then roll it out for one episode.
# The rollout stays inside the session block because it evaluates `y_pred`
# through `sess`.
with tf.Session() as sess:
    # NOTE(review): presumably initialises the graph variables in the default
    # session -- confirm against hw1/tf_util.
    tf_util.initialize()

    # Full-batch training: every step feeds the complete expert data set.
    for i in range(1000):
        _, loss_value = sess.run(
            (train, loss),
            feed_dict={x: expert_data['observations'],
                       y_true: expert_data['actions']})
        print(loss_value)

    # Play
    env = gym.make(envname)
    max_steps = env.spec.timestep_limit  # step cap taken from the env spec
    obs = env.reset()
    done = False
    totalr = 0.
    steps = 0
    while not done:
        # The graph expects a batch, so add a leading batch axis of size 1.
        action = sess.run(y_pred, feed_dict={x: obs[np.newaxis, :]})
        # Drop only that batch axis; a bare squeeze would also collapse a
        # one-dimensional action into a scalar.
        obs, r, done, _ = env.step(np.squeeze(action, axis=0))
        totalr += r
        steps += 1
        env.render()
        if steps % 100 == 0:
            print("%i/%i" % (steps, max_steps))
        if steps >= max_steps:
            break

    # Report the accumulated episode reward.
    print('return', totalr)
# Baseline for judging the training loss: the mean squared error obtained by
# always predicting the mean of the expert actions over axis 0 (taken here to
# be the sample axis -- TODO confirm). As a bare expression the value would be
# discarded when the file runs as a script, so bind it and print it.
baseline_mse = np.mean(
    (expert_data['actions'] - np.mean(expert_data['actions'], axis=0)) ** 2)
print('baseline mse', baseline_mse)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment