Commit 82a3e357 authored by Florian Gawrilowicz

playing around

parent 0cb6b38c
@@ -7,6 +7,7 @@ from hw1 import tf_util
import gym
envname = 'RoboschoolAnt-v1'
envname = 'RoboschoolHumanoid-v1'
with open(os.path.join('expert_data', envname + '.pkl'), 'rb') as f:
    expert_data = pickle.load(f)
@@ -14,38 +15,38 @@ with open(os.path.join('expert_data', envname + '.pkl'), 'rb') as f:
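# Placeholders for expert observations (network inputs) and expert actions (regression targets)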
x = tf.placeholder(tf.float32, shape=[None, expert_data['observations'].shape[1]])
y_true = tf.placeholder(tf.float32, shape=[None, expert_data['actions'].shape[1]])
hidden = tf.layers.Dense(units=64, activation=tf.nn.relu)
model = tf.layers.Dense(units=expert_data['actions'].shape[1], use_bias=False)(hidden)
y_pred = model(x)
hidden = tf.layers.Dense(units=256, activation=tf.nn.relu)
model = tf.layers.Dense(units=expert_data['actions'].shape[1], use_bias=False)
y_pred = model(hidden(x))
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
optimizer = tf.train.AdamOptimizer(0.001)
train = optimizer.minimize(loss)
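# The block below is a commented-out tf.keras variant of the same regression model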
'''
(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    tf.keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[expert_data['observations'].shape[1]]),
    tf.keras.layers.Dense(expert_data['actions'].shape[1])
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
              loss='mse',
              metrics=['mae', 'mse'])
model.summary()
model.fit(expert_data['observations'], expert_data['actions'], epochs=5, batch_size=256)
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)
'''
epochs = 300
batch_size = 256
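# Mini-batch training loop: fit predicted actions to expert actions by minimizing the MSE loss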
with tf.Session() as sess:
    '''
    '''
    tf_util.initialize()
    for i in range(1000):
    for e in range(epochs):
        for i in range(0, expert_data['observations'].shape[0], batch_size):
            _, loss_value = sess.run(
                (train, loss),
                feed_dict={x: expert_data['observations'], y_true: expert_data['actions']})
                feed_dict={x: expert_data['observations'][i:i + batch_size, :], y_true: expert_data['actions'][i:i + batch_size, :]})
            print(loss_value)
    # Play
@@ -57,15 +58,17 @@ with tf.Session() as sess:
    steps = 0
    while not done:
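        # Query the learned policy for an action given the current observation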
        action = sess.run(y_pred, feed_dict={x: obs[np.newaxis, :]})
        # action = model.predict(obs[np.newaxis, :])
        # observations.append(obs)
        # actions.append(action)
        obs, r, done, _ = env.step(np.squeeze(action))
        totalr += r
        steps += 1
        env.render()
        if steps % 100 == 0: print("%i/%i" % (steps, max_steps))
        if steps % 100 == 0:
            print("%i/%i" % (steps, max_steps))
        if steps >= max_steps:
            break
# print(sess.run(y_pred))
np.mean((expert_data['actions'] - np.mean(expert_data['actions'], axis=0)) ** 2)
# print(sess.run(y_pred))
# np.mean((expert_data['actions'] - np.mean(expert_data['actions'], axis=0)) ** 2)