forked from donjpierce/traffic
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlearn.py
More file actions
86 lines (74 loc) · 2.67 KB
/
learn.py
File metadata and controls
86 lines (74 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# python=3.6 requires using Qt4Agg backend for animation saving
import matplotlib
matplotlib.use('Qt4Agg')
from environment import Env
from keras import Sequential, layers
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
# Simulation time step per tick (presumably seconds -- TODO confirm against Env).
dt = 1 / 1000
# Number of cars in the simulation.
N = 1
# Index of the car controlled/tracked by the learning agent.
agent = 0
"""Lower Manhattan"""
# G = ox.load_graphml('lowermanhattan.graphml')
# G = ox.project_graph(G)
# fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
"""San Francisco"""
# G = ox.load_graphml('sanfrancisco.graphml')
# G = ox.project_graph(G)
# fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
"""Piedmont, California"""
# Load the pre-downloaded street network, project it to a planar CRS,
# and draw it once; Env draws cars on top of this figure.
G = ox.load_graphml('piedmont.graphml')
G = ox.project_graph(G)
fig, ax = ox.plot_graph(G, node_size=0, edge_linewidth=0.5)
# initialize the environment for the learning agent
env = Env(n=N, fig=fig, ax=ax, agent=agent, dt=dt, animate=False)
# Build the Q-network: a one-hot 10-dim state vector in, one Q-value per
# action (2 actions) out. Constructed from a layer list rather than
# incremental add() calls; the resulting model is identical.
model = Sequential([
    layers.InputLayer(batch_input_shape=(1, 10)),
    layers.Dense(10, activation='sigmoid'),
    layers.Dense(2, activation='linear'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
# now execute Q learning
# Epsilon-greedy Q-learning: states are integers in [0, 10), fed to the
# network as one-hot rows of a 10x10 identity matrix; the network predicts
# a Q-value for each of the 2 available actions.
y = 0.95              # discount factor (gamma) on future rewards
eps = 0.5             # epsilon-greedy exploration rate, decayed per episode
decay_factor = 0.999
num_episodes = 10
r_avg_list = []       # total reward collected in each episode
r_sum_list = []       # running mean of the episode totals (plotted below)
one_hot = np.identity(10)  # one-hot state encodings; loop-invariant, built once
# 'with' guarantees diag.txt is flushed/closed even if an episode raises
# (the original used a manual open/close pair).
with open('diag.txt', 'w') as file:
    for i in range(num_episodes):
        print("Episode {} of {}".format(i + 1, num_episodes))
        eps *= decay_factor
        r_sum = 0
        done = False
        state = env.reset((i, num_episodes))
        while not done:
            # NOTE(review): this per-step reset was in the original. It looks
            # suspicious (re-initializing mid-episode), but Env's semantics
            # aren't visible here, so it is preserved -- confirm intent.
            env.reset((i, num_episodes))
            if np.random.random() < eps:
                # Explore: choose one of the two actions uniformly at random.
                action = np.random.randint(0, 2)
            else:
                # Exploit: choose the action with the highest predicted Q-value.
                action = np.argmax(model.predict(one_hot[state:state + 1]))
            new_s, r, done, _ = env.step(action=action, num=(i, num_episodes))
            # Q-learning target: immediate reward + discounted best future Q.
            target = r + y * np.max(model.predict(one_hot[new_s:new_s + 1]))
            target_vec = model.predict(one_hot[state:state + 1])[0]
            target_vec[action] = target
            # Single gradient step toward the updated target for this state.
            model.fit(one_hot[state:state + 1], target_vec.reshape(-1, 2), epochs=1, verbose=0)
            state = new_s
            r_sum += r
            print('Action: {}, Reward: {}'.format(action, r))
            # Bug fix: the original write omitted the trailing newline, so all
            # per-step entries ran together on a single line in diag.txt.
            file.write('Action: {}, Reward: {}\n'.format(action, round(r, 2)))
        # (diag_action/diag_reward accumulators from the original were removed:
        # they were written every step but never read or logged anywhere.)
        r_avg_list.append(r_sum)
        r_sum_list.append(sum(r_avg_list) / (i + 1))
        file.write('Episode: {}, Total Rewards: {} \n'.format(i, round(r_sum, 2)))
# Plot the running-average reward per episode for the tracked car and save it.
episode_axis = np.arange(num_episodes)
plt.plot(episode_axis, r_sum_list)
plt.xlabel('Game number')
plt.ylabel('Average reward per game')
plt.suptitle(f'Average reward per game for car no. {agent}')
plt.savefig('avg_rewards.png')