
For Beginners: Playing Flappy Bird with a Keras Convolutional Neural Network

 啊司com 2017-04-25
#!/usr/bin/env python
from __future__ import print_function

import argparse
import skimage as skimage
from skimage import transform, color, exposure
from skimage.transform import rotate
from skimage.viewer import ImageViewer
import sys
sys.path.append('game/')
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque
import json

from keras import initializations
from keras.initializations import normal, identity
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adam

GAME = 'bird'              # name of the game
CONFIG = 'nothreshold'
ACTIONS = 2                # number of valid actions: do nothing + flap = 2
GAMMA = 0.99               # discount factor applied to future rewards
OBSERVATION = 3200.        # how many steps to observe before training
EXPLORE = 3000000.         # total number of steps over which epsilon is annealed
FINAL_EPSILON = 0.0001     # final (minimum) value of epsilon
INITIAL_EPSILON = 0.1      # initial value of epsilon; it is decreased gradually
REPLAY_MEMORY = 50000      # number of transitions (all information from state s to state s') to remember
BATCH = 32                 # size of the sampled training minibatch

# one input action per frame
FRAME_PER_ACTION = 1
# size of the preprocessed images
img_rows, img_cols = 80, 80
# stack 4 greyscale frames each time, equivalent to 4 channels
img_channels = 4


# build the neural network model
def buildmodel():
    print('Now we build the model')
    # see the article body for a layer-by-layer explanation
    model = Sequential()
    model.add(Convolution2D(32, 8, 8, subsample=(4, 4),
                            init=lambda shape, name: normal(shape, scale=0.01, name=name),
                            border_mode='same',
                            input_shape=(img_channels, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 4, 4, subsample=(2, 2),
                            init=lambda shape, name: normal(shape, scale=0.01, name=name),
                            border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, subsample=(1, 1),
                            init=lambda shape, name: normal(shape, scale=0.01, name=name),
                            border_mode='same'))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(512, init=lambda shape, name: normal(shape, scale=0.01, name=name)))
    model.add(Activation('relu'))
    model.add(Dense(2, init=lambda shape, name: normal(shape, scale=0.01, name=name)))

    adam = Adam(lr=1e-6)
    model.compile(loss='mse', optimizer=adam)  # mean squared error loss with the Adam optimizer
    print('We finish building the model')
    return model


def trainNetwork(model, args):
    # get a game emulator instance
    game_state = game.GameState()

    # store previous observations in the replay memory D
    D = deque()

    # get the first state by doing nothing, then preprocess the image into the 80x80x4 format
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1  # do_nothing is array([1, 0])
    x_t, r_0, terminal = game_state.frame_step(do_nothing)

    x_t = skimage.color.rgb2gray(x_t)
    x_t = skimage.transform.resize(x_t, (80, 80))
    x_t = skimage.exposure.rescale_intensity(x_t, out_range=(0, 255))

    # at initialisation the 4 stacked frames are all copies of the same initial frame
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=0)  # s_t is the stack of four frames

    # to use the array in Keras we reshape it, adding a leading (batch) dimension
    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])

    if args['mode'] == 'Run':
        OBSERVE = 999999999  # keep observing forever, never train
        epsilon = FINAL_EPSILON
        print('Now we load weight')
        model.load_weights('model.h5')
        adam = Adam(lr=1e-6)
        model.compile(loss='mse', optimizer=adam)
        print('Weight load successfully')
    else:
        # otherwise we start training after an observation period
        OBSERVE = OBSERVATION
        epsilon = INITIAL_EPSILON

    t = 0  # t is the total frame count
    while (True):
        # values reset on every iteration
        loss = 0
        Q_sa = 0
        action_index = 0
        r_t = 0
        a_t = np.zeros([ACTIONS])

        # choose an action epsilon-greedily
        if t % FRAME_PER_ACTION == 0:
            if random.random() <= epsilon:
                print('----------Random Action----------')
                action_index = random.randrange(ACTIONS)  # pick a random action
                a_t[action_index] = 1  # build the corresponding one-hot action input
            else:
                q = model.predict(s_t)  # feed the current state to get the predicted Q values
                max_Q = np.argmax(q)    # np.argmax returns the index of the maximum value along an axis
                action_index = max_Q    # index 0 means do nothing, index 1 means flap
                a_t[max_Q] = 1          # build the corresponding one-hot action input

        # once training has started, gradually reduce epsilon until it reaches its final value
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        # execute the chosen action and observe the returned next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)

        # convert the frame to greyscale, resize it and rescale the intensity
        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1, (80, 80))
        x_t1 = skimage.exposure.rescale_intensity(x_t1, out_range=(0, 255))

        # reshape the image array to 4 dimensions by adding two leading dimensions
        x_t1 = x_t1.reshape(1, 1, x_t1.shape[0], x_t1.shape[1])
        # append the first three frames of s_t behind the new frame (index 0) to form the final 4-frame stack
        s_t1 = np.append(x_t1, s_t[:, :3, :, :], axis=1)

        # store the transition in the replay memory
        D.append((s_t, action_index, r_t, s_t1, terminal))
        if len(D) > REPLAY_MEMORY:
            D.popleft()

        # once observation is finished
        if t > OBSERVE:
            # sample a minibatch for training
            minibatch = random.sample(D, BATCH)

            # inputs and targets together form the Q-value table
            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))  # (32, 4, 80, 80)
            targets = np.zeros((inputs.shape[0], ACTIONS))                        # (32, 2)

            # experience replay
            for i in range(0, len(minibatch)):
                # unpack all fields in the order they were stored:
                # D.append((s_t, action_index, r_t, s_t1, terminal))
                state_t = minibatch[i][0]   # current state
                action_t = minibatch[i][1]  # action taken (index)
                reward_t = minibatch[i][2]  # reward received
                state_t1 = minibatch[i][3]  # next state
                terminal = minibatch[i][4]  # terminal flag

                inputs[i:i + 1] = state_t  # save the current state, i.e. the s in Q(s, a)

                # predicted Q values for the current state, indexed by action
                targets[i] = model.predict(state_t)
                # predicted Q values for the next state, indexed by action
                Q_sa = model.predict(state_t1)

                if terminal:
                    # if the game terminated after this action, the Q value of (s, a) is just the reward
                    targets[i, action_t] = reward_t
                else:
                    # otherwise it is the immediate reward plus the discounted best expected reward of the next state
                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)

            # train the network on the generated Q-value table and accumulate the current loss
            loss += model.train_on_batch(inputs, targets)

        s_t = s_t1  # the next state becomes the current state
        t = t + 1   # increment the total frame count

        # save the current model every 100 iterations
        if t % 100 == 0:
            print('Now we save model')
            model.save_weights('model.h5', overwrite=True)
            with open('model.json', 'w') as outfile:
                json.dump(model.to_json(), outfile)

        # print progress info
        state = ''
        if t <= OBSERVE:
            state = 'observe'
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = 'explore'
        else:
            state = 'train'

        print('TIMESTEP', t, '/ STATE', state,
              '/ EPSILON', epsilon, '/ ACTION', action_index, '/ REWARD', r_t,
              '/ Q_MAX ', np.max(Q_sa), '/ Loss ', loss)

    print('Episode finished!')
    print('************************')


def playGame(args):
    model = buildmodel()       # build the model first
    trainNetwork(model, args)  # then start training


def main():
    parser = argparse.ArgumentParser(description='Description of your program')
    parser.add_argument('-m', '--mode', help='Train / Run', required=True)  # accept the mode argument
    args = vars(parser.parse_args())  # args is a dict; 'mode' is a key
    playGame(args)  # start the game


if __name__ == '__main__':
    main()  # start from main() when this script is run directly
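
A minimal usage sketch, assuming the listing above is saved as qlearn.py next to a game/ directory that provides wrapped_flappy_bird.py (both filenames are assumptions; the -m/--mode flag comes from the argparse setup in the code, and any value other than 'Run' goes to training mode):

# Train from scratch: observe for OBSERVATION steps, then start learning and
# periodically write model.h5 / model.json to the working directory
python qlearn.py -m Train

# Run only: load the saved model.h5 and play with epsilon fixed at FINAL_EPSILON
python qlearn.py -m Run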
