def gen(batch_size=32):
    """Infinite generator of (images, one-hot labels) CAPTCHA batches.

    Yields:
        X: uint8 array of shape (batch_size, height, width, 3) — RGB images.
        y: list of n_len arrays, each of shape (batch_size, n_class),
           one-hot encoding the character at that position.

    NOTE(review): relies on module-level globals `height`, `width`, `n_len`,
    `n_class`, `characters`, and on `ImageCaptcha` (from the `captcha`
    package) — all defined elsewhere in the original post.
    """
    X = np.zeros((batch_size, height, width, 3), dtype=np.uint8)
    y = [np.zeros((batch_size, n_class), dtype=np.uint8) for _ in range(n_len)]
    generator = ImageCaptcha(width=width, height=height)
    while True:
        for i in range(batch_size):
            # Draw a random 4-character string from the allowed alphabet.
            random_str = ''.join(random.choice(characters) for _ in range(4))
            X[i] = generator.generate_image(random_str)
            for j, ch in enumerate(random_str):
                y[j][i, :] = 0  # clear the slot — the arrays are reused across batches
                y[j][i, characters.find(ch)] = 1
        yield X, y
生成器的使用方法很簡單,只需要用 next 函數(shù)即可。下面是一個例子,生成32個數(shù)據(jù),然后顯示第一個數(shù)據(jù)。當(dāng)然,在這里我們還對生成的 One-Hot 編碼后的數(shù)據(jù)進(jìn)行了解碼:首先將它轉(zhuǎn)為 numpy 數(shù)組,然后在36個類別的概率中取最大值所在的位置——因為神經(jīng)網(wǎng)絡(luò)會輸出每個字符對應(yīng)36個類別的概率——最后將概率最大的四個位置的編號轉(zhuǎn)換為字符串。
1 2 3 4 5 6 7
def decode(y):
    """Convert one-hot/probability predictions back to a CAPTCHA string.

    `y` is a list (length n_len) of arrays of shape (batch, n_class);
    argmax over the class axis picks the most likely character per position,
    and `[:, 0]` keeps only the first sample of the batch.
    """
    y = np.argmax(np.array(y), axis=2)[:, 0]
    # `characters` is a module-level alphabet string defined elsewhere.
    return ''.join(characters[x] for x in y)
# Pull one batch of size 1 from the generator and display the image with
# its decoded label as the title (visual sanity check of gen/decode).
X, y = next(gen(1))
plt.imshow(X[0])
plt.title(decode(y))
構(gòu)建深度卷積神經(jīng)網(wǎng)絡(luò)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
from keras.models import * from keras.layers import *
# Four conv stages with doubling filter counts (32 → 64 → 128 → 256):
# two 3×3 ReLU convolutions followed by 2×2 max-pooling each
# (Keras 1.x `Convolution2D(filters, rows, cols)` API).
input_tensor = Input((height, width, 3))
x = input_tensor
for i in range(4):
    x = Convolution2D(32 * 2 ** i, 3, 3, activation='relu')(x)
    x = Convolution2D(32 * 2 ** i, 3, 3, activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

x = Flatten()(x)
x = Dropout(0.25)(x)
# One independent softmax head per CAPTCHA character position (c1..c4).
x = [Dense(n_class, activation='softmax', name='c%d' % (i + 1))(x)
     for i in range(4)]
# Keras 1.x keyword names (`input=`/`output=`).
model = Model(input=input_tensor, output=x)
這個 loss 是一個特別神奇的 loss,它可以在只知道序列的順序,不知道具體位置的情況下,讓模型收斂。在這方面百度似乎做得很不錯,利用它來識別音頻信號。(warp-ctc)
https://github.com/baidu-research/warp-ctc
那么在 Keras 里面,CTC Loss 已經(jīng)內(nèi)置了,我們直接定義這樣一個函數(shù),即可實現(xiàn) CTC Loss,由于我們使用的是循環(huán)神經(jīng)網(wǎng)絡(luò),所以默認(rèn)丟掉前面兩個輸出,因為它們通常無意義,且會影響模型的輸出。
from keras.models import *
from keras.layers import *

rnn_size = 128

# NOTE: input is (width, height, 3) — images are transposed before feeding
# so that the width axis can serve as the RNN time dimension downstream.
input_tensor = Input((width, height, 3))
x = input_tensor
# Three conv stages (32 filters each): two 3×3 ReLU convs + 2×2 max-pool.
for i in range(3):
    x = Convolution2D(32, 3, 3, activation='relu')(x)
    x = Convolution2D(32, 3, 3, activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Collapse (height, channels) into a single feature axis so that each
# position along the width becomes one timestep of features.
conv_shape = x.get_shape()
x = Reshape(target_shape=(int(conv_shape[1]),
                          int(conv_shape[2] * conv_shape[3])))(x)
def gen(batch_size=128):
    """Infinite generator of CTC training batches.

    Yields ([X, y, input_length, label_length], dummy_target) in the format
    expected by Keras' `ctc_batch_cost`.  Images are transposed to
    (width, height, 3) so the width axis becomes the time dimension.

    NOTE(review): relies on module-level globals `width`, `height`, `n_len`,
    `characters`, `conv_shape`, and on `ImageCaptcha` — defined elsewhere.
    """
    X = np.zeros((batch_size, width, height, 3), dtype=np.uint8)
    y = np.zeros((batch_size, n_len), dtype=np.uint8)
    # Hoisted out of the loop: the original rebuilt ImageCaptcha every batch.
    generator = ImageCaptcha(width=width, height=height)
    while True:
        for i in range(batch_size):
            random_str = ''.join(random.choice(characters) for _ in range(4))
            # PIL image comes back as (height, width, 3); transpose so the
            # width axis is first.
            X[i] = np.array(generator.generate_image(random_str)).transpose(1, 0, 2)
            y[i] = [characters.find(c) for c in random_str]
        # input_length = conv output width minus the 2 timesteps dropped at
        # decode time; label_length is the fixed CAPTCHA length n_len.
        yield ([X, y,
                np.ones(batch_size) * int(conv_shape[1] - 2),
                np.ones(batch_size) * n_len],
               np.ones(batch_size))
評估模型
1 2 3 4 5 6 7 8 9 10 11 12 13
def evaluate(model, batch_num=10):
    """Estimate full-sequence accuracy over `batch_num` generated batches.

    Runs CTC greedy decoding on the model's predictions (dropping the first
    two timesteps, which are typically meaningless) and counts a sample as
    correct only when all 4 characters match the ground truth.
    """
    batch_acc = 0
    generator = gen()
    for _ in range(batch_num):
        [X_test, y_test, _, _], _ = next(generator)
        # BUG FIX: the original ignored the `model` parameter and always
        # called the global `base_model.predict` — use the argument instead.
        y_pred = model.predict(X_test)
        y_pred = y_pred[:, 2:, :]  # drop the first two (noisy) timesteps
        shape = y_pred.shape
        ctc_decode = K.ctc_decode(
            y_pred, input_length=np.ones(shape[0]) * shape[1])[0][0]
        out = K.get_value(ctc_decode)[:, :4]
        if out.shape[1] == 4:
            batch_acc += ((y_test == out).sum(axis=1) == 4).mean()
    return batch_acc / batch_num
由于 CTC Loss 收斂很慢,所以我們需要設(shè)置比較大的代數(shù),這里我們設(shè)置了100代,然后添加了一個早期停止的回調(diào)和我們上面定義的回調(diào),但是第一次訓(xùn)練只訓(xùn)練37代就停了,測試準(zhǔn)確率才95%,我又在這個基礎(chǔ)上繼續(xù)訓(xùn)練了一次,停在了25代,得到了98%的準(zhǔn)確率,所以一共訓(xùn)練了62代。
# Visual sanity check: decode one prediction and compare it to ground truth.
characters2 = characters + ' '  # extra class for the CTC blank symbol
[X_test, y_test, _, _], _ = next(gen(1))
y_pred = base_model.predict(X_test)
y_pred = y_pred[:, 2:, :]  # drop the first two timesteps, as in evaluate()
out = K.get_value(
    K.ctc_decode(y_pred,
                 input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1],
                 )[0][0])[:, :4]
out = ''.join([characters[x] for x in out[0]])
y_true = ''.join([characters[x] for x in y_test[0]])