[Neural Networks in Python #10] Neural Network (Dropout)
Purpose of This Article
In this article, we add Dropout to a multi-class classification neural network implemented in Python. All of the code here can be reproduced by copy and paste.
Table of Contents
1 This Article's Model
2 Dropout
# In[1]
import numpy as np

np.random.seed(1)

# In[2]
class Dropout:
    def __init__(self, ratio):
        self.ratio = ratio  # probability of dropping each unit
        self.train = True   # set to False at inference time

    def forward(self, x):
        if self.train:
            # Build a 0/1 mask: units whose random draw exceeds ratio survive
            rand = np.random.rand(*x.shape)
            self.dropout = np.where(rand > self.ratio, 1, 0)
            self.y = x * self.dropout
        else:
            # At inference, pass the input through unchanged
            self.y = x
        return self.y

    def backward(self, dy):
        # Gradients flow only through the units that survived
        self.dx = dy * self.dropout
        return self.dx

# In[3]
x = np.random.rand(25)
x

# In[4]
dropout = Dropout(0.5)

# In[5]
y = dropout.forward(x)
y

# In[6]
dropout.backward(1)
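Note that this Dropout class passes x through unchanged at inference time, so the expected magnitude of the activations differs between training and inference. A common variant, "inverted dropout", rescales the surviving units during training so that no adjustment is needed at inference. The following is a minimal sketch of that variant, not the implementation this article uses:

# A sketch of the inverted-dropout variant (an assumption,
# not the implementation used in this article).
import numpy as np

class InvertedDropout:
    def __init__(self, ratio):
        self.ratio = ratio  # probability of dropping a unit
        self.train = True

    def forward(self, x):
        if self.train:
            rand = np.random.rand(*x.shape)
            # Scale survivors by 1/(1-ratio) so the expected output
            # during training matches the inference-time output.
            self.mask = np.where(rand > self.ratio, 1.0, 0.0) / (1.0 - self.ratio)
            return x * self.mask
        return x

    def backward(self, dy):
        return dy * self.mask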
3 Libraries and Data
# In[7]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

np.random.seed(1)

# In[8]
dataset = datasets.load_digits()
x = np.asarray(dataset.data)
t = np.asarray(dataset.target)

# In[9]
image = x[0].reshape(8, 8)
plt.imshow(image, cmap="binary_r")

# In[10]
t

# In[11]
x.shape

# In[12]
x = (x - np.average(x)) / np.std(x)  # standardize to zero mean, unit variance

# In[13]
t_zero = np.zeros((len(t), 10))
for i in range(len(t_zero)):
    t_zero[i, t[i]] = 1  # one-hot encode the labels

# In[14]
x_train, x_test, t_train, t_test = train_test_split(x, t_zero)

# In[15]
x_train.shape, x_test.shape, t_train.shape, t_test.shape
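As an aside, the one-hot encoding loop in In[13] can also be written in a single line with np.eye. This sketch is equivalent to the loop above, assuming t holds integer labels 0 through 9:

# Equivalent one-hot encoding using np.eye (a sketch, not from the article)
t_zero = np.eye(10)[t]  # row i is the one-hot vector for label t[i]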
4 Model
# In[16]
class Optimizer:
    def step(self, lr):
        # Vanilla SGD update
        self.w -= lr * self.dw
        self.b -= lr * self.db

class Linear(Optimizer):
    def __init__(self, x_n, y_n):
        # He initialization, suited to ReLU activations
        self.w = np.random.randn(x_n, y_n) * np.sqrt(2/x_n)
        self.b = np.zeros(y_n)

    def forward(self, x):
        self.x = x
        self.y = np.dot(x, self.w) + self.b
        return self.y

    def backward(self, dy):
        self.dw = np.dot(self.x.T, dy)
        self.db = np.sum(dy, axis=0)
        self.dx = np.dot(dy, self.w.T)
        return self.dx

class Relu:
    def forward(self, x):
        self.x = x
        y = np.where(self.x <= 0, 0, self.x)
        return y

    def backward(self, dy):
        dx = dy * np.where(self.x <= 0, 0, 1)
        return dx

class CELoss:
    def forward(self, x, t):
        self.t = t
        # Softmax followed by cross-entropy loss
        self.y = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
        L = -np.sum(t * np.log(self.y + 1e-7)) / len(self.y)
        return L

    def backward(self):
        dx = self.y - self.t
        return dx

# In[17]
layer1 = Linear(64, 256)
relu = Relu()
# dropout = Dropout(0)
dropout = Dropout(0.3)
layer2 = Linear(256, 10)
celoss = CELoss()

def model(x):
    y1 = layer1.forward(x)
    y2 = relu.forward(y1)
    y3 = dropout.forward(y2)  # Dropout sits between ReLU and the output layer
    y4 = layer2.forward(y3)
    return y4

def loss(y, t):
    L = celoss.forward(y, t)
    return L

def backward():
    dy4 = celoss.backward()
    dy3 = layer2.backward(dy4)
    dy2 = dropout.backward(dy3)
    dy1 = relu.backward(dy2)
    layer1.backward(dy1)

def optimizer(lr):
    layer1.step(lr)
    layer2.step(lr)
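One caveat: CELoss.forward exponentiates x directly, which can overflow when the logits grow large. Subtracting the row-wise maximum before exponentiating leaves the softmax output unchanged but keeps the computation stable. A sketch of that variant (the article's version works fine here because the logits stay small):

# A numerically stable variant of the softmax in CELoss.forward (a sketch)
def stable_softmax(x):
    x = x - np.max(x, axis=1, keepdims=True)  # shifting does not change softmax
    e = np.exp(x)
    return e / np.sum(e, axis=1, keepdims=True)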
5 Training the Model
# In[18]
batch_size = 10
batch_n = len(x_train) // batch_size
batch_index = np.arange(len(x_train))

loss_train_all = []
loss_test_all = []

for epoch in range(1, 100 + 1):
    np.random.shuffle(batch_index)
    for n in range(batch_n):
        # Draw the next mini-batch
        mb_index = batch_index[n*batch_size:(n+1)*batch_size]
        y = model(x_train[mb_index])
        loss_train = loss(y, t_train[mb_index])
        backward()
        optimizer(1e-3)

    # Record the loss over the full train and test sets each epoch
    y_train = model(x_train)
    loss_train = loss(y_train, t_train)
    y_test = model(x_test)
    loss_test = loss(y_test, t_test)
    loss_train_all.append(loss_train)
    loss_test_all.append(loss_test)

    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch}, loss_train {loss_train:.4f}, loss_test {loss_test:.4f}")
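Note that the epoch-end losses above are computed while dropout.train is still True, so units are also dropped during these evaluations. If you want the recorded losses to reflect inference-time behavior instead, you could toggle the flag around the evaluation. A sketch of that pattern (an assumption, not what this article does):

# Sketch: evaluate each epoch with Dropout disabled, then restore training mode
dropout.train = False
loss_train = loss(model(x_train), t_train)
loss_test = loss(model(x_test), t_test)
dropout.train = True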
6 Evaluating the Model
# In[19]
plt.plot(range(1, len(loss_train_all)+1), loss_train_all, label="train")
plt.plot(range(1, len(loss_test_all)+1), loss_test_all, label="test")
plt.legend()

# In[20]
def accuracy(x, t):
    # Fraction of samples whose predicted class matches the label
    acc = sum(model(x).argmax(axis=1) == t.argmax(axis=1)) / len(t)
    return acc

# In[21]
dropout.train = False  # disable Dropout for evaluation

# In[22]
print(accuracy(x_train, t_train), accuracy(x_test, t_test))
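To sanity-check individual predictions, you can also display a test image next to the model's output. A small sketch building on the names defined above (the index 0 is chosen arbitrarily):

# Sketch: inspect one test sample (dropout.train is already False here)
i = 0
pred = model(x_test[i:i+1]).argmax(axis=1)[0]
plt.imshow(x_test[i].reshape(8, 8), cmap="binary_r")
plt.title(f"predicted: {pred}, label: {t_test[i].argmax()}")
plt.show()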