[Neural Networks in Python #11] Neural Network (BatchNorm)
Purpose of This Article
In this article, we add BatchNorm to a neural network for multi-class classification implemented in Python. All of the code here can be reproduced by copy-and-paste.
Table of Contents
1 This Article's Model
2 BatchNorm
3 Libraries and Data
4 Model
5 Training the Model
6 Evaluating the Model

1 This Article's Model
As in the previous articles, the network classifies 8x8 digit images into ten classes. This time, a BatchNorm layer is inserted between the first fully connected layer and the ReLU activation: Linear(64, 32) → BatchNorm(32) → ReLU → Linear(32, 10).

2 BatchNorm
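For each feature, BatchNorm standardizes the activations within the current mini-batch to zero mean and unit variance, then applies a learnable scale w (often written gamma) and shift b (beta):

    z = (x - mu) / sqrt(var + eps),    y = w * z + b

where mu and var are the per-feature mean and variance over the batch, and eps = 1e-7 prevents division by zero. Because mu and var themselves depend on x, the backward pass must propagate gradients through them as well; that is what the dvar and dmu terms in the code below compute.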
# In[1]
import numpy as np

np.random.seed(1)

# In[2]
class BatchNorm:
    def __init__(self, n):
        # Learnable scale (gamma) and shift (beta), one per feature
        self.w = np.ones(n)
        self.b = np.zeros(n)

    def forward(self, x):
        self.x = x
        self.mu = np.mean(x, axis=0)
        self.var = np.var(x, axis=0)
        # Standardize each feature over the batch, then scale and shift
        self.z = (x - self.mu) / np.sqrt(self.var + 1e-7)
        self.y = self.w * self.z + self.b
        return self.y

    def backward(self, dy):
        self.dw = np.sum(dy * self.z, axis=0)
        self.db = np.sum(dy, axis=0)
        dz = self.w * dy
        dx = dz / np.sqrt(self.var + 1e-7)
        # Gradient flowing through the batch variance
        dvar = (2 * (self.x - self.mu) / len(dy) * -0.5
                / (np.sqrt(self.var + 1e-7) ** 3)
                * np.sum((self.x - self.mu) * dz, axis=0))
        # Gradient flowing through the batch mean
        dmu = -np.sum(dx + dvar, axis=0) / len(dy)
        self.dx = dx + dvar + dmu
        return self.dx

# In[3]
x = np.arange(15).reshape(3, 5)
x

# In[4]
bn = BatchNorm(5)

# In[5]
y = bn.forward(x)
y

# In[6]
print(np.mean(y, axis=0), np.std(y, axis=0))
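In[6] confirms that the forward pass produces (approximately) zero mean and unit standard deviation for every feature. The backward pass can be sanity-checked with a numerical gradient; the following is a minimal sketch of such a check (not part of the original article), comparing BatchNorm.backward against central differences of the scalar loss sum(y * dy).

# A minimal sketch of a numerical gradient check (not from the article).
# It compares BatchNorm.backward against central differences of the
# scalar loss  L(x) = sum(forward(x) * dy).
bn_check = BatchNorm(5)
x_check = np.random.randn(3, 5)
dy_check = np.random.randn(3, 5)

bn_check.forward(x_check)
dx_analytic = bn_check.backward(dy_check)

h = 1e-5
dx_numeric = np.zeros_like(x_check)
for i in range(x_check.shape[0]):
    for j in range(x_check.shape[1]):
        x_pos, x_neg = x_check.copy(), x_check.copy()
        x_pos[i, j] += h
        x_neg[i, j] -= h
        dx_numeric[i, j] = (np.sum(bn_check.forward(x_pos) * dy_check)
                            - np.sum(bn_check.forward(x_neg) * dy_check)) / (2 * h)

# The maximum difference should be tiny (around 1e-9 or smaller)
print(np.max(np.abs(dx_analytic - dx_numeric)))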
3 Libraries and Data
# In[7]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

np.random.seed(1)

# In[8]
dataset = datasets.load_digits()
x = np.asarray(dataset.data)
t = np.asarray(dataset.target)

# In[9]
image = x[0].reshape(8, 8)
plt.imshow(image, cmap="binary_r")

# In[10]
t

# In[11]
x.shape

# In[12]
x = (x - np.average(x)) / np.std(x)

# In[13]
t_zero = np.zeros((len(t), 10))
for i in range(len(t_zero)):
    t_zero[i, t[i]] = 1

# In[14]
x_train, x_test, t_train, t_test = train_test_split(x, t_zero)

# In[15]
x_train.shape, x_test.shape, t_train.shape, t_test.shape
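In[13] builds the one-hot labels with an explicit loop. If you prefer vectorized NumPy, an equivalent one-liner is:

# Equivalent to the In[13] loop: row i is the one-hot vector for label t[i]
t_zero = np.eye(10)[t]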
4 Model
# In[16]
class Optimizer:
    def step(self, lr):
        # Plain SGD update shared by every layer that has parameters
        self.w -= lr * self.dw
        self.b -= lr * self.db

class Linear(Optimizer):
    def __init__(self, x_n, y_n):
        # He initialization, suited to ReLU activations
        self.w = np.random.randn(x_n, y_n) * np.sqrt(2/x_n)
        self.b = np.zeros(y_n)

    def forward(self, x):
        self.x = x
        self.y = np.dot(x, self.w) + self.b
        return self.y

    def backward(self, dy):
        self.dw = np.dot(self.x.T, dy)
        self.db = np.sum(dy, axis=0)
        self.dx = np.dot(dy, self.w.T)
        return self.dx

class BatchNorm(Optimizer):
    # Same as in section 2, except it inherits Optimizer so that
    # the scale w and shift b are updated during training
    def __init__(self, n):
        self.w = np.ones(n)
        self.b = np.zeros(n)

    def forward(self, x):
        self.x = x
        self.mu = np.mean(x, axis=0)
        self.var = np.var(x, axis=0)
        self.z = (x - self.mu) / np.sqrt(self.var + 1e-7)
        self.y = self.w * self.z + self.b
        return self.y

    def backward(self, dy):
        self.dw = np.sum(dy * self.z, axis=0)
        self.db = np.sum(dy, axis=0)
        dz = self.w * dy
        dx = dz / np.sqrt(self.var + 1e-7)
        dvar = (2 * (self.x - self.mu) / len(dy) * -0.5
                / (np.sqrt(self.var + 1e-7) ** 3)
                * np.sum((self.x - self.mu) * dz, axis=0))
        dmu = -np.sum(dx + dvar, axis=0) / len(dy)
        self.dx = dx + dvar + dmu
        return self.dx

class Relu:
    def forward(self, x):
        self.x = x
        y = np.where(self.x <= 0, 0, self.x)
        return y

    def backward(self, dy):
        dx = dy * np.where(self.x <= 0, 0, 1)
        return dx

class CELoss:
    def forward(self, x, t):
        self.t = t
        # Softmax followed by cross-entropy loss
        self.y = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
        L = -np.sum(t * np.log(self.y + 1e-7)) / len(self.y)
        return L

    def backward(self):
        # Combined softmax + cross-entropy gradient
        dx = self.y - self.t
        return dx

# In[17]
layer1 = Linear(64, 32)
bn = BatchNorm(32)
relu = Relu()
layer2 = Linear(32, 10)
celoss = CELoss()

def model(x):
    y1 = layer1.forward(x)
    y2 = bn.forward(y1)    # BatchNorm between the linear layer and ReLU
    y3 = relu.forward(y2)
    y4 = layer2.forward(y3)
    return y4

def loss(y, t):
    L = celoss.forward(y, t)
    return L

def backward():
    dy4 = celoss.backward()
    dy3 = layer2.backward(dy4)
    dy2 = relu.backward(dy3)
    dy1 = bn.backward(dy2)
    layer1.backward(dy1)

def optimizer(lr):
    layer1.step(lr)
    bn.step(lr)    # BatchNorm's w (gamma) and b (beta) are also learned
    layer2.step(lr)
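One caveat about this implementation: BatchNorm always normalizes with the statistics of whatever batch it is given, so predictions at evaluation time depend on which samples happen to be batched together. A common extension, not used in this article, keeps exponential running averages of the mean and variance during training and uses them at inference. A minimal sketch, where the train flag and momentum value are illustrative choices of mine:

# A sketch of BatchNorm with running statistics (the train flag and
# momentum=0.9 are illustrative choices, not part of the article).
class BatchNormRunning(BatchNorm):
    def __init__(self, n, momentum=0.9):
        super().__init__(n)
        self.momentum = momentum
        self.running_mu = np.zeros(n)
        self.running_var = np.ones(n)
        self.train = True

    def forward(self, x):
        if self.train:
            # Normalize with batch statistics and update the running averages
            y = super().forward(x)
            self.running_mu = (self.momentum * self.running_mu
                               + (1 - self.momentum) * self.mu)
            self.running_var = (self.momentum * self.running_var
                                + (1 - self.momentum) * self.var)
            return y
        # Evaluation: normalize with the accumulated training statistics
        z = (x - self.running_mu) / np.sqrt(self.running_var + 1e-7)
        return self.w * z + self.b
    # backward is inherited and is only valid after a training-mode forward

With this variant, you would set the layer's train attribute to False before computing the final accuracies, the same way Dropout is switched off at test time in the previous article.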
5 Training the Model
# In[18]
batch_size = 10
batch_n = len(x_train) // batch_size
batch_index = np.arange(len(x_train))
loss_train_all = []
loss_test_all = []

for epoch in range(1, 100 + 1):
    # Shuffle the sample order every epoch, then train on mini-batches
    np.random.shuffle(batch_index)
    for n in range(batch_n):
        mb_index = batch_index[n*batch_size:(n+1)*batch_size]
        y = model(x_train[mb_index])
        loss_train = loss(y, t_train[mb_index])
        backward()
        optimizer(1e-3)

    # Record the loss on the full training and test sets once per epoch
    y_train = model(x_train)
    loss_train = loss(y_train, t_train)
    y_test = model(x_test)
    loss_test = loss(y_test, t_test)
    loss_train_all.append(loss_train)
    loss_test_all.append(loss_test)

    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch}, loss_train {loss_train:.4f}, loss_test {loss_test:.4f}")
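One detail worth noting: batch_n = len(x_train) // batch_size rounds down, so when the training set size is not a multiple of batch_size, the last few shuffled samples are skipped in each epoch (different ones each time, because of the reshuffle). If you want every sample to be used, the inner loop could be written, for example, as:

# A sketch of the inner loop that also trains on the final, smaller batch
# instead of dropping the remainder samples.
for start in range(0, len(batch_index), batch_size):
    mb_index = batch_index[start:start + batch_size]
    y = model(x_train[mb_index])
    loss(y, t_train[mb_index])   # sets the softmax output used by backward()
    backward()
    optimizer(1e-3)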
6 Evaluating the Model
# In[19]
plt.plot(range(1, len(loss_train_all)+1), loss_train_all, label="train")
plt.plot(range(1, len(loss_test_all)+1), loss_test_all, label="test")
plt.legend()

# In[20]
def accuracy(x, t):
    # Fraction of samples whose predicted class matches the label
    acc = sum(model(x).argmax(axis=1) == t.argmax(axis=1)) / len(t)
    return acc

# In[21]
print(accuracy(x_train, t_train), accuracy(x_test, t_test))
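Beyond a single accuracy number, a confusion matrix shows which digits the network actually confuses. A minimal NumPy-only sketch (not part of the original article):

# A 10x10 confusion matrix: rows are true digits, columns are predictions
pred = model(x_test).argmax(axis=1)
true = t_test.argmax(axis=1)
conf = np.zeros((10, 10), dtype=int)
for p, q in zip(pred, true):
    conf[q, p] += 1
print(conf)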