【pythonでニューラルネットワーク#9】ニューラルネットワーク(多値分類)
記事の目的
Pythonで多値分類のニューラルネットワークを実装していきます。ここにある全てのコードは、コピペで再現することが可能です。
目次
1 今回のモデル
2 ライブラリとデータ
# In[1]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG once, up front, so later random draws are repeatable.
np.random.seed(1)

# In[2]
# Load the digits dataset: x holds the feature rows, t the integer labels.
dataset = datasets.load_digits()
x = np.asarray(dataset.data)
t = np.asarray(dataset.target)

# In[3]
# Notebook display: the first sample as a flat feature vector.
x[0,]

# In[4]
# The same sample viewed as an 8x8 image.
image = x[0,].reshape(8, 8)
plt.imshow(image, cmap="binary_r")

# In[5]
t

# In[6]
x.shape

# In[7]
# Standardize with a single mean and standard deviation taken over every
# element of x (not per feature).
x = (x - np.average(x)) / np.std(x)

# In[8]
# One-hot encode the labels into 10 columns: row i gets a 1 in column t[i].
t_zero = np.zeros((len(t), 10))
t_zero[np.arange(len(t)), t] = 1
t[1,], t_zero[1,]

# In[9]
# Split features and one-hot targets into train and test subsets.
x_train, x_test, t_train, t_test = train_test_split(x, t_zero)

# In[10]
x_train.shape, x_test.shape, t_train.shape, t_test.shape
3 モデル
# In[11]
class Optimizer:
    """Mixin that applies a plain gradient-descent step to ``w`` and ``b``."""

    def step(self, lr):
        """Update the parameters in place using the stored gradients.

        Args:
            lr: Learning rate multiplying ``dw`` and ``db``.

        ``backward`` must have been called first so ``dw`` and ``db`` exist.
        """
        self.w -= lr * self.dw
        self.b -= lr * self.db


class Linear(Optimizer):
    """Fully connected layer computing ``x @ w + b``."""

    def __init__(self, x_n, y_n):
        """Create a layer with ``x_n`` inputs and ``y_n`` outputs."""
        # Normal weights scaled by sqrt(2 / x_n) (He-style scaling); zero bias.
        self.w = np.random.randn(x_n, y_n) * np.sqrt(2/x_n)
        self.b = np.zeros(y_n)

    def forward(self, x):
        """Return ``x @ w + b`` and keep ``x`` for the backward pass."""
        self.x = x
        self.y = np.dot(x, self.w) + self.b
        return self.y

    def backward(self, dy):
        """Store ``dw``/``db`` for ``step`` and return the input gradient.

        Args:
            dy: Gradient with respect to this layer's output.
        """
        self.dw = np.dot(self.x.T, dy)
        self.db = np.sum(dy, axis=0)
        self.dx = np.dot(dy, self.w.T)
        return self.dx


class Relu:
    """Element-wise rectifier: non-positive inputs map to 0."""

    def forward(self, x):
        """Return ``x`` with every entry ``<= 0`` replaced by 0."""
        self.x = x
        y = np.where(self.x <= 0, 0, self.x)
        return y

    def backward(self, dy):
        """Pass ``dy`` through where the saved input was positive, else 0."""
        dx = dy * np.where(self.x <= 0, 0, 1)
        return dx


class CELoss:
    """Softmax followed by cross-entropy against one-hot targets."""

    def forward(self, x, t):
        """Return the mean cross-entropy of ``softmax(x)`` against ``t``.

        Args:
            x: 2-D array of logits, one row per sample.
            t: Targets with the same shape as ``x``.

        Returns:
            The loss summed over all entries and divided by the row count.
        """
        self.t = t
        # Softmax is unchanged by subtracting a per-row constant. Removing
        # the row maximum keeps every exponent <= 0, so np.exp cannot
        # overflow to inf and produce inf/inf = nan for large logits.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(shifted)
        self.y = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        # The 1e-7 offset keeps log() finite when a probability is exactly 0.
        L = -np.sum(t*np.log(self.y+1e-7)) / len(self.y)
        return L

    def backward(self):
        """Return ``softmax(x) - t`` from the most recent ``forward`` call.

        NOTE: ``forward`` divides the loss by the number of rows but this
        gradient is not divided, so it is the gradient of the batch-summed
        loss and the effective step size grows with the batch size.
        """
        dx = self.y - self.t
        return dx


# In[12]
# Network: 64 inputs -> 32 hidden units (ReLU) -> 10 output logits.
layer1 = Linear(64, 32)
relu = Relu()
layer2 = Linear(32, 10)
celoss = CELoss()


def model(x):
    """Run the forward pass and return the output-layer logits."""
    y1 = layer1.forward(x)
    y2 = relu.forward(y1)
    y3 = layer2.forward(y2)
    return y3


def loss(y, t):
    """Return the softmax cross-entropy of logits ``y`` against ``t``."""
    L = celoss.forward(y, t)
    return L


def backward():
    """Backpropagate from the loss through layer2, relu and layer1.

    Uses the values cached by the latest ``model`` and ``loss`` calls.
    """
    dy3 = celoss.backward()
    dy2 = layer2.backward(dy3)
    dy1 = relu.backward(dy2)
    layer1.backward(dy1)


def optimizer(lr):
    """Apply one gradient-descent step with rate ``lr`` to both layers."""
    layer1.step(lr)
    layer2.step(lr)
4 モデルの学習
# In[13]
# Mini-batch gradient descent: 100 epochs, batches of 10, learning rate 1e-3.
batch_size = 10
batch_n = len(x_train) // batch_size
batch_index = np.arange(len(x_train))
loss_train_all = []
loss_test_all = []

for epoch in range(1, 100 + 1):
    # Reorder the sample indices in place at the start of every epoch.
    np.random.shuffle(batch_index)

    # Only batch_n full batches are visited; the trailing
    # len(x_train) % batch_size indices of this epoch's order are skipped.
    for start in range(0, batch_n * batch_size, batch_size):
        mb_index = batch_index[start:start + batch_size]
        # The forward pass and loss must run before backward(): that is
        # what populates the values backward() reads.
        loss_train = loss(model(x_train[mb_index]), t_train[mb_index])
        backward()
        optimizer(1e-3)

    # After the epoch's updates, record the loss on the full train and
    # test sets.
    loss_train = loss(model(x_train), t_train)
    loss_test = loss(model(x_test), t_test)
    loss_train_all.append(loss_train)
    loss_test_all.append(loss_test)

    # Report the first epoch and then every 20th.
    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch}, loss_train {loss_train:.4f}, loss_test {loss_test:.4f}")
5 モデルの評価
# In[14]
# Learning curves: per-epoch loss on the train and test sets, x-axis from 1.
plt.plot(range(1, len(loss_train_all) + 1), loss_train_all, label="train")
plt.plot(range(1, len(loss_test_all) + 1), loss_test_all, label="test")
plt.legend()


# In[15]
def accuracy(x, t):
    """Return the fraction of rows where the model's top class matches ``t``.

    Args:
        x: Input rows passed to ``model``.
        t: Target array; the true class of each row is its argmax along
            axis 1.

    Returns:
        Mean of the element-wise match between predicted and true class
        indices (``nan`` for empty input).
    """
    predicted = model(x).argmax(axis=1)
    expected = t.argmax(axis=1)
    # np.mean averages the boolean matches in NumPy; the builtin sum()
    # would walk the array one element at a time in Python.
    acc = np.mean(predicted == expected)
    return acc


# In[16]
print(accuracy(x_train, t_train), accuracy(x_test, t_test))