【Neural Networks in Python #9】Neural Network (Multi-class Classification)

Purpose of This Article

In this article, we implement a neural network for multi-class classification in Python. All of the code here can be reproduced by copy-and-paste.

 

Contents

  1. This Article's Model
  2. Libraries and Data
  3. The Model
  4. Training the Model
  5. Evaluating the Model

 

1 This Article's Model
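
This time we build a fully connected network that classifies 8x8 images of handwritten digits into the ten classes 0 through 9. As defined in Section 3, the architecture is an input layer with 64 units (one per pixel), a hidden layer with 32 units and ReLU activation, and an output layer with 10 units, trained with the softmax cross-entropy loss and plain stochastic gradient descent.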

 

2 Libraries and Data

# In[1]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
np.random.seed(1)  # fix the random seed for reproducibility

# In[2]
# Load the handwritten digits dataset: 1797 samples of 8x8 grayscale images
dataset = datasets.load_digits()
x = np.asarray(dataset.data)    # features: flattened pixels, shape (1797, 64)
t = np.asarray(dataset.target)  # labels: integers 0-9

# In[3]
x[0,]  # pixel values (0-16) of the first sample

# In[4]
# Reshape the first sample back into an 8x8 image and display it
image = x[0,].reshape(8,8)
plt.imshow(image, cmap="binary_r")

# In[5]
t  # class labels for all samples

# In[6]
x.shape  # (1797, 64): 1797 samples with 64 features each

# In[7]
# Standardize the features using the global mean and standard deviation
x = (x - np.average(x)) / np.std(x)
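
After this transformation, the feature matrix as a whole has approximately zero mean and unit standard deviation. A quick sanity check, as a minimal sketch (not one of the original cells):

x.mean(), x.std()  # should be approximately (0.0, 1.0)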

# In[8]
# One-hot encode the labels: row i has a 1 in column t[i], zeros elsewhere
t_zero = np.zeros((len(t), 10))
for i in range(len(t_zero)):
    t_zero[i, t[i]] = 1
t[1,], t_zero[1,]  # compare an integer label with its one-hot vector
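
As an aside, the same one-hot encoding can be written in one line by indexing NumPy's identity matrix. A minimal equivalent sketch (t_onehot is just an illustrative name):

t_onehot = np.eye(10)[t]        # row t[i] of the 10x10 identity matrix is the one-hot vector for class t[i]
np.allclose(t_onehot, t_zero)   # True: identical to the loop above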

# In[9]
# Split into training and test sets (75% / 25% by default)
x_train, x_test, t_train, t_test = train_test_split(x, t_zero)
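
By default, train_test_split holds out 25% of the samples for testing. Passing random_state (a standard scikit-learn parameter) makes the split reproducible independently of the global NumPy seed; a minimal sketch:

x_train, x_test, t_train, t_test = train_test_split(
    x, t_zero, test_size=0.25, random_state=1)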

# In[10]
x_train.shape, x_test.shape, t_train.shape, t_test.shape  # confirm the split sizes

 

3 The Model

# In[11]
class Optimizer:
    # Vanilla SGD update shared by all trainable layers
    def step(self, lr):
        self.w -= lr * self.dw
        self.b -= lr * self.db

class Linear(Optimizer):
    def __init__(self, x_n, y_n):
        # He initialization, suited to ReLU activations
        self.w = np.random.randn(x_n, y_n) * np.sqrt(2/x_n)
        self.b = np.zeros(y_n)
    def forward(self, x):
        self.x = x
        self.y = np.dot(x, self.w) + self.b
        return self.y
    def backward(self, dy):
        self.dw = np.dot(self.x.T, dy)
        self.db = np.sum(dy, axis=0)
        self.dx = np.dot(dy, self.w.T)
        return self.dx

class Relu:
    def forward(self, x):
        self.x = x
        y = np.where(self.x <= 0, 0, self.x)
        return y
    def backward(self, dy):
        # The gradient passes through only where the input was positive
        dx = dy * np.where(self.x <= 0, 0, 1)
        return dx

class CELoss:
    # Softmax combined with the cross-entropy loss
    def forward(self, x, t):
        self.t = t
        # Subtract the row-wise max before exponentiating for numerical stability
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.y = exp_x / np.sum(exp_x, axis=1, keepdims=True)
        L = -np.sum(t*np.log(self.y+1e-7)) / len(self.y)
        return L
    def backward(self):
        # For softmax + cross-entropy, the gradient w.r.t. the logits is y - t;
        # divide by the batch size to match the averaging done in forward
        dx = (self.y - self.t) / len(self.y)
        return dx
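
The remarkably clean backward pass comes from the well-known property that, for softmax outputs y combined with cross-entropy loss, the gradient with respect to the logits is y - t (divided here by the batch size, because forward averages the loss over the batch). A minimal numerical check of this gradient, as a sketch (loss_fn, x_check, t_check, and eps are illustrative names, not part of the original cells):

# Compare the analytic gradient of one logit against a central difference
loss_fn = CELoss()
x_check = np.random.randn(4, 10)              # 4 samples, 10 classes (dummy logits)
t_check = np.eye(10)[np.array([3, 1, 4, 1])]  # dummy one-hot targets
loss_fn.forward(x_check, t_check)
analytic = loss_fn.backward()[0, 0]
eps = 1e-5
x_plus, x_minus = x_check.copy(), x_check.copy()
x_plus[0, 0] += eps
x_minus[0, 0] -= eps
numeric = (loss_fn.forward(x_plus, t_check) - loss_fn.forward(x_minus, t_check)) / (2 * eps)
print(analytic, numeric)  # the two values should agree closely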

# In[12]
# Assemble the network: 64 -> 32 -> ReLU -> 10, with softmax cross-entropy loss
layer1 = Linear(64,32)
relu = Relu()
layer2 = Linear(32,10)
celoss = CELoss()
def model(x):
    y1 = layer1.forward(x)
    y2 = relu.forward(y1)
    y3 = layer2.forward(y2)
    return y3
def loss(y, t):
    L = celoss.forward(y, t)
    return L
def backward():
    # Propagate gradients through the layers in reverse order
    dy3 = celoss.backward()
    dy2 = layer2.backward(dy3)
    dy1 = relu.backward(dy2)
    layer1.backward(dy1)
def optimizer(lr):
    layer1.step(lr)
    layer2.step(lr)
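
Before training, it is worth confirming that a forward pass produces one raw score per class. A minimal sketch (logits is just an illustrative name):

logits = model(x_train[:5])  # forward pass on 5 samples
logits.shape                 # (5, 10): one score per digit class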

 

4 Training the Model

# In[13]
batch_size = 10
batch_n = len(x_train) // batch_size  # number of mini-batches per epoch
batch_index = np.arange(len(x_train))

loss_train_all = []
loss_test_all = []

for epoch in range(1, 100 + 1):

    # Shuffle the sample order at the start of every epoch
    np.random.shuffle(batch_index)

    for n in range(batch_n):

        mb_index = batch_index[n*batch_size:(n+1)*batch_size]
        y = model(x_train[mb_index])
        # The forward loss call caches the values that backward() needs
        loss_train = loss(y, t_train[mb_index])
        backward()
        optimizer(1e-3)

    # Record the loss over the full training and test sets once per epoch
    y_train = model(x_train)
    loss_train = loss(y_train, t_train)
    y_test = model(x_test)
    loss_test = loss(y_test, t_test)

    loss_train_all.append(loss_train)
    loss_test_all.append(loss_test)

    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch}, loss_train {loss_train:.4f}, loss_test {loss_test:.4f}")

 

5 Evaluating the Model

# In[14]
# Plot the training and test loss curves by epoch
plt.plot(range(1,len(loss_train_all)+1), loss_train_all, label="train")
plt.plot(range(1,len(loss_test_all)+1), loss_test_all, label="test")
plt.legend()

# In[15]
def accuracy(x, t):
    # Fraction of samples whose highest-scoring class matches the one-hot label
    acc = np.sum(model(x).argmax(axis=1) == t.argmax(axis=1)) / len(t)
    return acc

# In[16]
print(accuracy(x_train,t_train), accuracy(x_test,t_test))  # training and test accuracy
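
Beyond a single accuracy number, a confusion matrix shows which digits get mistaken for which. A minimal sketch using scikit-learn (confusion_matrix is a standard sklearn.metrics function; pred and true are illustrative names, and everything else reuses definitions from above):

from sklearn.metrics import confusion_matrix
pred = model(x_test).argmax(axis=1)  # predicted class per test sample
true = t_test.argmax(axis=1)         # recover integer labels from the one-hot targets
print(confusion_matrix(true, pred))  # rows: true digits, columns: predicted digits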