[Neural Networks in Python #11] Neural Network (BatchNorm)

Purpose of This Article

In this article we implement a neural network for multi-class classification in Python, this time adding BatchNorm. All of the code here can be reproduced by copy-and-paste.

 

Table of Contents

  1. This Model
  2. BatchNorm
  3. Libraries and Data
  4. Model
  5. Training the Model
  6. Evaluating the Model

 

1 This Model

The model in this article is a two-layer network for the 10-class handwritten digits dataset, with a Batch Normalization layer inserted after the first linear layer: Linear(64→32) → BatchNorm → ReLU → Linear(32→10), trained with softmax cross-entropy.

 

2 BatchNorm
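Batch Normalization standardizes each feature using the mean and variance of the current mini-batch, then applies a learnable scale w and shift b. For a mini-batch x_1, ..., x_N (treated per feature), the forward pass implemented below computes, with a small constant ε = 1e-7 for numerical stability:

$$\mu = \frac{1}{N}\sum_{i=1}^{N} x_i,\qquad \sigma^2 = \frac{1}{N}\sum_{i=1}^{N}(x_i - \mu)^2$$

$$z_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \varepsilon}},\qquad y_i = w\,z_i + b$$

In the backward pass, the gradient reaches x along three paths: directly through z, through the batch variance σ², and through the batch mean μ. The backward() method below adds up exactly these three contributions.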

# In[1]
import numpy as np
np.random.seed(1)

# In[2]
class BatchNorm:
    def __init__(self, n):
        # Learnable scale and shift, one per feature
        self.w = np.ones(n)
        self.b = np.zeros(n)

    def forward(self, x):
        self.x = x
        # Per-feature mean and variance of the mini-batch
        self.mu = np.mean(x, axis=0)
        self.var = np.var(x, axis=0)
        # Normalize, then scale and shift
        self.z = (x - self.mu)/np.sqrt(self.var + 1e-7)
        self.y = self.w*self.z + self.b
        return self.y

    def backward(self, dy):
        # Gradients of the scale and shift
        self.dw = np.sum(dy*self.z, axis=0)
        self.db = np.sum(dy, axis=0)
        dz = self.w * dy
        # Path 1: gradient through z directly
        dx = dz/np.sqrt(self.var + 1e-7)
        # Path 2: gradient through the batch variance
        dvar = 2*(self.x-self.mu)/len(dy)* -0.5 / \
                (np.sqrt(self.var + 1e-7)**3) *\
                np.sum((self.x-self.mu)*dz, axis=0)
        # Path 3: gradient through the batch mean, shared across the batch
        dmu = -np.sum(dx + dvar, axis=0)/len(dy)
        self.dx = dx + dvar + dmu
        return self.dx

# In[3]
x = np.arange(15).reshape(3,5)
x

# In[4]
bn = BatchNorm(5)

# In[5]
y = bn.forward(x)
y

# In[6]
print(np.mean(y,axis=0), np.std(y,axis=0))
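As an extra check (this cell is not in the original article), we can compare backward() against a numerical gradient. Using the scalar function L = Σy²/2, whose gradient with respect to y is simply y, the analytic and centered-difference gradients should agree closely:

# Gradient check (aside, not an original cell)
x = x.astype(float)
y = bn.forward(x)
analytic = bn.backward(y)        # dL/dx for L = sum(y**2)/2, since dL/dy = y

eps = 1e-5
numeric = np.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        x_p = x.copy(); x_p[i, j] += eps
        x_m = x.copy(); x_m[i, j] -= eps
        L_p = np.sum(bn.forward(x_p)**2) / 2
        L_m = np.sum(bn.forward(x_m)**2) / 2
        numeric[i, j] = (L_p - L_m) / (2*eps)

print(np.max(np.abs(analytic - numeric)))   # should be close to zero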

 

3 Libraries and Data

We use scikit-learn's handwritten digits dataset: 1,797 grayscale images of 8x8 pixels, each labeled with one of 10 digit classes. The inputs are standardized and the targets one-hot encoded before splitting into training and test sets.

# In[7]
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
np.random.seed(1)

# In[8]
dataset = datasets.load_digits()
x = np.asarray(dataset.data)
t = np.asarray(dataset.target)

# In[9]
image = x[0,].reshape(8,8)
plt.imshow(image, cmap="binary_r")

# In[10]
t

# In[11]
x.shape

# In[12]
x = (x - np.average(x)) / np.std(x)
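A quick check (not a cell in the original article) that the standardization produced zero mean and unit standard deviation over the whole dataset:

# Check (aside): global mean should be ~0 and std ~1 after standardizing
print(np.mean(x), np.std(x))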

# In[13]
t_zero = np.zeros((len(t), 10))
for i in range(len(t_zero)):
    t_zero[i, t[i]] = 1
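The loop above builds the one-hot targets explicitly. As an aside (not in the original article), the same result can be obtained in one line by indexing into an identity matrix:

# Equivalent vectorized one-hot encoding (aside)
t_zero = np.eye(10)[t]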

# In[14]
x_train, x_test, t_train, t_test = train_test_split(x, t_zero)

# In[15]
x_train.shape, x_test.shape, t_train.shape, t_test.shape

 

4 Model

We now assemble the model: Linear(64→32) → BatchNorm → ReLU → Linear(32→10), trained with softmax cross-entropy. Each layer with parameters inherits a plain SGD update from the Optimizer base class.

# In[16]
class Optimizer:
    def step(self, lr):
        self.w -= lr * self.dw
        self.b -= lr * self.db

class Linear(Optimizer):
    def __init__(self, x_n, y_n):
        self.w = np.random.randn(x_n, y_n) * np.sqrt(2/x_n)
        self.b = np.zeros(y_n)
    def forward(self, x):
        self.x = x
        self.y = np.dot(x, self.w) + self.b
        return self.y
    def backward(self, dy):
        self.dw = np.dot(self.x.T, dy)
        self.db = np.sum(dy, axis=0)
        self.dx = np.dot(dy, self.w.T)
        return self.dx
    
class BatchNorm(Optimizer):
    def __init__(self, n):
        self.w = np.ones(n)
        self.b = np.zeros(n)
    def forward(self, x):
        self.x = x
        self.mu = np.mean(x, axis=0)
        self.var = np.var(x, axis=0)
        self.z = (x-self.mu)/np.sqrt(self.var + 1e-7)
        self.y = self.w*self.z + self.b
        return self.y
    def backward(self, dy):
        self.dw = np.sum(dy*self.z, axis=0)
        self.db = np.sum(dy, axis=0)
        dz = self.w * dy
        dx = dz/np.sqrt(self.var + 1e-7)
        dvar = 2*(self.x-self.mu)/len(dy)* -0.5 / \
                (np.sqrt(self.var + 1e-7)**3) *\
                np.sum((self.x-self.mu)*dz, axis=0)
        dmu = -np.sum(dx + dvar, axis=0)/len(dy)
        self.dx = dx + dvar + dmu
        return self.dx    
    
class Relu:
    def forward(self, x):
        self.x = x
        y = np.where(self.x <= 0, 0, self.x)
        return y
    def backward(self, dy):
        dx = dy * np.where(self.x <= 0, 0, 1)
        return dx
    
class CELoss:
    def forward(self, x, t):
        self.t = t
        # Subtract the row-wise max before exp to avoid overflow (the softmax is unchanged)
        x = x - np.max(x, axis=1, keepdims=True)
        self.y = np.exp(x)/np.sum(np.exp(x), axis=1, keepdims=True)
        L = -np.sum(t*np.log(self.y+1e-7)) / len(self.y)
        return L
    def backward(self):
        dx = self.y - self.t
        return dx
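
One caveat about the BatchNorm class: it always normalizes with the statistics of the mini-batch it is given, even at evaluation time (in sections 5 and 6, model() is called on the full training and test sets, so the statistics are computed over those whole sets). A common refinement keeps exponential moving averages of the mean and variance during training and uses them at inference. The sketch below illustrates the idea; the class name BatchNormEMA and the momentum value 0.9 are choices made for this illustration, not part of the original article.

class BatchNormEMA(Optimizer):
    def __init__(self, n, momentum=0.9):
        self.w = np.ones(n)
        self.b = np.zeros(n)
        self.momentum = momentum
        self.run_mu = np.zeros(n)     # running mean
        self.run_var = np.ones(n)     # running variance
        self.train = True
    def forward(self, x):
        if self.train:
            self.x = x
            self.mu = np.mean(x, axis=0)
            self.var = np.var(x, axis=0)
            # Update the running estimates from the batch statistics
            self.run_mu = self.momentum*self.run_mu + (1-self.momentum)*self.mu
            self.run_var = self.momentum*self.run_var + (1-self.momentum)*self.var
            self.z = (x - self.mu)/np.sqrt(self.var + 1e-7)
        else:
            # Inference: normalize with the accumulated statistics
            self.z = (x - self.run_mu)/np.sqrt(self.run_var + 1e-7)
        self.y = self.w*self.z + self.b
        return self.y
    # backward (training mode) is the same as in BatchNorm above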

# In[17]
layer1 = Linear(64,32)
bn = BatchNorm(32)
relu = Relu()
layer2 = Linear(32,10)
celoss = CELoss()
def model(x):
    y1 = layer1.forward(x)
    y2 = bn.forward(y1)
    y3 = relu.forward(y2)
    y4 = layer2.forward(y3)
    return y4
def loss(y, t):
    L = celoss.forward(y, t)
    return L
def backward():
    dy4 = celoss.backward()
    dy3 = layer2.backward(dy4)
    dy2 = relu.backward(dy3)
    dy1 = bn.backward(dy2)
    layer1.backward(dy1)
def optimizer(lr):
    layer1.step(lr)
    bn.step(lr)
    layer2.step(lr)
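
As a quick sanity check (this cell is not in the original article): before any training, the softmax output is roughly uniform over the 10 classes, so the cross-entropy loss should be close to ln 10 ≈ 2.30.

# Sanity check (aside): untrained loss should be near log(10)
print(loss(model(x_train[:10]), t_train[:10]), np.log(10))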

 

5 Training the Model

We train for 100 epochs of mini-batch SGD with batch size 10 and learning rate 1e-3, shuffling the training set every epoch and recording the loss on the full training and test sets after each epoch.

# In[18]
batch_size = 10
batch_n = len(x_train) // batch_size
batch_index = np.arange(len(x_train))

loss_train_all = []
loss_test_all = []

for epoch in range(1, 100 + 1):
    
    np.random.shuffle(batch_index)
    
    for n in range(batch_n):

        # Pick the next mini-batch
        mb_index = batch_index[n*batch_size:(n+1)*batch_size]
        y = model(x_train[mb_index])
        # The forward pass through the loss caches what backward() needs
        loss_train = loss(y, t_train[mb_index])
        backward()
        optimizer(1e-3)
    
    y_train = model(x_train)
    loss_train = loss(y_train ,t_train)
    y_test = model(x_test)
    loss_test = loss(y_test ,t_test)
    
    loss_train_all.append(loss_train)
    loss_test_all.append(loss_test)
    

    if epoch == 1 or epoch % 20 == 0:
        print(f"Epoch {epoch}, loss_train {loss_train:.4f}, loss_test {loss_test:.4f}")

 

6 Evaluating the Model

Finally, we plot the loss curves and measure the classification accuracy on the training and test sets.

# In[19]
plt.plot(range(1,len(loss_train_all)+1), loss_train_all, label="train")
plt.plot(range(1,len(loss_test_all)+1), loss_test_all, label="test")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()

# In[20]
def accuracy(x, t):
    # Fraction of samples whose predicted class matches the one-hot target
    acc = np.sum(model(x).argmax(axis=1) == t.argmax(axis=1)) / len(t)
    return acc

# In[22]
print(accuracy(x_train,t_train), accuracy(x_test,t_test))