MNIST training with numpy
Get MNIST
- Before training on MNIST, the MNIST data set has to be obtained first. The MnistManager class below loads a pickled copy of the data set, normalizes the images, and one-hot encodes the labels (a sketch of how the pickle file can be built follows the class).
import os
import pickle
import numpy as np
# File manager for the MNIST data set
class MnistManager():
    def __init__(self):
        # The number of labels
        self.nr_labels = 10
    # Transform label data to one-hot encoding format
    def encode_one_hot(self, X):
        T = np.zeros((X.size, self.nr_labels))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
        return T
    # Get MNIST data, normalized and one-hot encoded
    def getMNIST(self):
        # The pickle file for the MNIST data set
        pklFile = "mnist.pkl"
        if not os.path.exists(pklFile):
            return (0, 0), (0, 0)
        dataset = None
        with open(pklFile, "rb") as f:
            dataset = pickle.load(f)
        # Normalize pixel values to [0, 1]
        for key in ("train_img", "test_img"):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
        dataset["train_label"] = self.encode_one_hot(dataset["train_label"])
        dataset["test_label"] = self.encode_one_hot(dataset["test_label"])
        return (dataset["train_img"], dataset["train_label"]), \
               (dataset["test_img"], dataset["test_label"])
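The class above assumes that an mnist.pkl file already exists, containing a dict with the keys train_img, train_label, test_img and test_label. How that file is produced is not shown in this post; one possible way to build it, assuming the four gzipped MNIST IDX files have already been downloaded (the file names below are the conventional ones and may need adjusting), is sketched here:
import gzip
import pickle
import numpy as np
# Raw MNIST files assumed to be present in the working directory
files = {
    "train_img": "train-images-idx3-ubyte.gz",
    "train_label": "train-labels-idx1-ubyte.gz",
    "test_img": "t10k-images-idx3-ubyte.gz",
    "test_label": "t10k-labels-idx1-ubyte.gz",
}
def load_images(path):
    # Image files start with a 16-byte header (magic, count, rows, cols)
    with gzip.open(path, "rb") as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    return data.reshape(-1, 784)
def load_labels(path):
    # Label files start with an 8-byte header (magic, count)
    with gzip.open(path, "rb") as f:
        return np.frombuffer(f.read(), np.uint8, offset=8)
dataset = {key: (load_images(path) if "img" in key else load_labels(path))
           for key, path in files.items()}
with open("mnist.pkl", "wb") as f:
    pickle.dump(dataset, f)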
Single Layer Neural Network for MNIST
- For the MNIST data set, the input layer has 784 nodes and the output layer has 10 nodes.
- Input layer (1 x 784) - single layer of weights (784 x 10) - output layer (1 x 10).
- With a batch size of 100, the input and output matrices have 100 rows each (a quick shape check follows this list).
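As a quick sanity check of those shapes (a minimal sketch with random data, not part of the model itself), the dot product of a 100 x 784 batch with a 784 x 10 weight matrix plus a length-10 bias gives a 100 x 10 output:
import numpy as np
X = np.random.rand(100, 784)          # a batch of 100 flattened 28 x 28 images
W = 0.01 * np.random.randn(784, 10)   # weight matrix
b = np.zeros(10)                      # bias vector, broadcast across the batch
logits = np.dot(X, W) + b
print(logits.shape)                   # (100, 10): one row of 10 scores per image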
import numpy as np
# Single layer neural network
class SingleLayerNeuralNetwork():
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # Dictionary for weights and bias
        self._params = {}
        # randn samples from the standard normal distribution
        # (standard deviation 1); multiplying by weight_init_std
        # gives random weights whose standard deviation is weight_init_std.
        self._params["W"] = weight_init_std * \
            np.random.randn(input_size, output_size)
        # Initialize all biases to zero
        self._params["b"] = np.zeros(output_size)
    # Sigmoid function as the activation function
    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))
    # Softmax function as the output function
    def softmax(self, X):
        ret = None
        if X.ndim == 2:
            X = X.T
            # Subtract the column-wise maximum to avoid overflow
            X = X - np.max(X, axis=0)
            Y = np.exp(X) / np.sum(np.exp(X), axis=0)
            ret = Y.T
        else:
            # To avoid overflow
            X = X - np.max(X)
            ret = np.exp(X) / np.sum(np.exp(X))
        return ret
    # Cross entropy error function
    def cross_entropy_error(self, Y, labels):
        # Translate one-hot encoded labels to answer index.
        labels = labels.argmax(axis=1)
        batch_size = Y.shape[0]
        log_val = np.log(Y[np.arange(batch_size), labels])
        return -np.sum(log_val) / batch_size
    # Prediction function
    def predict(self, X):
        W = self._params["W"]
        b = self._params["b"]
        # Logit
        logit = np.dot(X, W) + b
        hypothesis = self.sigmoid(logit)
        output = self.softmax(hypothesis)
        return output
    # Cost function
    def cost(self, X, labels):
        Y = self.predict(X)
        return self.cross_entropy_error(Y, labels)
    # Accuracy function
    def accuracy(self, X, labels):
        Y = self.predict(X)
        Y = np.argmax(Y, axis=1)
        labels = np.argmax(labels, axis=1)
        accuracy = np.sum(Y == labels) / float(X.shape[0])
        return accuracy
    # Numerical gradient function (central difference)
    def numerical_gradient(self, f, X):
        h = 1e-4
        # Create a zero-filled array with the same shape as X
        grad = np.zeros_like(X)
        # Create an iterator over the numpy array
        it = np.nditer(X, flags=["multi_index"], op_flags=["readwrite"])
        while not it.finished:
            idx = it.multi_index
            tmp_val = X[idx]
            X[idx] = float(tmp_val) + h
            fxh1 = f(X)
            X[idx] = tmp_val - h
            fxh2 = f(X)
            grad[idx] = (fxh1 - fxh2) / (2 * h)
            X[idx] = tmp_val
            it.iternext()
        return grad
    # Gradient function
    def gradient(self, X, labels):
        cost_W = lambda W: self.cost(X, labels)
        grads = {}
        grads["W"] = self.numerical_gradient(cost_W, self._params["W"])
        grads["b"] = self.numerical_gradient(cost_W, self._params["b"])
        return grads
    # Update weights and bias
    def update(self, grad, learning_rate):
        for key in ("W", "b"):
            self._params[key] -= learning_rate * grad[key]
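One quick way to see what numerical_gradient computes (a minimal sketch, not part of the training code) is to apply it to a function whose gradient is known analytically, for example f(x) = sum(x^2), whose gradient is 2x:
import numpy as np
net = SingleLayerNeuralNetwork(input_size=784, output_size=10)
f = lambda x: np.sum(x ** 2)
x = np.array([[1.0, 2.0], [3.0, 4.0]])
print(net.numerical_gradient(f, x))   # approximately [[2. 4.] [6. 8.]]
print(2 * x)                          # analytic gradient for comparison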
Training MNIST Data Set
- Train on the MNIST data set with the single layer neural network.
- The number of training instances is 60,000 and the batch size is 100, so one epoch takes 600 iterations.
import numpy as np
import matplotlib.pyplot as plt
# Create MNIST manager
mnistManager = MnistManager()
# Get Mnist data
(X_train, label_train), (X_test, label_test) = mnistManager.getMNIST()
# Create one layer neural network
network = SingleLayerNeuralNetwork(input_size=784, output_size=10)
# Total number of trials
iters_num = 10001
# The number of instances
train_size = X_train.shape[0]
print("Instances: {0}".format(train_size))
# How many instances will be used for one training trial
batch_size = 100
# Learning rate
learning_rate = 0.1
# Variables for graph
train_costs = []
train_accs = []
test_accs = []
# The number of iterations for one epoch
iter_per_epoch = max(train_size / batch_size, 1)
for i in range(iters_num):
    # Stochastic method - randomly choose instances to train on
    batch_mask = np.random.choice(train_size, batch_size)
    # Training input data chosen
    X_batch = X_train[batch_mask]
    # Training label data chosen
    label_batch = label_train[batch_mask]
    # Gradient descent to optimize weights and bias
    grad = network.gradient(X_batch, label_batch)
    # Update weights and bias
    network.update(grad, learning_rate)
    # Total iteration number is 10001.
    # Check variations of cost and accuracy once per epoch
    if i % iter_per_epoch == 0:
        # Cost
        cost = network.cost(X_batch, label_batch)
        train_costs.append(cost)
        # Accuracy on training data
        train_acc = network.accuracy(X_train, label_train)
        train_accs.append(train_acc)
        # Accuracy on test data
        test_acc = network.accuracy(X_test, label_test)
        test_accs.append(test_acc)
        print("Epoch: {0} Cost: {1:0.5f}, Train acc: {2:0.5f}, Test acc: {3:0.5f}".format(i, cost, train_acc, test_acc))
# Draw graph
x = np.arange(len(train_accs))
plt.plot(x, train_accs, label="train acc")
plt.plot(x, test_accs, label="test acc", linestyle="--")
plt.xlabel("trials")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()
Instances: 60000
Epoch: 0 Cost: 2.28237, Train acc: 0.17830, Test acc: 0.18860
Epoch: 600 Cost: 1.71255, Train acc: 0.84300, Test acc: 0.85470
Epoch: 1200 Cost: 1.68713, Train acc: 0.85812, Test acc: 0.86770
Epoch: 1800 Cost: 1.67359, Train acc: 0.86818, Test acc: 0.87530
Epoch: 2400 Cost: 1.66174, Train acc: 0.87053, Test acc: 0.87920
Epoch: 3000 Cost: 1.67088, Train acc: 0.87428, Test acc: 0.87980
Epoch: 3600 Cost: 1.62298, Train acc: 0.87585, Test acc: 0.88340
Epoch: 4200 Cost: 1.63926, Train acc: 0.87842, Test acc: 0.88470
Epoch: 4800 Cost: 1.62657, Train acc: 0.88058, Test acc: 0.88820
Epoch: 5400 Cost: 1.63765, Train acc: 0.88292, Test acc: 0.89020
Epoch: 6000 Cost: 1.60662, Train acc: 0.88365, Test acc: 0.89040
Image 1. Accuracy of single layer MNIST model