MNIST training with numpy
Get MNIST
- Before training on MNIST, the MNIST data set has to be obtained first. The MnistManager class below loads a pickled copy of the data set, normalizes the images, and one-hot encodes the labels (a sketch of how the pickle file can be built follows the class).
import os
import pickle
import numpy as np
# File manager for the MNIST data set
class MnistManager():
    def __init__(self):
        # The number of labels
        self.nr_labels = 10
    # Transform label data to one-hot encoding format
    def encode_one_hot(self, X):
        T = np.zeros((X.size, self.nr_labels))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
        return T
    # Get MNIST data, normalized and one-hot encoded
    def getMNIST(self):
        # The pickle file for the MNIST data set
        pklFile = "mnist.pkl"
        if not os.path.exists(pklFile):
            return (0, 0), (0, 0)
        dataset = None
        with open(pklFile, "rb") as f:
            dataset = pickle.load(f)
        # Normalize pixel values to [0, 1]
        for key in ("train_img", "test_img"):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
        dataset["train_label"] = self.encode_one_hot(dataset["train_label"])
        dataset["test_label"] = self.encode_one_hot(dataset["test_label"])
        return (dataset["train_img"], dataset["train_label"]), \
               (dataset["test_img"], dataset["test_label"])
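The class above assumes that an mnist.pkl file already exists, containing a dict with the keys train_img, train_label, test_img and test_label. How that file is produced is not shown in this post; one possible way to build it, assuming the four gzipped MNIST IDX files have already been downloaded (the file names below are the conventional ones and may need adjusting), is sketched here:
import gzip
import pickle
import numpy as np
# Raw MNIST files assumed to be present in the working directory
files = {
    "train_img": "train-images-idx3-ubyte.gz",
    "train_label": "train-labels-idx1-ubyte.gz",
    "test_img": "t10k-images-idx3-ubyte.gz",
    "test_label": "t10k-labels-idx1-ubyte.gz",
}
def load_images(path):
    # Image files start with a 16-byte header (magic, count, rows, cols)
    with gzip.open(path, "rb") as f:
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    return data.reshape(-1, 784)
def load_labels(path):
    # Label files start with an 8-byte header (magic, count)
    with gzip.open(path, "rb") as f:
        return np.frombuffer(f.read(), np.uint8, offset=8)
dataset = {key: (load_images(path) if "img" in key else load_labels(path))
           for key, path in files.items()}
with open("mnist.pkl", "wb") as f:
    pickle.dump(dataset, f)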
Single Layer Neural Network for MNIST
- For the MNIST data set, the input layer has 784 nodes and the output layer has 10 nodes.
- Input layer (1 x 784) - single layer of weights (784 x 10) - output layer (1 x 10).
- With a batch size of 100, the input and output matrices have 100 rows each (a quick shape check follows this list).
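As a quick sanity check of those shapes (a minimal sketch with random data, not part of the model itself), the dot product of a 100 x 784 batch with a 784 x 10 weight matrix plus a length-10 bias gives a 100 x 10 output:
import numpy as np
X = np.random.rand(100, 784)          # a batch of 100 flattened 28 x 28 images
W = 0.01 * np.random.randn(784, 10)   # weight matrix
b = np.zeros(10)                      # bias vector, broadcast across the batch
logits = np.dot(X, W) + b
print(logits.shape)                   # (100, 10): one row of 10 scores per image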
import numpy as np
# Single layer neural network
class SingleLayerNeuralNetwork():
    def __init__(self, input_size, output_size, weight_init_std=0.01):
        # Dictionary for weights and bias
        self._params = {}
        # randn samples from the standard normal distribution
        # (standard deviation 1); multiplying by weight_init_std
        # gives random weights whose standard deviation is weight_init_std.
        self._params["W"] = weight_init_std * \
            np.random.randn(input_size, output_size)
        # Initialize all biases to zero
        self._params["b"] = np.zeros(output_size)
    # Sigmoid function as the activation function
    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))
    # Softmax function as the output function
    def softmax(self, X):
        ret = None
        if X.ndim == 2:
            X = X.T
            # Subtract the column-wise maximum to avoid overflow
            X = X - np.max(X, axis=0)
            Y = np.exp(X) / np.sum(np.exp(X), axis=0)
            ret = Y.T
        else:
            # To avoid overflow
            X = X - np.max(X)
            ret = np.exp(X) / np.sum(np.exp(X))
        return ret
    # Cross entropy error function
    def cross_entropy_error(self, Y, labels):
        # Translate one-hot encoded labels to answer index.
        labels = labels.argmax(axis=1)
        batch_size = Y.shape[0]
        log_val = np.log(Y[np.arange(batch_size), labels])
        return -np.sum(log_val) / batch_size
    # Prediction function
    def predict(self, X):
        W = self._params["W"]
        b = self._params["b"]
        # Logit
        logit = np.dot(X, W) + b
        hypothesis = self.sigmoid(logit)
        output = self.softmax(hypothesis)
        return output
    # Cost function
    def cost(self, X, labels):
        Y = self.predict(X)
        return self.cross_entropy_error(Y, labels)
    # Accuracy function
    def accuracy(self, X, labels):
        Y = self.predict(X)
        Y = np.argmax(Y, axis=1)
        labels = np.argmax(labels, axis=1)
        accuracy = np.sum(Y == labels) / float(X.shape[0])
        return accuracy
    # Numerical gradient function (central difference)
    def numerical_gradient(self, f, X):
        h = 1e-4
        # Create a zero-filled array with the same shape as X
        grad = np.zeros_like(X)
        # Create an iterator over the numpy array
        it = np.nditer(X, flags=["multi_index"], op_flags=["readwrite"])
        while not it.finished:
            idx = it.multi_index
            tmp_val = X[idx]
            X[idx] = float(tmp_val) + h
            fxh1 = f(X)
            X[idx] = tmp_val - h
            fxh2 = f(X)
            grad[idx] = (fxh1 - fxh2) / (2 * h)
            X[idx] = tmp_val
            it.iternext()
        return grad
    # Gradient function
    def gradient(self, X, labels):
        cost_W = lambda W: self.cost(X, labels)
        grads = {}
        grads["W"] = self.numerical_gradient(cost_W, self._params["W"])
        grads["b"] = self.numerical_gradient(cost_W, self._params["b"])
        return grads
    # Update weights and bias
    def update(self, grad, learning_rate):
        for key in ("W", "b"):
            self._params[key] -= learning_rate * grad[key]
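One quick way to see what numerical_gradient computes (a minimal sketch, not part of the training code) is to apply it to a function whose gradient is known analytically, for example f(x) = sum(x^2), whose gradient is 2x:
import numpy as np
net = SingleLayerNeuralNetwork(input_size=784, output_size=10)
f = lambda x: np.sum(x ** 2)
x = np.array([[1.0, 2.0], [3.0, 4.0]])
print(net.numerical_gradient(f, x))   # approximately [[2. 4.] [6. 8.]]
print(2 * x)                          # analytic gradient for comparison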
Training MNIST Data Set
- Train on the MNIST data set with the single layer neural network.
- The number of training instances is 60,000 and the batch size is 100, so one epoch takes 600 iterations.
import numpy as np
import matplotlib.pyplot as plt
# Create MNIST manager
mnistManager = MnistManager()
# Get Mnist data
(X_train, label_train), (X_test, label_test) = mnistManager.getMNIST()
# Create one layer neural network
network = SingleLayerNeuralNetwork(input_size=784, output_size=10)
# Total number of trials
iters_num = 10001
# The number of instances
train_size = X_train.shape[0]
print("Instances: {0}".format(train_size))
# How many instances will be used for one training trial
batch_size = 100
# Learning rate
learning_rate = 0.1
# Variables for graph
train_costs = []
train_accs = []
test_accs = []
# The number of iterations for one epoch
iter_per_epoch = max(train_size / batch_size, 1)
for i in range(iters_num):
    # Stochastic method - randomly choose instances to train on
    batch_mask = np.random.choice(train_size, batch_size)
    # Training input data chosen
    X_batch = X_train[batch_mask]
    # Training label data chosen
    label_batch = label_train[batch_mask]
    # Gradient descent to optimize weights and bias
    grad = network.gradient(X_batch, label_batch)
    # Update weights and bias
    network.update(grad, learning_rate)
    # Total iteration number is 10001.
    # Check variations of cost and accuracy once per epoch
    if i % iter_per_epoch == 0:
        # Cost
        cost = network.cost(X_batch, label_batch)
        train_costs.append(cost)
        # Accuracy on training data
        train_acc = network.accuracy(X_train, label_train)
        train_accs.append(train_acc)
        # Accuracy on test data
        test_acc = network.accuracy(X_test, label_test)
        test_accs.append(test_acc)
        print("Epoch: {0} Cost: {1:0.5f}, Train acc: {2:0.5f}, Test acc: {3:0.5f}".format(i, cost, train_acc, test_acc))
# Draw graph
x = np.arange(len(train_accs))
plt.plot(x, train_accs, label="train acc")
plt.plot(x, test_accs, label="test acc", linestyle="--")
plt.xlabel("trials")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()
Instances: 60000
Epoch: 0 Cost: 2.28237, Train acc: 0.17830, Test acc: 0.18860
Epoch: 600 Cost: 1.71255, Train acc: 0.84300, Test acc: 0.85470
Epoch: 1200 Cost: 1.68713, Train acc: 0.85812, Test acc: 0.86770
Epoch: 1800 Cost: 1.67359, Train acc: 0.86818, Test acc: 0.87530
Epoch: 2400 Cost: 1.66174, Train acc: 0.87053, Test acc: 0.87920
Epoch: 3000 Cost: 1.67088, Train acc: 0.87428, Test acc: 0.87980
Epoch: 3600 Cost: 1.62298, Train acc: 0.87585, Test acc: 0.88340
Epoch: 4200 Cost: 1.63926, Train acc: 0.87842, Test acc: 0.88470
Epoch: 4800 Cost: 1.62657, Train acc: 0.88058, Test acc: 0.88820
Epoch: 5400 Cost: 1.63765, Train acc: 0.88292, Test acc: 0.89020
Epoch: 6000 Cost: 1.60662, Train acc: 0.88365, Test acc: 0.89040
Image 1. Accuracy of single layer MNIST model