18. MNIST Training with Numpy


Get MNIST

  • Before training on MNIST, the MNIST data set has to be loaded and preprocessed. The class below loads it from a pickle file; a sketch of how that pickle file could be built follows the class.
import os
import pickle
import numpy as np

# File manager
class MnistManager():
    def __init__(self):
        # The number of labels
        self.nr_labels = 10

    # Transform data to one hot encoding format
    def encode_one_hot(self, X):
        T = np.zeros((X.size, self.nr_labels))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
            
        return T

    # Get MNIST data which is normalized and one hot encoded
    def getMNIST(self):
        # The pickle file for MNIST data set
        pklFile = "mnist.pkl"

        if not os.path.exists(pklFile):
            return (0, 0), (0, 0)

        dataset = None
        with open(pklFile, "rb") as f:
            dataset = pickle.load(f)

        # Data normalization
        for key in ("train_img", "test_img"):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0

        dataset["train_label"] = self.encode_one_hot(\
                                dataset["train_label"])
        dataset["test_label"] = self.encode_one_hot(\
                                dataset["test_label"])

        return (dataset["train_img"], dataset["train_label"]),\
         (dataset["test_img"], dataset["test_label"])

Single Layer Neural Network for MNIST

  • For the MNIST data set, the input layer has 784 nodes (28 x 28 pixels) and the output layer has 10 nodes (one per digit).
  • Input layer (1 x 784) - weight matrix (784 x 10) - output layer (1 x 10).
  • With a batch size of 100, the input becomes 100 x 784 and the output becomes 100 x 10, as the shape check after this list shows.
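
As a quick sanity check on those shapes (the arrays below are random placeholders, not real MNIST data):
import numpy as np

X = np.random.rand(100, 784)   # one batch of 100 flattened 28 x 28 images
W = np.random.randn(784, 10)   # weight matrix
b = np.zeros(10)               # bias, broadcast across the batch

logit = np.dot(X, W) + b
print(logit.shape)             # (100, 10): one score per class for each image
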
import numpy as np

# Single layer neural network
class SingleLayerNeuralNetwork():
    def __init__(self, input_size,
                       output_size,
                       weight_init_std=0.01):
        # Dictionary for weights and bias
        self._params = {}
        # randn samples from the standard normal distribution
        #  (mean 0, standard deviation 1).
        # Multiplying by weight_init_std scales the samples so that
        #  their standard deviation becomes weight_init_std.
        self._params["W"] = weight_init_std * \
                    np.random.randn(input_size, output_size)
        # Set all biases to zero
        self._params["b"] = np.zeros(output_size)

    # Sigmoid function as an activation function
    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    # Softmax function as an output function
    def softmax(self, X):
        ret = None
        if X.ndim == 2:
            # Subtract the column-wise max to avoid overflow in exp
            X = X.T
            X = X - np.max(X, axis=0)
            Y = np.exp(X) / np.sum(np.exp(X), axis=0)
            ret = Y.T
        else:
            # To avoid overflow
            X = X - np.max(X)
            ret = np.exp(X) / np.sum(np.exp(X))

        return ret
    
    # Cross entropy error function
    def cross_entropy_error(self, Y, labels):
        # Translate one-hot encoded labels into class indices.
        labels = labels.argmax(axis=1)

        batch_size = Y.shape[0]
        # Add a tiny constant so log never receives exactly zero
        log_val = np.log(Y[np.arange(batch_size), labels] + 1e-7)
        return -np.sum(log_val) / batch_size

    # Prediction function
    def predict(self, X):
        W = self._params["W"]
        b = self._params["b"]

        # Logit
        logit = np.dot(X, W) + b
        hypothesis = self.sigmoid(logit)
        output = self.softmax(hypothesis)

        return output

    # Cost function
    def cost(self, X, labels):
        Y = self.predict(X)

        return self.cross_entropy_error(Y, labels)

    # Accuracy function
    def accuracy(self, X, labels):
        Y = self.predict(X)
        Y = np.argmax(Y, axis=1)
        labels = np.argmax(labels, axis=1)

        accuracy = np.sum(Y == labels) / float(X.shape[0])
        return accuracy

    # Numerical gradient function (central difference)
    def numerical_gradient(self, f, X):
        h = 1e-4
        # Zero-filled array with the same shape as X
        grad = np.zeros_like(X)

        # Iterate over every element of the numpy array
        it = np.nditer(X,
                       flags=["multi_index"],
                       op_flags=["readwrite"])
        while not it.finished:
            idx = it.multi_index
            tmp_val = X[idx]

            X[idx] = float(tmp_val) + h
            fxh1 = f(X)

            X[idx] = tmp_val - h
            fxh2 = f(X)

            grad[idx] = (fxh1 - fxh2) / (2 * h)

            X[idx] = tmp_val
            it.iternext()

        return grad

    # Gradient function
    def gradient(self, X, labels):
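        # cost_W ignores its W argument: cost() reads the current
        #  weights and bias directly from self._params, which
        #  numerical_gradient perturbs in place.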
        cost_W = lambda W: self.cost(X, labels)

        grads = {}
        grads["W"] = self.numerical_gradient(cost_W, self._params["W"])
        grads["b"] = self.numerical_gradient(cost_W, self._params["b"])

        return grads
    
    # Update Weight and bias
    def update(self, grad, learning_rate):
        for key in ("W", "b"):
            self._params[key] -= learning_rate * grad[key]
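
Before running the full training loop, the class can be sanity-checked on a few random inputs (the shapes and values below are placeholders for illustration only):
import numpy as np

# Hypothetical toy batch: 5 "images" with random pixel values
network = SingleLayerNeuralNetwork(input_size=784, output_size=10)
X = np.random.rand(5, 784)
labels = np.eye(10)[np.random.randint(0, 10, 5)]   # random one-hot labels

print(network.predict(X).shape)        # (5, 10)
print(network.cost(X, labels))         # roughly ln(10), about 2.3, for untrained weights
print(network.accuracy(X, labels))     # around 0.1, i.e. chance level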

Training MNIST Data Set

  • Train the MNIST data set with the single layer neural network.
  • The number of training instances is 60,000 and the batch size is 100, so one epoch takes 60,000 / 100 = 600 iterations; 10,001 total iterations therefore cover roughly 16 epochs.
import numpy as np
import matplotlib.pyplot as plt

# Create MNIST manager
mnistManager = MnistManager()
# Get Mnist data
(X_train, label_train), (X_test, label_test) = mnistManager.getMNIST()

# Create one layer neural network
network = SingleLayerNeuralNetwork(input_size=784, output_size=10)

# Total number of trials
iters_num = 10001
# The number of instances
train_size = X_train.shape[0]
print("Instances: {0}".format(train_size))
# How many instances will be used for one training trial
batch_size = 100
# Learning rate
learning_rate = 0.1

# Variables for graph
train_costs = []
train_accs = []
test_accs = []

# The number of iterations for one epoch
iter_per_epoch = max(train_size / batch_size, 1)
    
for i in range(iters_num):
    # Stochastic method - randomly choose data set to train
    batch_mask = np.random.choice(train_size, batch_size)
    # Training input data chosen
    X_batch = X_train[batch_mask]
    # Training label data chosen
    label_batch = label_train[batch_mask]

    # Gradient descent to optimize weights and bias
    grad = network.gradient(X_batch, label_batch)
        
    # Update weights and bias
    network.update(grad, learning_rate)

    # Total iteration number is 10001.
    # Check variations of cost, accuracy per epoch
    if i % iter_per_epoch == 0:
        # Cost
        cost = network.cost(X_batch, label_batch)
        train_costs.append(cost)
        # Accuracy of training data
        train_acc = network.accuracy(X_train, label_train)
        train_accs.append(train_acc)
        # Accuracy of testing data
        test_acc = network.accuracy(X_test, label_test)
        test_accs.append(test_acc)
        print("Epoch: {0}, Cost: {0:0.5f}, Train acc: {1:0.5f}, Test acc: {2:0.5f}".format(cost, train_acc, test_acc))

# Draw graph
x = np.arange(len(train_accs))
plt.plot(x, train_accs, label="train acc")
plt.plot(x, test_accs, label="test acc", linestyle="--")
plt.xlabel("trials")
plt.ylabel("accuracy")
plt.legend(loc="lower right")
plt.show()
Instances: 60000
Epoch: 0 Cost: 2.28237, Train acc: 0.17830, Test acc: 0.18860
Epoch: 600 Cost: 1.71255, Train acc: 0.84300, Test acc: 0.85470
Epoch: 1200 Cost: 1.68713, Train acc: 0.85812, Test acc: 0.86770
Epoch: 1800 Cost: 1.67359, Train acc: 0.86818, Test acc: 0.87530
Epoch: 2400 Cost: 1.66174, Train acc: 0.87053, Test acc: 0.87920
Epoch: 3000 Cost: 1.67088, Train acc: 0.87428, Test acc: 0.87980
Epoch: 3600 Cost: 1.62298, Train acc: 0.87585, Test acc: 0.88340
Epoch: 4200 Cost: 1.63926, Train acc: 0.87842, Test acc: 0.88470
Epoch: 4800 Cost: 1.62657, Train acc: 0.88058, Test acc: 0.88820
Epoch: 5400 Cost: 1.63765, Train acc: 0.88292, Test acc: 0.89020
Epoch: 6000 Cost: 1.60662, Train acc: 0.88365, Test acc: 0.89040
Image 1. Accuracy of single layer MNIST model
