Deep Neural Network for XOR and MNIST
TOC
- XOR with 2 Layer Neural Network
- XOR with 2 Layer Wide Neural Network
- XOR with Deep Neural Network
- MNIST with Deep Neural Network
XOR with 2 Layer Neural Network
- This is the default 2-layer neural network for XOR.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Fix the graph-level seed so repeated runs draw the same initial weights
tf.set_random_seed(777)

# Step size used by gradient descent
learning_rate = 0.1

# XOR truth table: the four input pairs and the expected output of each
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# Graph inputs: rows of 2 features and rows of 1 label
X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Layer 1 (hidden): 2 inputs -> 2 sigmoid units
W1 = tf.Variable(tf.random_normal([2, 2]), name='weight1')
b1 = tf.Variable(tf.random_normal([2]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

# Layer 2 (output): 2 hidden units -> 1 sigmoid unit
W2 = tf.Variable(tf.random_normal([2, 1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

# Binary cross-entropy, averaged over all fed examples
positive_term = Y * tf.log(hypothesis)
negative_term = (1 - Y) * tf.log(1 - hypothesis)
cost = -tf.reduce_mean(positive_term + negative_term)

# One gradient-descent update of every variable per run of `train`
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(cost)

# Threshold the sigmoid output at 0.5 to get a hard 0/1 prediction
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)

# Fraction of predictions that equal the label
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Per-step history used for the plots below
costs = []
accs = []

# The whole truth table is fed on every run
feed = {X: x_data, Y: y_data}

with tf.Session() as sess:
    # Give every variable its initial value
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Apply one update, then measure cost and accuracy on the
        # updated weights
        sess.run(train, feed_dict=feed)
        step_cost, step_acc = sess.run([cost, accuracy], feed_dict=feed)
        costs.append(step_cost)
        accs.append(step_acc)

    # Report the trained model's raw outputs, hard predictions and accuracy
    final_hypothesis, final_prediction, final_accuracy = sess.run(
        [hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", final_hypothesis,
          "\nCorrect: ", final_prediction,
          "\nAccuracy: ", final_accuracy)

# One figure per recorded series, cost first and accuracy second
steps = list(range(len(costs)))
for series, title, y_label in ((costs, "Costs", "Cost"),
                               (accs, "Accuracies", "Accuracy")):
    plt.plot(steps, series)
    plt.title(title)
    plt.xlabel("Steps")
    plt.ylabel(y_label)
    plt.show()
Hypothesis: [[ 0.01272806]
[ 0.98249799]
[ 0.9886651 ]
[ 0.01084627]]
Correct: [[ 0.]
[ 1.]
[ 1.]
[ 0.]]
Accuracy: 1.0
Image 1. Cost for XOR with 2 layer neural network
Image 2. Accuracy for XOR with 2 layer neural network
XOR with 2 Layer Wide Neural Network
- Starting from the default 2-layer neural network, the hidden layer can be made wider to increase accuracy.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Fix the graph-level seed so repeated runs draw the same initial weights
tf.set_random_seed(777)

# Step size used by gradient descent
learning_rate = 0.1

# XOR truth table: the four input pairs and the expected output of each
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# Graph inputs: rows of 2 features and rows of 1 label
X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Layer 1 (hidden, widened): weight 2 x 10, bias 10
W1 = tf.Variable(tf.random_normal([2, 10]), name='weight1')
b1 = tf.Variable(tf.random_normal([10]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

# Layer 2 (output): weight 10 x 1, bias 1
W2 = tf.Variable(tf.random_normal([10, 1]), name='weight2')
b2 = tf.Variable(tf.random_normal([1]), name='bias2')
hypothesis = tf.sigmoid(tf.matmul(layer1, W2) + b2)

# Binary cross-entropy, averaged over all fed examples
positive_term = Y * tf.log(hypothesis)
negative_term = (1 - Y) * tf.log(1 - hypothesis)
cost = -tf.reduce_mean(positive_term + negative_term)

# One gradient-descent update of every variable per run of `train`
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(cost)

# Threshold the sigmoid output at 0.5 to get a hard 0/1 prediction
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)

# Fraction of predictions that equal the label
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Per-step history used for the plots below
costs = []
accs = []

# The whole truth table is fed on every run
feed = {X: x_data, Y: y_data}

with tf.Session() as sess:
    # Give every variable its initial value
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Apply one update, then measure cost and accuracy on the
        # updated weights
        sess.run(train, feed_dict=feed)
        step_cost, step_acc = sess.run([cost, accuracy], feed_dict=feed)
        costs.append(step_cost)
        accs.append(step_acc)

    # Report the trained model's raw outputs, hard predictions and accuracy
    final_hypothesis, final_prediction, final_accuracy = sess.run(
        [hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", final_hypothesis,
          "\nCorrect: ", final_prediction,
          "\nAccuracy: ", final_accuracy)

# One figure per recorded series, cost first and accuracy second
steps = list(range(len(costs)))
for series, title, y_label in ((costs, "Costs", "Cost"),
                               (accs, "Accuracies", "Accuracy")):
    plt.plot(steps, series)
    plt.title(title)
    plt.xlabel("Steps")
    plt.ylabel(y_label)
    plt.show()
Hypothesis: [[ 0.00504063]
[ 0.99114835]
[ 0.99255556]
[ 0.01169373]]
Correct: [[ 0.]
[ 1.]
[ 1.]
[ 0.]]
Accuracy: 1.0
Image 3. Cost for XOR with 2 layer wide neural network
Image 4. Accuracy for XOR with 2 layer wide neural network
XOR with Deep Neural Network
- More layers can be inserted to increase accuracy.
- The resulting model is a Deep Neural Network (DNN).
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Fix the graph-level seed so repeated runs draw the same initial weights
tf.set_random_seed(777)

# Step size used by gradient descent
learning_rate = 0.1

# XOR truth table: the four input pairs and the expected output of each
x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# Graph inputs: rows of 2 features and rows of 1 label
X = tf.placeholder(tf.float32, [None, 2])
Y = tf.placeholder(tf.float32, [None, 1])

# Layer 1: weight 2 x 10, bias 10
W1 = tf.Variable(tf.random_normal([2, 10]), name='weight1')
b1 = tf.Variable(tf.random_normal([10]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

# Layer 2: weight 10 x 10, bias 10
W2 = tf.Variable(tf.random_normal([10, 10]), name='weight2')
b2 = tf.Variable(tf.random_normal([10]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)

# Layer 3: weight 10 x 10, bias 10
W3 = tf.Variable(tf.random_normal([10, 10]), name='weight3')
b3 = tf.Variable(tf.random_normal([10]), name='bias3')
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)

# Layer 4 (output): weight 10 x 1, bias 1.
# The bias must have a single element to match the single output unit;
# a 10-element bias would broadcast the output to 10 columns and no
# longer line up with the 1-column labels in Y.
W4 = tf.Variable(tf.random_normal([10, 1]), name='weight4')
b4 = tf.Variable(tf.random_normal([1]), name='bias4')
hypothesis = tf.sigmoid(tf.matmul(layer3, W4) + b4)

# Binary cross-entropy, averaged over all fed examples
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))

# One gradient-descent update of every variable per run of `train`
train = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cost)

# Threshold the sigmoid output at 0.5 to get a hard 0/1 prediction
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)

# Fraction of predictions that equal the label
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Per-step history used for the plots below
costs = []
accs = []

# The whole truth table is fed on every run
feed = {X: x_data, Y: y_data}

with tf.Session() as sess:
    # Give every variable its initial value
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        # Apply one update, then measure cost and accuracy on the
        # updated weights
        sess.run(train, feed_dict=feed)
        _cost, _acc = sess.run([cost, accuracy], feed_dict=feed)
        costs.append(_cost)
        accs.append(_acc)

    # Report the trained model's raw outputs, hard predictions and accuracy
    h, c, a = sess.run([hypothesis, predicted, accuracy], feed_dict=feed)
    print("\nHypothesis: ", h, "\nCorrect: ", c, "\nAccuracy: ", a)

# Plot the cost recorded after every training step
steps = list(range(len(costs)))
plt.plot(steps, costs)
plt.title("Costs")
plt.xlabel("Steps")
plt.ylabel("Cost")
plt.show()

# Plot the accuracy recorded after every training step
plt.plot(steps, accs)
plt.title("Accuracies")
plt.xlabel("Steps")
plt.ylabel("Accuracy")
plt.show()
Hypothesis: [[ 0.00130236]
[ 0.99811018]
[ 0.99866831]
[ 0.00167277]]
Correct: [[ 0.]
[ 1.]
[ 1.]
[ 0.]]
Accuracy: 1.0
Image 5. Cost for XOR with deep neural network
Image 6. Accuracy for XOR with deep neural network
MNIST with Deep Neural Network
- A DNN can also be applied to the MNIST data set.
import tensorflow as tf
# TensorFlow already includes the MNIST data set
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import random
# Load the MNIST data with one-hot encoded labels
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# Number of classes: digits 0 ~ 9
nb_labels = 10

# Each MNIST image is 28 x 28 = 784 pixels, fed as one flat row
X = tf.placeholder(tf.float32, [None, 784])
# 0 ~ 9 digit recognition = 10 one-hot labels
Y = tf.placeholder(tf.float32, [None, nb_labels])

# Layer 1: weight 784 x 100, bias 100
W1 = tf.Variable(tf.random_normal([784, 100]), name='weight1')
b1 = tf.Variable(tf.random_normal([100]), name='bias1')
layer1 = tf.sigmoid(tf.matmul(X, W1) + b1)

# Layer 2: weight 100 x 100, bias 100
W2 = tf.Variable(tf.random_normal([100, 100]), name='weight2')
b2 = tf.Variable(tf.random_normal([100]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)

# Layer 3: weight 100 x 100, bias 100
W3 = tf.Variable(tf.random_normal([100, 100]), name='weight3')
b3 = tf.Variable(tf.random_normal([100]), name='bias3')
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)

# Layer 4 (output): weight 100 x nb_labels, bias nb_labels
W4 = tf.Variable(tf.random_normal([100, nb_labels]), name='weight4')
b4 = tf.Variable(tf.random_normal([nb_labels]), name='bias4')

# Hypothesis - softmax over the output of the deep stack.
# The class probabilities must come from layer3/W4/b4; computing them
# directly from X with a separate single-layer weight would leave the
# hidden layers above untrained and unused.
hypothesis = tf.nn.softmax(tf.matmul(layer3, W4) + b4)

# Cost: cross-entropy between the one-hot labels and the softmax output,
# summed over classes and averaged over the batch
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))

# Optimizer: one gradient-descent update per run
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=0.1).minimize(cost)

# Test model: a prediction is correct when the most probable class
# matches the position of the 1 in the one-hot label
is_correct = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))

# Calculate accuracy as the fraction of correct predictions
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Epoch - how many passes are made over the training data
training_epochs = 15
# Batch - how many examples are trained at once
batch_size = 100

with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0
        # Iterations per epoch = (number of examples) / (batch size)
        max_iteration = int(mnist.train.num_examples / batch_size)

        for itr in range(max_iteration):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            c, _ = sess.run([cost, optimizer],
                            feed_dict={X: batch_xs, Y: batch_ys})
            # Accumulate the mean batch cost of this epoch
            avg_cost += c / max_iteration

        print("Epoch: {0:4d}, Cost: {1:0.9f}".format(epoch + 1, avg_cost))

    print("Learning finished")

    # Test the model using the test set
    # accuracy.eval(session=sess, ...) is equivalent to sess.run(accuracy, ...)
    print("Accuracy: ", accuracy.eval(session=sess,
                                      feed_dict={X: mnist.test.images,
                                                 Y: mnist.test.labels}))

    # Pick one random test example and predict it
    r = random.randint(0, mnist.test.num_examples - 1)

    # mnist.test.labels are one-hot encoded, so argmax recovers the digit
    print("Label: {0}".format(
        sess.run(tf.argmax(mnist.test.labels[r:r+1], 1))))
    print("Prediction: {0}".format(
        sess.run(tf.argmax(hypothesis, 1),
                 feed_dict={X: mnist.test.images[r:r+1]})))

    # Show the chosen image, reshaped from the flat row back to 28 x 28
    plt.imshow(mnist.test.images[r:r+1].reshape(28, 28),
               cmap="Greys", interpolation="nearest")
    plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 1, Cost: 2.728260803
Epoch: 2, Cost: 1.027512100
Epoch: 3, Cost: 0.826937820
Epoch: 4, Cost: 0.730932239
Epoch: 5, Cost: 0.669988185
Epoch: 6, Cost: 0.626827917
Epoch: 7, Cost: 0.594122318
Epoch: 8, Cost: 0.568072515
Epoch: 9, Cost: 0.546891481
Epoch: 10, Cost: 0.528336101
Epoch: 11, Cost: 0.512735213
Epoch: 12, Cost: 0.498853235
Epoch: 13, Cost: 0.486702925
Epoch: 14, Cost: 0.475801280
Epoch: 15, Cost: 0.465662283
Learning finished
Accuracy: 0.8877
Label: [8]
Prediction: [8]
Image 7. MNIST inference
COMMENTS