How to Optimize a DNN with TensorFlow
Adam Optimizer
- Gradient descent (or stochastic gradient descent) is the baseline algorithm for finding optimized weights and biases.
- However, many newer and more efficient optimizers have been proposed, and Adam is one of the most widely used (a sketch of its update rule follows the API example below).
- Fortunately, TensorFlow provides an implementation of Adam, tf.train.AdamOptimizer:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
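- Under the hood, Adam keeps exponential moving averages of the gradient and of the squared gradient, corrects their bias, and uses them to scale each parameter update. The following is a minimal NumPy sketch of that update rule, for illustration only; the function name and defaults here are illustrative, not TensorFlow API (AdamOptimizer does all of this internally).
# Illustrative sketch of one Adam update step (not TensorFlow code)
import numpy as np

def adam_update(theta, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # Exponential moving averages of the gradient (m) and squared gradient (v)
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    # Bias correction for the zero-initialized averages
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # Per-parameter update scaled by the running second moment
    theta = theta - lr * m_hat / (np.sqrt(v_hat) + eps)
    return theta, m, v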
- This is an example of single-layer MNIST training with the Adam optimizer.
# Single layer MNIST with Adam optimizer
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Reproducibility
tf.set_random_seed(777)
# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
# Input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
# Weights & bias for single layers
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.random_normal([10]))
# The usual hypothesis would be softmax(affine(X)), but the softmax is
# built into softmax_cross_entropy_with_logits(), so the hypothesis here
# is just the raw logits (the affine transform).
hypothesis = tf.matmul(X, W) + b
# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
costs = []
accs = []
# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)
# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))
# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()
plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.41734982719475594
Final accuracy: 0.8978000283241272
Image 1. Cost of single layer MNIST training with Adam optimizer
Image 2. Accuracy of single layer MNIST training with Adam optimizer
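- Because tf.nn.softmax_cross_entropy_with_logits() applies the softmax internally, the model above outputs raw logits. To get class probabilities or predicted labels at inference time, apply the softmax explicitly. A minimal sketch, assuming the session and the tensors from the example above are still available:
# Sketch: probabilities and predicted labels from the logits above
probs = tf.nn.softmax(hypothesis)       # class probabilities
pred_labels = tf.argmax(probs, 1)       # predicted digit per image
p, labels = sess.run([probs, pred_labels],
                     feed_dict={X: mnist.test.images[:5]})
print(labels)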
Multi-Layer Network with Adam Optimizer
- This is an example of multi-layer MNIST training with the Adam optimizer; the hidden layers use ReLU activations. (A more compact formulation using tf.layers.dense is sketched after the results below.)
# Multi layer MNIST with Adam optimizer
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Reproducibility
tf.set_random_seed(777)
# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
# Input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
# Weights & bias for multiple layers
# ReLU is the activation function for the hidden layers.
W1 = tf.Variable(tf.random_normal([784, 256]))
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
W2 = tf.Variable(tf.random_normal([256, 256]))
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
W3 = tf.Variable(tf.random_normal([256, 10]))
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b3
# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
costs = []
accs = []
# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)
# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))
# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()
plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.796504732984805
Final accuracy: 0.9509000182151794
Image 3. Cost of multi layer MNIST training with Adam optimizer
Image 4. Accuracy of multi layer MNIST training with Adam optimizer
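- The per-layer weight and bias variables above can also be created with the higher-level tf.layers.dense, which bundles the affine transform, its variables, and the activation. A minimal sketch of the same 784-256-256-10 architecture (an alternative formulation, not the code used for the results above):
# Sketch: the same multi-layer network written with tf.layers.dense (TF 1.x)
# Assumes X is the [None, 784] input placeholder defined above
h1 = tf.layers.dense(X, 256, activation=tf.nn.relu)
h2 = tf.layers.dense(h1, 256, activation=tf.nn.relu)
logits = tf.layers.dense(h2, 10)        # raw logits; softmax is applied inside the loss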
Xavier Initializer
- This is an example of multi-layer MNIST training with the Adam optimizer and the Xavier initializer. (What the initializer actually computes is sketched after the results below.)
# Multi layer MNIST with Adam optimizer and Xavier initialization
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Reproducibility
tf.set_random_seed(777)
# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
# Input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
# Weights & bias for multiple layers
W1 = tf.get_variable("W1_X", shape=[784, 256],
initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
W2 = tf.get_variable("W2_X", shape=[256, 256],
initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
W3 = tf.get_variable("W3_X", shape=[256, 10],
initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b3
# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
costs = []
accs = []
# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)
# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))
# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()
plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.011271840706811581
Final accuracy: 0.9779999852180481
Image 5. Cost of multi layer MNIST training with Adam optimizer and Xavier initializer
Image 6. Accuracy of multi layer MNIST training with Adam optimizer and Xavier initializer
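- For reference, tf.contrib.layers.xavier_initializer() (Glorot initialization) scales the initial weights by the layer's fan-in and fan-out; by default it draws from a uniform distribution with limit sqrt(6 / (fan_in + fan_out)). A minimal NumPy sketch of that default behavior, for illustration only:
# Sketch: Xavier/Glorot uniform initialization
import numpy as np

def xavier_uniform(fan_in, fan_out):
    # Keeps the variance of activations and gradients roughly constant across layers
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_in, fan_out)).astype(np.float32)

W1_init = xavier_uniform(784, 256)      # same shape as W1 above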
Deep Neural Network with Adam Optimizer and Xavier Initializer
- This is an example of deep neural network MNIST training with the Adam optimizer and the Xavier initializer. (A loop-based way to build the same stack of layers is sketched after the results below.)
# Deep layer MNIST with Adam optimizer and Xavier initialization
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Reproducibility
tf.set_random_seed(777)
# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
# Input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
# Weights & bias for multiple layers
# Initialization: Xavier
W1 = tf.get_variable("W1_D", shape=[784, 512],
initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
W2 = tf.get_variable("W2_D", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
W3 = tf.get_variable("W3_D", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
W4 = tf.get_variable("W4_D", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
W5 = tf.get_variable("W5_D", shape=[512, 10],
initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5
# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
costs = []
accs = []
# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)
# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))
# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()
plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.014999570602045743
Final accuracy: 0.9779000282287598
Image 7. Cost of deep neural network MNIST training with Adam optimizer and Xavier initializer
Image 8. Accuracy of deep neural network MNIST training with Adam optimizer and Xavier initializer
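- The four hidden layers above are defined with repeated blocks; the same graph can be built more compactly in a loop. A minimal sketch (a hypothetical refactoring with the same shapes and initializers; the "_loop" variable names only avoid clashing with the ones already created):
# Sketch: building the 784-512-512-512-512-10 stack in a loop
# Assumes X is the input placeholder from above
sizes = [784, 512, 512, 512, 512, 10]
layer = X
for i in range(len(sizes) - 1):
    W = tf.get_variable("W{}_loop".format(i + 1), shape=[sizes[i], sizes[i + 1]],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.random_normal([sizes[i + 1]]))
    layer = tf.matmul(layer, W) + b
    if i < len(sizes) - 2:              # ReLU on hidden layers only, not on the logits
        layer = tf.nn.relu(layer)
logits = layer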
Deep Neural Network with Adam Optimizer, Xavier Initializer and Dropout
- This is an example of deep neural network MNIST training with the Adam optimizer, Xavier initializer, and dropout. (A note on how tf.nn.dropout scales activations follows the results below.)
# Deep layer MNIST with Adam optimizer, Xavier initialization and Drop out
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# Reproducibility
tf.set_random_seed(777)
# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100
# Input place holders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])
# Dropout: keep_prob is 0.7 during training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)
# Weights & bias for multiple layers
# Dropout is applied after each hidden layer
W1 = tf.get_variable("W1_DDO", shape=[784, 512],
initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)
W2 = tf.get_variable("W2_DDO", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)
W3 = tf.get_variable("W3_DDO", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)
W4 = tf.get_variable("W4_DDO", shape=[512, 512],
initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)
W5 = tf.get_variable("W5_DDO", shape=[512, 10],
initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5
# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())
costs = []
accs = []
# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1})
    accs.append(acc)
# Print results
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))
# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()
plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.04678738850369966
Final accuracy: 0.9828000068664551
Image 9. Cost of deep neural network MNIST training with Adam optimizer, Xavier initializer and dropout
Image 10. Accuracy of deep neural network MNIST training with Adam optimizer, Xavier initializer and dropout
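- Note that tf.nn.dropout implements "inverted dropout": the units that survive are scaled by 1 / keep_prob during training, so no extra rescaling is needed at test time (where keep_prob is fed as 1). A minimal standalone sketch of that scaling:
# Sketch: surviving units are scaled by 1/keep_prob
x = tf.ones([1, 10])
dropped = tf.nn.dropout(x, keep_prob=0.7)
with tf.Session() as s:
    print(s.run(dropped))               # kept entries are ~1.4286 (= 1/0.7), the rest 0.0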
Batch Normalization
- TBD
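- Until this section is written, here is a minimal sketch of the usual TF 1.x batch normalization pattern (tf.layers.batch_normalization with an explicit training flag plus the UPDATE_OPS dependency). It is only an outline, assuming the placeholders and variables from the earlier examples, and is not part of the experiments above.
# Sketch: batch normalization on one hidden layer (TF 1.x pattern)
# Assumes X, W1, b1, cost and learning_rate from the examples above
is_training = tf.placeholder(tf.bool)

h1 = tf.matmul(X, W1) + b1
h1 = tf.layers.batch_normalization(h1, training=is_training)
h1 = tf.nn.relu(h1)

# The ops that update the moving mean/variance are collected in UPDATE_OPS,
# so the training op must depend on them
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)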