26. DNN Optimization with TensorFlow

How to optimize a DNN with TensorFlow


Adam Optimizer

  • Gradient descent and its stochastic variant (SGD) are the base algorithms for finding optimized weights and biases.
  • However, many newer and more efficient optimizers have been proposed; Adam is one of the most widely used.
Image 1. Adam (src: SRC)
  • Fortunately, TensorFlow provides Adam out of the box as tf.train.AdamOptimizer.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
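For reference, the update rule that AdamOptimizer applies to each weight can be sketched in a few lines of NumPy. This is the textbook form with the common defaults (beta1=0.9, beta2=0.999, eps=1e-8); the names are illustrative, and TensorFlow's internal implementation is an algebraically equivalent variant.

import numpy as np

def adam_step(w, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # First moment: exponential moving average of the gradient
    m = beta1 * m + (1 - beta1) * grad
    # Second moment: exponential moving average of the squared gradient
    v = beta2 * v + (1 - beta2) * grad ** 2
    # Bias correction (t is the 1-based step count)
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    # Update, scaled per element by the gradient's recent magnitude
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    return w, m, v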
  • This is an example of single-layer MNIST training with the Adam optimizer.
# Single layer MNIST with Adam optimizer
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Reproducibility
tf.set_random_seed(777)

# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100

# Input placeholders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Weights & bias for the single layer
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.random_normal([10]))

# The hypothesis would normally be softmax(affine(X)),
#  but the softmax is folded into softmax_cross_entropy_with_logits(),
#  so the raw logits (the affine output) are passed to it as the hypothesis.
hypothesis = tf.matmul(X, W) + b

# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

costs = []
accs = []

# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)

# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))

# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()

plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.41734982719475594
Final accuracy: 0.8978000283241272
Image 1. Cost of single layer MNIST training with Adam optimizer
Image 2. Accuracy of single layer MNIST training with Adam optimizer
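As an aside, the prediction and accuracy nodes above simply compare the index of the largest logit with the index of the one-hot label and average the result. A tiny NumPy illustration with made-up values:

import numpy as np

logits = np.array([[2.0, 0.1, -1.0],    # predicted class 0
                   [0.3, 0.2,  1.5]])   # predicted class 2
labels = np.array([[1, 0, 0],           # true class 0
                   [0, 1, 0]])          # true class 1

correct = np.argmax(logits, axis=1) == np.argmax(labels, axis=1)  # [True, False]
accuracy = correct.astype(np.float32).mean()                      # 0.5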

Multi Layer with Adam Optimizer

  • This is an example of multi-layer MNIST training with the Adam optimizer. The hidden layers use ReLU as the activation function.
# Multi layer MNIST with Adam optimizer
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Reproducibility
tf.set_random_seed(777)

# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100

# Input placeholders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Weights & biases for multiple layers
# The hidden layers use the ReLU activation function.
W1 = tf.Variable(tf.random_normal([784, 256]))
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([256, 256]))
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.Variable(tf.random_normal([256, 10]))
b3 = tf.Variable(tf.random_normal([10]))

hypothesis = tf.matmul(L2, W3) + b3

# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

costs = []
accs = []

# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)

# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))

# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()

plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.796504732984805
Final accuracy: 0.9509000182151794
Image 3. Cost of multi layer MNIST training with Adam optimizer
Image 4. Accuracy of multi layer MNIST training with Adam optimizer

Xavier Initializer

  • This is an example of multi-layer MNIST training with the Adam optimizer and the Xavier initializer. Xavier (Glorot) initialization scales the initial weights according to the number of input and output units of each layer, which keeps the activation variance roughly constant across layers; a sketch of the rule appears at the end of this subsection.
# Multi layer MNIST with Adam optimizer and Xavier initialization
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Reproducibility
tf.set_random_seed(777)

# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100

# Input placeholders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Weights & bias for multiple layers
W1 = tf.get_variable("W1_X", shape=[784, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W2_X", shape=[256, 256],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W3_X", shape=[256, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([10]))

hypothesis = tf.matmul(L2, W3) + b3

# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

costs = []
accs = []

# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)

# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))

# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()

plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.011271840706811581
Final accuracy: 0.9779999852180481
Image 5. Cost of multi layer MNIST training with Adam optimizer and Xavier initializer
Image 6. Accuracy of multi layer MNIST training with Adam optimizer and Xavier initializer
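For reference, a minimal NumPy sketch of the rule that tf.contrib.layers.xavier_initializer() applies by default (Glorot uniform); the helper below is illustrative and not part of TensorFlow:

import numpy as np

def xavier_uniform(fan_in, fan_out):
    # Glorot/Xavier uniform: sample from U(-limit, limit) with
    # limit = sqrt(6 / (fan_in + fan_out)), keeping activation variance stable
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_in, fan_out))

W1_init = xavier_uniform(784, 256)   # same shape as W1 in the code above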

Deep Neural Network with Adam Optimizer and Xavier Initializer

  • This is an example of deep neural network (five layers here) MNIST training with the Adam optimizer and the Xavier initializer; a more compact formulation using tf.layers.dense is sketched at the end of this subsection.
# Deep layer MNIST with Adam optimizer and Xavier initialization
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Reproducibility
tf.set_random_seed(777)

# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100

# Input placeholders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Weights & bias for multiple layers
# Initialization: Xavier
W1 = tf.get_variable("W1_D", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W2_D", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

W3 = tf.get_variable("W3_D", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)

W4 = tf.get_variable("W4_D", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)

W5 = tf.get_variable("W5_D", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))

hypothesis = tf.matmul(L4, W5) + b5

# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

costs = []
accs = []

# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
      X: mnist.test.images, Y: mnist.test.labels})
    accs.append(acc)

# Print result
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))

# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()

plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.014999570602045743
Final accuracy: 0.9779000282287598
Image 7. Cost of deep neural network MNIST training with Adam optimizer and Xavier initializer
Image 8. Accuracy of deep neural network MNIST training with Adam optimizer and Xavier initializer
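The five-layer network above can also be written more compactly with tf.layers.dense, which creates the weight, bias, and activation in one call. This is only a sketch under the same TF 1.x setup (it reuses the tf import and the X placeholder from the code above); note that tf.layers.dense initializes biases to zero rather than to random_normal as in the manual version.

# Compact equivalent of the five-layer network (sketch)
xavier = tf.contrib.layers.xavier_initializer()
net = X
for units in [512, 512, 512, 512]:
    net = tf.layers.dense(net, units, activation=tf.nn.relu,
                          kernel_initializer=xavier)
hypothesis = tf.layers.dense(net, 10, kernel_initializer=xavier)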

Deep Neural Network with Adam Optimizer, Xavier Initializer and Dropout

This is an example of deep neural network MNIST training with the Adam optimizer, the Xavier initializer, and dropout. During training, dropout zeroes each hidden activation with probability 1 - keep_prob (here keep_prob = 0.7) to reduce overfitting; for testing, keep_prob is set back to 1. A sketch of the operation appears at the end of this subsection.

# Deep layer MNIST with Adam optimizer, Xavier initialization and Drop out
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Reproducibility
tf.set_random_seed(777)

# Get MNIST data set
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters
learning_rate = 0.001
epochs = 15
batch_size = 100

# Input placeholders
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Dropout keep probability: 0.7 during training, but must be 1 for testing
keep_prob = tf.placeholder(tf.float32)

# Weights & biases for multiple layers
# Dropout is applied after each hidden layer's ReLU.
W1 = tf.get_variable("W1_DDO", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable("W2_DDO", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable("W3_DDO", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

W4 = tf.get_variable("W4_DDO", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5_DDO", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

# Prediction
prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
# Cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
# Optimizer - Adam
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize
sess = tf.Session()
sess.run(tf.global_variables_initializer())

costs = []
accs = []

# Train
for epoch in range(epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch

    costs.append(avg_cost)
    acc = sess.run(accuracy, feed_dict={
        X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1})
    accs.append(acc)

# Print results
print("Final cost: {0}".format(costs[-1]))
print("Final accuracy: {0}".format(accs[-1]))

# Draw graph
plt.plot(range(epochs), costs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("cost")
plt.title("Costs")
plt.show()

plt.plot(range(epochs), accs)
plt.grid()
plt.xlabel("epoch")
plt.ylabel("accuracy")
plt.title("Accuracies")
plt.show()
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Final cost: 0.04678738850369966
Final accuracy: 0.9828000068664551
Image 9. Cost of deep neural network MNIST training with Adam optimizer, Xavier initializer and dropout
Image 10. Accuracy of deep neural network MNIST training with Adam optimizer, Xavier initializer and dropout
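For reference, a minimal NumPy sketch of what tf.nn.dropout does to an activation tensor (inverted dropout: surviving units are scaled by 1/keep_prob so the expected activation stays unchanged); the function below is illustrative, not the TensorFlow implementation:

import numpy as np

def dropout(activations, keep_prob):
    # Keep each unit with probability keep_prob, zero the rest,
    # and rescale the survivors so the expected value is unchanged.
    mask = np.random.uniform(size=activations.shape) < keep_prob
    return activations * mask / keep_prob

h = np.ones((2, 4))
print(dropout(h, keep_prob=0.7))   # ~70% of entries become 1/0.7, the rest 0
print(dropout(h, keep_prob=1.0))   # keep_prob = 1 leaves activations unchanged (testing)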

Batch Normalization

  • TBD
