16. Learning Rate & Data Preprocessing with TensorFlow

kinds of data set for machine learning

tensorflow

Large Learning Rate with TensorFlow

import tensorflow as tf
import matplotlib.pyplot as plt

# Training data
x_data = [[1,2,1], [1,3,2], [1,3,4], [1,5,5], 
          [1,7,5], [1,2,5], [1,6,6], [1,7,7]]
y_data = [[0,0,1], [0,0,1], [0,0,1], [0,1,0], 
          [0,1,0], [0,1,0], [1,0,0], [1,0,0]]

# Testing data
x_test = [[2,1,1], [3,1,2], [3,3,4]]
y_test = [[0,0,1], [0,0,1], [0,0,1]]

# Placeholder for inputs and labels
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])

# Weight
W = tf.Variable(tf.random_normal([3, 3]))
# Bias
b = tf.Variable(tf.random_normal([3]))

# Hypothesis
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cost function
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis),\
                      axis=1))
# Optimizer
optimizer = tf.train.GradientDescentOptimizer(\
                learning_rate=1.5).minimize(cost)

# Prediction
prediction = tf.arg_max(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variabels
    sess.run(tf.global_variables_initializer())
    
    steps = [i for i in range(201)]
    costs = []
    Ws = []
    
    print("Weights before training:", sess.run(W))
    for i in steps:
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
            feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
    print("Weights after training:", sess.run(W))

    # Predict
    predictionResult = sess.run(prediction, \
                            feed_dict={X: x_test})
    print("Prediction")
    for i in range(len(predictionResult)):
        print("x_data[[{0}]: {1}".format(\
                        i, predictionResult[i]))
    print()
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, 
                            feed_dict={X: x_test, Y: y_test}))
    
    # Plot
    plt.plot(steps, costs)
    plt.xlabel("trials")
    plt.ylabel("cost")
    plt.title("Costs")
    plt.show()
Weights before training: [[-1.03583431 -0.5126186  -1.50135481]
 [ 0.91863215  0.36449438  0.07521305]
 [ 0.82262826  0.22673547  0.28540802]]
Weights after training: [[ nan  nan  nan]
 [ nan  nan  nan]
 [ nan  nan  nan]]
Prediction
x_data[[0]: 0
x_data[[1]: 0
x_data[[2]: 0

Accuracy:  0.0
Image 1. Cost with large learning rate
  • NaN means Not a Number - Wiki
  • In TensorFlow, NaN means too large number to handle.
  • By using large learning rate, the weights are NaN, and the cost value become weird

Small Learning Rate with TensorFlow

import tensorflow as tf
import matplotlib.pyplot as plt

# Training data
x_data = [[1,2,1], [1,3,2], [1,3,4], [1,5,5], 
          [1,7,5], [1,2,5], [1,6,6], [1,7,7]]
y_data = [[0,0,1], [0,0,1], [0,0,1], [0,1,0], 
          [0,1,0], [0,1,0], [1,0,0], [1,0,0]]

# Testing data
x_test = [[2,1,1], [3,1,2], [3,3,4]]
y_test = [[0,0,1], [0,0,1], [0,0,1]]

# Placeholder for inputs and labels
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])

# Weight
W = tf.Variable(tf.random_normal([3, 3]))
# Bias
b = tf.Variable(tf.random_normal([3]))

# Hypothesis
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cost function
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer
optimizer = tf.train.GradientDescentOptimizer(\
                learning_rate=1e-10).minimize(cost)

# Prediction
prediction = tf.arg_max(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variabels
    sess.run(tf.global_variables_initializer())
    
    steps = [i for i in range(201)]
    costs = []
    Ws = []
    
    print("Weights before training:", sess.run(W))
    for i in steps:
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
            feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
    print("Weights after training:", sess.run(W))
    
    # Predict
    predictionResult = sess.run(prediction, \
                            feed_dict={X: x_test})
    print("Prediction")
    for i in range(len(predictionResult)):
        print("x_data[[{0}]: {1}".format(\
                        i, predictionResult[i]))
    print()
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy, \
                            feed_dict={X: x_test, Y: y_test}))
    
    # Plot
    plt.plot(steps, costs)
    plt.xlabel("trials")
    plt.ylabel("cost")
    plt.title("Costs")
    plt.show()
Weights before training: [[ 1.11144888 -0.4928776  -1.30249465]
 [-0.16905189 -0.16199942 -0.95170742]
 [ 0.49142101  0.46008062 -1.71425021]]
Weights after training: [[ 1.11144888 -0.4928776  -1.30249465]
 [-0.16905189 -0.16199942 -0.95170742]
 [ 0.49142101  0.46008062 -1.71425021]]
Prediction
x_data[[0]: 0
x_data[[1]: 0
x_data[[2]: 0

Accuracy:  0.0
Image 2. Cost with small learning rate
  • By using small learning rate, costs and weigths are not changed at all.

Non-normalized Inputs

  • Sometimes, non-normalized inputs leads weights to wrong direction.
import tensorflow as tf
import numpy as np

xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

# Placeholder for inputs and labels
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
# Bias
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Cost function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

steps = [i for i in range(101)]
costs = []
Ws = []

for step in steps:
    cost_val, W_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        costs.append(cost_val)
        Ws.append(W_val)

print("Step        0                     20             40             60             100")
for i in range(len(W_val)):
    print("W[{0}]     {1:10.2f}       {2:10.2f}     {3:10.2f}     {4:10.2f}     {5:10.2f}".format(\
            i, Ws[0][i][0], Ws[1][i][0], Ws[2][i][0], Ws[3][i][0], Ws[4][i][0]))
    
print("Cost     {0:10.2f} {1:10.2f}     {2:10.2f}     {3:10.2f}     {4:10.2f}".format(\
                costs[0], costs[1], costs[2], costs[3], costs[4]))
Step        0                     20             40             60            100
W[0]      879897.62              nan            nan            nan            nan
W[1]     1772247.62              nan            nan            nan            nan
W[2]     1393975.00              nan            nan            nan            nan
W[3]      976905.25              nan            nan            nan            nan
W[4]     1151491.12              nan            nan            nan            nan
W[5]     1161192.00              nan            nan            nan            nan
W[6]     1064205.62              nan            nan            nan            nan
W[7]     1355190.38              nan            nan            nan            nan
Cost     1554994429952.00        nan            nan            nan            nan

Normalized Inputs

  • To make equally sensitive for the inputs, normalization is necessary.
  • To normalize input data, find min and max and use them as denominator.
import tensorflow as tf
import numpy as np

# Normalization
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # Noise term prevents the zero division
    return numerator / (denominator + 1e-7)

xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])

# Normalization
xy = MinMaxScaler(xy)

x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

# Placeholder for inputs and labels
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weight
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
# Bias
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Cost function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

steps = [i for i in range(101)]
costs = []
Ws = []

for step in steps:
    cost_val, W_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        costs.append(cost_val)
        Ws.append(W_val)

print("Step              0             20             40             60             100")
for i in range(len(W_val)):
    print("W[{0}]     {1:10.8f}     {2:10.8f}     {3:10.8f}     {4:10.8f}     {5:10.8f}".format(\
            i, Ws[0][i][0], Ws[1][i][0], Ws[2][i][0], Ws[3][i][0], Ws[4][i][0]))
    
print("Cost     {0:10.8f}     {1:10.8f}     {2:10.8f}     {3:10.8f}     {4:10.8f}".format(\
                costs[0], costs[1], costs[2], costs[3], costs[4]))
Step              0             20             40             60            100
W[0]     2.56734419     2.56579924     2.56425500     2.56271195     2.56116962
W[1]     3.84935522     3.84779811     3.84624195     3.84468651     3.84313178
W[2]     2.53057241     2.52928972     2.52800751     2.52672601     2.52544522
W[3]     1.13361228     1.13263774     1.13166404     1.13069081     1.12971830
W[4]     1.82057595     1.81941223     1.81824923     1.81708705     1.81592548
W[5]     1.83986354     1.83873773     1.83761239     1.83648801     1.83536410
W[6]     0.60744464     0.60668987     0.60593563     0.60518193     0.60442877
W[7]     1.37808812     1.37732553     1.37656343     1.37580156     1.37504029
Cost     2.66795492     2.66428232     2.66061473     2.65695190     2.65329361

COMMENTS

Name

0 weights,1,abstract class,1,active function,3,adam,2,Adapter,1,affine,2,argmax,1,back propagation,3,binary classification,3,blog,2,Bucket list,1,C++,11,Casting,1,cee,1,checkButton,1,cnn,3,col2im,1,columnspan,1,comboBox,1,concrete class,1,convolution,2,cost function,6,data preprocessing,2,data set,1,deep learning,31,Design Pattern,12,DIP,1,django,1,dnn,2,Don't Repeat Your code,1,drop out,2,ensemble,2,epoch,2,favicon,1,fcn,1,frame,1,gradient descent,5,gru,1,he,1,identify function,1,im2col,1,initialization,1,Lab,9,learning rate,2,LifeLog,1,linear regression,6,logistic function,1,logistic regression,3,logit,3,LSP,1,lstm,1,machine learning,31,matplotlib,1,menu,1,message box,1,mnist,3,mse,1,multinomial classification,3,mutli layer neural network,1,Non Virtual Interface,1,normalization,2,Note,21,numpy,4,one-hot encoding,3,OOP Principles,2,Open Close Principle,1,optimization,1,overfitting,1,padding,2,partial derivative,2,pooling,2,Prototype,1,pure virtual function,1,queue runner,1,radioButton,1,RBM,1,regularization,1,relu,2,reshape,1,restricted boltzmann machine,1,rnn,2,scrolledText,1,sigmoid,2,sigmoid function,1,single layer neural network,1,softmax,6,softmax classification,3,softmax cross entropy with logits,1,softmax function,2,softmax regression,3,softmax-with-loss,2,spinBox,1,SRP,1,standardization,1,sticky,1,stride,1,tab,1,Template Method,1,TensorFlow,31,testing data,1,this,2,tkinter,5,tooltip,1,Toplevel,1,training data,1,vanishing gradient,1,Virtual Copy Constructor,1,Virtual Destructor,1,Virtual Function,1,weight decay,1,xavier,2,xor,3,
ltr
item
Universe In Computer: 16. Learning Rate & Data Preprocessing with TensorFlow
16. Learning Rate & Data Preprocessing with TensorFlow
kinds of data set for machine learning
https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiE9QfIQg9MqxmXv8wo1jRHrMgva3N0n9uaoJIHiM44Vt8k6nlufCwcOrXM4piATO-QqQmLgh_JEZUv2KXJVRIATvdu0xwckn-JPaRyfJpu9tFP929dbQgKHcd0zfVFfe9EjSkH18A4MxU4/s0/
https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiE9QfIQg9MqxmXv8wo1jRHrMgva3N0n9uaoJIHiM44Vt8k6nlufCwcOrXM4piATO-QqQmLgh_JEZUv2KXJVRIATvdu0xwckn-JPaRyfJpu9tFP929dbQgKHcd0zfVFfe9EjSkH18A4MxU4/s72-c/
Universe In Computer
https://kunicom.blogspot.com/2017/07/16-learning-rate-data-preprocessing.html
https://kunicom.blogspot.com/
https://kunicom.blogspot.com/
https://kunicom.blogspot.com/2017/07/16-learning-rate-data-preprocessing.html
true
2543631451419919204
UTF-8
Loaded All Posts Not found any posts VIEW ALL Readmore Reply Cancel reply Delete By Home PAGES POSTS View All RECOMMENDED FOR YOU LABEL ARCHIVE SEARCH ALL POSTS Not found any post match with your request Back Home Sunday Monday Tuesday Wednesday Thursday Friday Saturday Sun Mon Tue Wed Thu Fri Sat January February March April May June July August September October November December Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec just now 1 minute ago $$1$$ minutes ago 1 hour ago $$1$$ hours ago Yesterday $$1$$ days ago $$1$$ weeks ago more than 5 weeks ago Followers Follow THIS CONTENT IS PREMIUM Please share to unlock Copy All Code Select All Code All codes were copied to your clipboard Can not copy the codes / texts, please press [CTRL]+[C] (or CMD+C with Mac) to copy