Kinds of Data Sets for Machine Learning
Large Learning Rate with TensorFlow
import tensorflow as tf
import matplotlib.pyplot as plt

# Training data
x_data = [[1, 2, 1], [1, 3, 2], [1, 3, 4], [1, 5, 5],
          [1, 7, 5], [1, 2, 5], [1, 6, 6], [1, 7, 7]]
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0],
          [0, 1, 0], [0, 1, 0], [1, 0, 0], [1, 0, 0]]
# Testing data
x_test = [[2, 1, 1], [3, 1, 2], [3, 3, 4]]
y_test = [[0, 0, 1], [0, 0, 1], [0, 0, 1]]
# Placeholders for inputs and labels
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
# Weight
W = tf.Variable(tf.random_normal([3, 3]))
# Bias
b = tf.Variable(tf.random_normal([3]))
# Hypothesis: softmax over the three classes
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cross-entropy cost function
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer with a deliberately large learning rate
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=1.5).minimize(cost)
# Prediction
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    steps = list(range(201))
    costs = []
    print("Weights before training:", sess.run(W))
    for i in steps:
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
                                      feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
    print("Weights after training:", sess.run(W))
    # Predict on the test set
    predictionResult = sess.run(prediction, feed_dict={X: x_test})
    print("Prediction")
    for i in range(len(predictionResult)):
        print("x_test[{0}]: {1}".format(i, predictionResult[i]))
    print()
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy,
                                 feed_dict={X: x_test, Y: y_test}))
    # Plot the cost over training steps
    plt.plot(steps, costs)
    plt.xlabel("trials")
    plt.ylabel("cost")
    plt.title("Costs")
    plt.show()
Weights before training: [[-1.03583431 -0.5126186 -1.50135481]
[ 0.91863215 0.36449438 0.07521305]
[ 0.82262826 0.22673547 0.28540802]]
Weights after training: [[ nan nan nan]
[ nan nan nan]
[ nan nan nan]]
Prediction
x_test[0]: 0
x_test[1]: 0
x_test[2]: 0
Accuracy: 0.0
Image 1. Cost with large learning rate
- NaN means "Not a Number" (see the Wikipedia entry).
- In TensorFlow, NaN shows up when a computation overflows or becomes undefined, e.g. taking the log of a zero probability.
- With a learning rate this large, gradient descent overshoots the minimum: the weights diverge to NaN and the cost values become meaningless. The sketch below shows the mechanism on a one-dimensional cost.
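To isolate the overshooting mechanism, here is a minimal sketch (plain NumPy, not from the original example; the cost function and starting point are illustrative assumptions). Gradient descent on f(w) = w^2 multiplies w by (1 - 2*lr) each step, so with lr = 1.5 the factor is -2 and the iterate doubles in magnitude until it overflows:

import numpy as np

# Gradient descent on the 1-D cost f(w) = w**2, whose gradient is 2*w.
# Update: w <- w - lr*2*w = w*(1 - 2*lr). With lr = 1.5 the factor is -2,
# so |w| doubles every step instead of shrinking toward the minimum at 0.
w = np.float64(1.0)   # illustrative starting point (an assumption)
lr = 1.5              # the same oversized learning rate as above
for step in range(1100):
    w = w - lr * 2 * w
print(w)  # overflows float64 to inf around step 1024; inf - inf then yields nan

The printed result is nan, the same failure mode as the NaN weights above; NumPy may also emit an overflow RuntimeWarning when w first leaves the float64 range.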
Small Learning Rate with TensorFlow
import tensorflow as tf
import matplotlib.pyplot as plt

# Training data
x_data = [[1, 2, 1], [1, 3, 2], [1, 3, 4], [1, 5, 5],
          [1, 7, 5], [1, 2, 5], [1, 6, 6], [1, 7, 7]]
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0],
          [0, 1, 0], [0, 1, 0], [1, 0, 0], [1, 0, 0]]
# Testing data
x_test = [[2, 1, 1], [3, 1, 2], [3, 3, 4]]
y_test = [[0, 0, 1], [0, 0, 1], [0, 0, 1]]
# Placeholders for inputs and labels
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 3])
# Weight
W = tf.Variable(tf.random_normal([3, 3]))
# Bias
b = tf.Variable(tf.random_normal([3]))
# Hypothesis: softmax over the three classes
hypothesis = tf.nn.softmax(tf.matmul(X, W) + b)
# Cross-entropy cost function
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer with a deliberately tiny learning rate
optimizer = tf.train.GradientDescentOptimizer(
    learning_rate=1e-10).minimize(cost)
# Prediction
prediction = tf.argmax(hypothesis, 1)
is_correct = tf.equal(prediction, tf.argmax(Y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())
    steps = list(range(201))
    costs = []
    print("Weights before training:", sess.run(W))
    for i in steps:
        cost_val, W_val, _ = sess.run([cost, W, optimizer],
                                      feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
    print("Weights after training:", sess.run(W))
    # Predict on the test set
    predictionResult = sess.run(prediction, feed_dict={X: x_test})
    print("Prediction")
    for i in range(len(predictionResult)):
        print("x_test[{0}]: {1}".format(i, predictionResult[i]))
    print()
    # Calculate the accuracy
    print("Accuracy: ", sess.run(accuracy,
                                 feed_dict={X: x_test, Y: y_test}))
    # Plot the cost over training steps
    plt.plot(steps, costs)
    plt.xlabel("trials")
    plt.ylabel("cost")
    plt.title("Costs")
    plt.show()
Weights before training: [[ 1.11144888 -0.4928776 -1.30249465]
[-0.16905189 -0.16199942 -0.95170742]
[ 0.49142101 0.46008062 -1.71425021]]
Weights after training: [[ 1.11144888 -0.4928776 -1.30249465]
[-0.16905189 -0.16199942 -0.95170742]
[ 0.49142101 0.46008062 -1.71425021]]
Prediction
x_test[0]: 0
x_test[1]: 0
x_test[2]: 0
Accuracy: 0.0
Image 2. Cost with small learning rate
- With a learning rate this small, the costs and weights do not change at all: after 200 steps the weights are identical to their initial values. The sketch below shows why the updates are invisible.
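The same one-dimensional sketch (again an illustrative assumption, not from the original example) shows the opposite failure: with lr = 1e-10 the update factor (1 - 2*lr) is so close to 1 that hundreds of steps leave the weight essentially untouched:

import numpy as np

# Same 1-D cost f(w) = w**2, now with a tiny learning rate.
# Each step multiplies w by (1 - 2e-10); after 200 steps the total
# change is a factor of roughly (1 - 2e-10)**200, i.e. about 1 - 4e-8.
w = np.float64(1.0)
lr = 1e-10
for step in range(200):
    w = w - lr * 2 * w
print(w)  # ~0.99999996: indistinguishable from the start at float32 precision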
Non-normalized Inputs
- Non-normalized inputs can also push the weights in the wrong direction: when one feature (here the trading volume, around 10^6) dwarfs the others, its gradient term dominates and even a modest learning rate makes the updates overshoot.
import tensorflow as tf
import numpy as np

# Stock-price style data; column 2 (volume) is ~1000x larger than the rest
xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# Placeholders for inputs and labels
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
# Weight
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
# Bias
b = tf.Variable(tf.random_normal([1]), name='bias')
# Hypothesis: plain linear regression
hypothesis = tf.matmul(X, W) + b
# Mean squared error cost function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)
# Launch the graph in a session.
sess = tf.Session()
# Initialize global variables in the graph.
sess.run(tf.global_variables_initializer())
steps = list(range(101))
costs = []
preds = []
for step in steps:
    cost_val, hypo_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        costs.append(cost_val)
        preds.append(hypo_val)
# Print the predictions and cost recorded at steps 0, 20, 40, 60, 80
print("Step             0        20        40        60        80")
for i in range(len(hypo_val)):
    print("Pred[{0}] {1:10.2f} {2:10.2f} {3:10.2f} {4:10.2f} {5:10.2f}".format(
        i, preds[0][i][0], preds[1][i][0], preds[2][i][0], preds[3][i][0],
        preds[4][i][0]))
print("Cost    {0:10.2f} {1:10.2f} {2:10.2f} {3:10.2f} {4:10.2f}".format(
    costs[0], costs[1], costs[2], costs[3], costs[4]))
Step             0        20        40        60        80
Pred[0]  879897.62       nan       nan       nan       nan
Pred[1] 1772247.62       nan       nan       nan       nan
Pred[2] 1393975.00       nan       nan       nan       nan
Pred[3]  976905.25       nan       nan       nan       nan
Pred[4] 1151491.12       nan       nan       nan       nan
Pred[5] 1161192.00       nan       nan       nan       nan
Pred[6] 1064205.62       nan       nan       nan       nan
Pred[7] 1355190.38       nan       nan       nan       nan
Cost    1554994429952.00       nan       nan       nan       nan
Normalized Inputs
- To make training equally sensitive to every input feature, normalization is necessary.
- A common choice is min-max scaling: subtract each column's minimum and divide by its range, x' = (x - min) / (max - min), which maps every feature into [0, 1].
import tensorflow as tf
import numpy as np

# Min-max normalization: map every column into the [0, 1] range
def MinMaxScaler(data):
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # Noise term prevents division by zero
    return numerator / (denominator + 1e-7)

xy = np.array([[828.659973, 833.450012, 908100, 828.349976, 831.659973],
               [823.02002, 828.070007, 1828100, 821.655029, 828.070007],
               [819.929993, 824.400024, 1438100, 818.97998, 824.159973],
               [816, 820.958984, 1008100, 815.48999, 819.23999],
               [819.359985, 823, 1188100, 818.469971, 818.97998],
               [819, 823, 1198100, 816, 820.450012],
               [811.700012, 815.25, 1098100, 809.780029, 813.669983],
               [809.51001, 816.659973, 1398100, 804.539978, 809.559998]])
# Normalize before splitting into inputs and labels
xy = MinMaxScaler(xy)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# Placeholders for inputs and labels
X = tf.placeholder(tf.float32, shape=[None, 4])
Y = tf.placeholder(tf.float32, shape=[None, 1])
# Weight
W = tf.Variable(tf.random_normal([4, 1]), name='weight')
# Bias
b = tf.Variable(tf.random_normal([1]), name='bias')
# Hypothesis: plain linear regression
hypothesis = tf.matmul(X, W) + b
# Mean squared error cost function
cost = tf.reduce_mean(tf.square(hypothesis - Y))
# Optimizer
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)
# Launch the graph in a session.
sess = tf.Session()
# Initialize global variables in the graph.
sess.run(tf.global_variables_initializer())
steps = list(range(101))
costs = []
preds = []
for step in steps:
    cost_val, hypo_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 20 == 0:
        costs.append(cost_val)
        preds.append(hypo_val)
# Print the predictions and cost recorded at steps 0, 20, 40, 60, 80
print("Step             0         20         40         60         80")
for i in range(len(hypo_val)):
    print("Pred[{0}] {1:10.8f} {2:10.8f} {3:10.8f} {4:10.8f} {5:10.8f}".format(
        i, preds[0][i][0], preds[1][i][0], preds[2][i][0], preds[3][i][0],
        preds[4][i][0]))
print("Cost    {0:10.8f} {1:10.8f} {2:10.8f} {3:10.8f} {4:10.8f}".format(
    costs[0], costs[1], costs[2], costs[3], costs[4]))
Step             0         20         40         60         80
Pred[0] 2.56734419 2.56579924 2.56425500 2.56271195 2.56116962
Pred[1] 3.84935522 3.84779811 3.84624195 3.84468651 3.84313178
Pred[2] 2.53057241 2.52928972 2.52800751 2.52672601 2.52544522
Pred[3] 1.13361228 1.13263774 1.13166404 1.13069081 1.12971830
Pred[4] 1.82057595 1.81941223 1.81824923 1.81708705 1.81592548
Pred[5] 1.83986354 1.83873773 1.83761239 1.83648801 1.83536410
Pred[6] 0.60744464 0.60668987 0.60593563 0.60518193 0.60442877
Pred[7] 1.37808812 1.37732553 1.37656343 1.37580156 1.37504029
Cost    2.66795492 2.66428232 2.66061473 2.65695190 2.65329361
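As a side note, hand-rolling the scaler is not required in practice: scikit-learn provides an equivalent min-max transformer. A minimal sketch, assuming scikit-learn is available (this library call is not part of the original example):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Two columns on wildly different scales, like price vs. volume above
data = np.array([[828.66,  908100.0],
                 [823.02, 1828100.0],
                 [819.93, 1438100.0]])

scaler = MinMaxScaler()               # scales each column into [0, 1] by default
scaled = scaler.fit_transform(data)   # fit min/max per column, then transform
print(scaled)
restored = scaler.inverse_transform(scaled)  # undo the scaling when needed

Fitting the scaler on the training split only, and reusing it on the test split, avoids leaking test statistics into training.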