Softmax Regression with TensorFlow
TOC
- Multinomial Classification with TensorFlow
- Test for Trained Hypothesis
- Softmax Cross Entropy With Logits
- Animal Classification
Multinomial Classification with TensorFlow
- The hypothesis of multinomial classification is the softmax function applied to an affine function (see the sketch after this list).
$$ H(X) = S(W \cdot X + b) $$
- Labels are usually represented with one-hot encoding.
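- The softmax function S turns a vector of scores into probabilities that are positive and sum to 1:
$$ S(z)_i = \frac{e^{z_i}}{\sum_{j} e^{z_j}} $$
- As a quick sanity check, here is a minimal NumPy sketch of that definition (standalone; not part of the TensorFlow model below):
import numpy as np
def softmax(z):
    # Subtracting the max is a standard trick for numerical stability
    e = np.exp(z - np.max(z))
    return e / e.sum()
probs = softmax(np.array([2.0, 1.0, 0.1]))
print(probs)        # [0.65900114 0.24243297 0.09856589]
print(probs.sum())  # 1.0 (up to floating point error)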
import tensorflow as tf
import matplotlib.pyplot as plt

# Number of factors
nb_factors = 4
# Number of Labels
nb_labels = 3

# Input data
x_data = [[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4],
          [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6],
          [1, 6, 6, 6], [1, 7, 7, 7]]
# Labels (one-hot encoded)
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1],
          [0, 1, 0], [0, 1, 0], [0, 1, 0],
          [1, 0, 0], [1, 0, 0]]

# Placeholders for Inputs and Labels
X = tf.placeholder("float", [None, nb_factors])
Y = tf.placeholder("float", [None, nb_labels])

# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")

# Affine function
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

costs = []
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        costs.append(sess.run(cost, feed_dict={X: x_data, Y: y_data}))

costs_beginning = costs[:100]
plt.plot(costs_beginning)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost of Multinomial Classification (0 - 100)")
plt.show()

plt.plot(costs)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost of Multinomial Classification (0 - 1000)")
plt.show()
Image 1. Cost from 0 to 100 trials
Image 2. Cost from 0 to 1000 trials
Test for Trained Hypothesis
import tensorflow as tf

# Number of factors
nb_factors = 4
# Number of Labels
nb_labels = 3

# Input data
x_data = [[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4],
          [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6],
          [1, 6, 6, 6], [1, 7, 7, 7]]
# Labels (one-hot encoded)
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1],
          [0, 1, 0], [0, 1, 0], [0, 1, 0],
          [1, 0, 0], [1, 0, 0]]

# Placeholders for Inputs and Labels
X = tf.placeholder("float", [None, nb_factors])
Y = tf.placeholder("float", [None, nb_labels])

# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")

# Affine function
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Launch graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(1001):
    sess.run(optimizer, feed_dict={X: x_data, Y: y_data})

# Feed unseen inputs to the trained hypothesis
result = sess.run(hypothesis, feed_dict={X: [[1, 11, 7, 9],
                                             [1, 3, 4, 3],
                                             [1, 1, 0, 1]]})
# Pick the most probable label for each item
predicted = sess.run(tf.argmax(result, 1))
print("Item Prob.Label1 Prob.Label2 Prob.Label3 Class")
for i in range(len(result)):
    print("{0:4} {1:10.9f} {2:10.9f} {3:10.9f} {4:5}".format(
        i, result[i][0], result[i][1], result[i][2], predicted[i]))
Item Prob.Label1 Prob.Label2 Prob.Label3 Class
0 0.063795552 0.936126411 0.000078112 1
1 0.691038668 0.254544556 0.054416835 0
2 0.000003016 0.003177815 0.996819139 2
Softmax Cross Entropy With Logits
- Multinomial classification is one of the most common problem settings, so TensorFlow provides the softmax_cross_entropy_with_logits method to compute the cost concisely.
- The logit function is the inverse of the sigmoid function (checked numerically below). - Wiki
$$ l(x) = \log \frac{x}{1-x} $$
Image 3. Logit function (src: SRC)
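- A standalone NumPy sketch (independent of the model code) confirming that the logit function undoes the sigmoid:
import numpy as np
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))
def logit(p):
    return np.log(p / (1.0 - p))
print(logit(sigmoid(0.8)))  # 0.8 (up to floating point error)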
- The softmax function and the cross entropy error are integrated into softmax_cross_entropy_with_logits, so it takes the output of the affine function (the logits) directly and compares it with the one-hot labels.
nb_labels = 3
Y = [1, 0, 2]  # Not one-hot encoded
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])

# Affine function
affine = tf.matmul(X, W) + b
# Cost function - Cross Entropy Error
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=affine,
                                                 labels=Y_ONE_HOT)
cost = tf.reduce_mean(cost_i)
- To use softmax_cross_entropy_with_logits(), labels must be in one-hot encoding format.
- This transformation to one-hot encoding is done by the one_hot function from TensorFlow.
- Unfortunately, one_hot() adds one more rank: a [None, 1] label tensor becomes [None, 1, nb_labels]. To remove the additional rank, use reshape(); -1 in reshape() means "infer this dimension", so the final shape is [None, nb_labels]. (See the shape check after this list.)
- softmax_cross_entropy_with_logits() returns one cost per example, so the average must still be calculated manually, e.g. with reduce_mean().
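- A minimal shape check of this rank issue, assuming labels arrive as a [None, 1] integer tensor as in the animal example below:
import tensorflow as tf
nb_labels = 3
Y = tf.placeholder(tf.int32, [None, 1])   # labels before one-hot encoding
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
print(Y_ONE_HOT.shape)                    # (?, 1, 3) - one extra rank
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])
print(Y_ONE_HOT.shape)                    # (?, 3) - the shape the cost expects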
Animal Classification
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Number of factors
nb_factors = 16
# Number of Labels
nb_labels = 7 # 0 ~ 6
# Read data
xy = np.loadtxt("data-04-zoo.csv", delimiter=",", dtype=np.float32)
# Input data
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# Placeholders for Inputs and Labels
X = tf.placeholder(tf.float32, [None, nb_factors])
Y = tf.placeholder(tf.int32, [None, 1]) # Before one-hot encoding
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])
# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")
# Affine
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=affine,
                                                 labels=Y_ONE_HOT)
cost = tf.reduce_mean(cost_i)
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
# Prediction
prediction = tf.argmax(hypothesis, 1)
# Compare predictions and given labels
correct_prediction = tf.equal(prediction, tf.argmax(Y_ONE_HOT, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
costs = []
accs = []
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        cost_val, acc_val = sess.run([cost, accuracy],
                                     feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
        accs.append(acc_val)

plt.plot(costs)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost")
plt.show()

plt.plot(accs)
plt.xlabel("trial")
plt.ylabel("accuracy")
plt.title("Accuracy")
plt.show()
Image 4. Cost
Image 5. Accuracy
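- To inspect individual predictions against the true labels, the following lines can be added inside the with block above, after the training loop (flatten() collapses the [None, 1] label column so predictions and labels pair up):
# Compare each prediction with its true label
pred = sess.run(prediction, feed_dict={X: x_data})
for p, y in zip(pred, y_data.flatten()):
    print("[{}] Prediction: {}, True Y: {}".format(p == int(y), p, int(y)))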