Softmax Regression with TensorFlow
TOC
- Multinomial Classification with TensorFlow
- Test for Trained Hypothesis
- Softmax Cross Entropy With Logits
- Animal Classification
Multinomial Classification with TensorFlow
- The hypothesis of multinomial classification is the softmax function applied to an affine function (see the sketch after this list).
$$ H(X) = S(W \cdot X + b) $$
- Labels are usually represented with one-hot encoding.
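- The softmax function S turns a vector of scores into probabilities that are positive and sum to 1:
$$ S(z)_i = \frac{e^{z_i}}{\sum_{j} e^{z_j}} $$
- As a quick sanity check, here is a minimal NumPy sketch of that definition (standalone; not part of the TensorFlow model below):
import numpy as np
def softmax(z):
    # Subtracting the max is a standard trick for numerical stability
    e = np.exp(z - np.max(z))
    return e / e.sum()
probs = softmax(np.array([2.0, 1.0, 0.1]))
print(probs)        # [0.65900114 0.24243297 0.09856589]
print(probs.sum())  # 1.0 (up to floating point error)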
import tensorflow as tf
import matplotlib.pyplot as plt

# Number of factors
nb_factors = 4
# Number of Labels
nb_labels = 3

# Input data
x_data = [[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4],
          [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6],
          [1, 6, 6, 6], [1, 7, 7, 7]]
# Labels (one-hot encoded)
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1],
          [0, 1, 0], [0, 1, 0], [0, 1, 0],
          [1, 0, 0], [1, 0, 0]]

# Placeholders for Inputs and Labels
X = tf.placeholder("float", [None, nb_factors])
Y = tf.placeholder("float", [None, nb_labels])

# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")

# Affine function
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

costs = []
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        costs.append(sess.run(cost, feed_dict={X: x_data, Y: y_data}))

costs_beginning = costs[:100]
plt.plot(costs_beginning)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost of Multinomial Classification (0 - 100)")
plt.show()

plt.plot(costs)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost of Multinomial Classification (0 - 1000)")
plt.show()
Image 1. Cost from 0 to 100 trials
Image 2. Cost from 0 to 1000 trials
Test for Trained Hypothesis
import tensorflow as tf

# Number of factors
nb_factors = 4
# Number of Labels
nb_labels = 3

# Input data
x_data = [[1, 2, 1, 1], [2, 1, 3, 2], [3, 1, 3, 4],
          [4, 1, 5, 5], [1, 7, 5, 5], [1, 2, 5, 6],
          [1, 6, 6, 6], [1, 7, 7, 7]]
# Labels (one-hot encoded)
y_data = [[0, 0, 1], [0, 0, 1], [0, 0, 1],
          [0, 1, 0], [0, 1, 0], [0, 1, 0],
          [1, 0, 0], [1, 0, 0]]

# Placeholders for Inputs and Labels
X = tf.placeholder("float", [None, nb_factors])
Y = tf.placeholder("float", [None, nb_labels])

# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")

# Affine function
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(hypothesis), axis=1))
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Launch graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for step in range(1001):
    sess.run(optimizer, feed_dict={X: x_data, Y: y_data})

# Feed unseen inputs to the trained hypothesis
result = sess.run(hypothesis, feed_dict={X: [[1, 11, 7, 9],
                                             [1, 3, 4, 3],
                                             [1, 1, 0, 1]]})
# Pick the most probable label for each item
predicted = sess.run(tf.argmax(result, 1))
print("Item Prob.Label1 Prob.Label2 Prob.Label3 Class")
for i in range(len(result)):
    print("{0:4} {1:10.9f} {2:10.9f} {3:10.9f} {4:5}".format(
        i, result[i][0], result[i][1], result[i][2], predicted[i]))
Item Prob.Label1 Prob.Label2 Prob.Label3 Class
0 0.063795552 0.936126411 0.000078112 1
1 0.691038668 0.254544556 0.054416835 0
2 0.000003016 0.003177815 0.996819139 2
Softmax Cross Entropy With Logits
- Multinomial classification is one of the most common problem settings, so TensorFlow provides the softmax_cross_entropy_with_logits method to compute the cost concisely.
- The logit function is the inverse of the sigmoid function (checked numerically below). - Wiki
$$ l(x) = \log \frac{x}{1-x} $$
Image 3. Logit function (src: SRC)
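- A standalone NumPy sketch (independent of the model code) confirming that the logit function undoes the sigmoid:
import numpy as np
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))
def logit(p):
    return np.log(p / (1.0 - p))
print(logit(sigmoid(0.8)))  # 0.8 (up to floating point error)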
- The softmax function and the cross entropy error are integrated into softmax_cross_entropy_with_logits, so it takes the output of the affine function (the logits) directly and compares it with the one-hot labels.
nb_labels = 3
Y = [1, 0, 2]  # Not one-hot encoded
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])

# Affine function
affine = tf.matmul(X, W) + b
# Cost function - Cross Entropy Error
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=affine,
                                                 labels=Y_ONE_HOT)
cost = tf.reduce_mean(cost_i)
- To use softmax_cross_entropy_with_logits(), labels must be in one-hot encoding format.
- This transformation to one-hot encoding is done by the one_hot function from TensorFlow.
- Unfortunately, one_hot() adds one more rank: a [None, 1] label tensor becomes [None, 1, nb_labels]. To remove the additional rank, use reshape(); -1 in reshape() means "infer this dimension", so the final shape is [None, nb_labels]. (See the shape check after this list.)
- softmax_cross_entropy_with_logits() returns one cost per example, so the average must still be calculated manually, e.g. with reduce_mean().
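- A minimal shape check of this rank issue, assuming labels arrive as a [None, 1] integer tensor as in the animal example below:
import tensorflow as tf
nb_labels = 3
Y = tf.placeholder(tf.int32, [None, 1])   # labels before one-hot encoding
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
print(Y_ONE_HOT.shape)                    # (?, 1, 3) - one extra rank
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])
print(Y_ONE_HOT.shape)                    # (?, 3) - the shape the cost expects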
Animal Classification
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Number of factors
nb_factors = 16
# Number of Labels
nb_labels = 7 # 0 ~ 6
# Read data
xy = np.loadtxt("data-04-zoo.csv", delimiter=",", dtype=np.float32)
# Input data
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]
# Placeholders for Inputs and Labels
X = tf.placeholder(tf.float32, [None, nb_factors])
Y = tf.placeholder(tf.int32, [None, 1]) # Before one-hot encoding
Y_ONE_HOT = tf.one_hot(Y, nb_labels)
Y_ONE_HOT = tf.reshape(Y_ONE_HOT, [-1, nb_labels])
# Weight
W = tf.Variable(tf.random_normal([nb_factors, nb_labels]), name="weight")
# Bias
b = tf.Variable(tf.random_normal([nb_labels]), name="bias")
# Affine
affine = tf.matmul(X, W) + b
# Hypothesis - Softmax(Affine)
hypothesis = tf.nn.softmax(affine)
# Cost function - Cross Entropy Error
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=affine,
                                                 labels=Y_ONE_HOT)
cost = tf.reduce_mean(cost_i)
# Optimizer - Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
# Prediction
prediction = tf.argmax(hypothesis, 1)
# Compare predictions and given labels
correct_prediction = tf.equal(prediction, tf.argmax(Y_ONE_HOT, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
costs = []
accs = []
# Launch graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})
        cost_val, acc_val = sess.run([cost, accuracy],
                                     feed_dict={X: x_data, Y: y_data})
        costs.append(cost_val)
        accs.append(acc_val)

plt.plot(costs)
plt.xlabel("trial")
plt.ylabel("cost")
plt.title("Cost")
plt.show()

plt.plot(accs)
plt.xlabel("trial")
plt.ylabel("accuracy")
plt.title("Accuracy")
plt.show()
Image 4. Cost
Image 5. Accuracy
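- To inspect individual predictions against the true labels, the following lines can be added inside the with block above, after the training loop (flatten() collapses the [None, 1] label column so predictions and labels pair up):
# Compare each prediction with its true label
pred = sess.run(prediction, feed_dict={X: x_data})
for p, y in zip(pred, y_data.flatten()):
    print("[{}] Prediction: {}, True Y: {}".format(p == int(y), p, int(y)))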