31. RNN with TensorFlow

Implementation of RNN


Character Level RNN

  • The training sentence is "hihello"
  • Depending on where it appears in the sequence, the character after "h" is either "i" or "e". A basic DNN cannot make this kind of prediction because it only sees the current character.
  • However, an RNN keeps track of the sequence and can predict the next character well.
import tensorflow as tf
import numpy as np

# Reproducibility
tf.set_random_seed(777)

# Unique characters of "hihello"
uniqueChar = ['h', 'i', 'e', 'l', 'o']

# One hot encoded data
oh = {}
oh['0'] = [0, 0, 0, 0, 0]
for i, c in enumerate(uniqueChar):
    oh[c] = oh['0'].copy()
    oh[c][i] = 1

# Relationship between input and output
#   h -> i
#   i -> h
#   h -> e
#   e -> l
#   l -> l
#   l -> o
x_data = [[0, 1, 0, 2, 3, 3]]   # hihell
x_one_hot = [[oh['h'],
              oh['i'],
              oh['h'],
              oh['e'],
              oh['l'],
              oh['l']]]

y_data = [[1, 0, 2, 3, 3, 4]]    # ihello

num_labels = len(uniqueChar)
# The size of one-hot encoded data
input_dim = len(oh['0'])
# Output size of the LSTM; 5 so the output can directly predict a one-hot vector
hidden_size = 5
# One sentence
batch_size = 1
# How many time steps are unrolled for training
# In this example, the whole input sequence is used at once.
# len("hihell") == 6
sequence_length = len(y_data[0])
# Learning rate
learning_rate = 0.1

# Input and Answer
X = tf.placeholder(
    tf.float32, [None, sequence_length, input_dim])
# Every time step has a target character,
# so the last dimension of Y equals sequence_length
Y = tf.placeholder(tf.int32, [None, sequence_length])

# Training cell
# An LSTM cell usually works better than a basic RNN cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size)
# Initialization
initial_state = cell.zero_state(batch_size, tf.float32)
# Construct RNN with cell and input data
outputs, _states = tf.nn.dynamic_rnn(
    cell, X, initial_state=initial_state, dtype=tf.float32)

# Flatten the batch and time dimensions for the fully connected layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])

# Fully connected layer producing the raw logits (no activation)
outputs = tf.contrib.layers.fully_connected(
    inputs=X_for_fc, num_outputs=num_labels, activation_fn=None)

# Reshape outputs for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_labels])
# Weights for sequence_loss
weights = tf.ones([batch_size, sequence_length])
# Cost function for RNN from TensorFlow
#  sequence_loss applies softmax internally, so the raw FC outputs are passed as logits
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Prediction
prediction = tf.argmax(outputs, axis=2)

# Train
with tf.Session() as sess:
    # Initialize RNN
    sess.run(tf.global_variables_initializer())
    for i in range(50):
        cost, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_one_hot})

        # Print predicted characters using uniqueChar
        if i % 10 == 0:
            result_str = [uniqueChar[c] for c in np.squeeze(result)]
            print("Step: {0:2d}, Cost: {1:.6f}, Prediction: {2}".format(
                i, cost, ''.join(result_str)))
Step:  0, Cost: 1.643556, Prediction: llllll
Step: 10, Cost: 0.689070, Prediction: ehello
Step: 20, Cost: 0.099703, Prediction: ihello
Step: 30, Cost: 0.010674, Prediction: ihello
Step: 40, Cost: 0.003086, Prediction: ihello
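
The one-hot dictionary above was built by hand; the same table can be derived from an identity matrix, which scales better to larger vocabularies. A minimal sketch (not part of the original listing) using the variable names from the code above:

import numpy as np

uniqueChar = ['h', 'i', 'e', 'l', 'o']
# Each row of the identity matrix is the one-hot vector for one character
eye = np.eye(len(uniqueChar), dtype=int)
oh = {c: eye[i].tolist() for i, c in enumerate(uniqueChar)}

x_one_hot = [[oh[c] for c in "hihell"]]
print(np.array(x_one_hot).shape)   # (1, 6, 5)
print(oh['h'])                     # [1, 0, 0, 0, 0]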

Generate Parameters for RNN

  • Train on a sentence that consists of several words: "if you want me"
  • With Python, it is easy to extract the values the RNN needs (vocabulary size, sequence length, index data) from the input sentence.
import tensorflow as tf
import numpy as np

# Reproducibility
tf.set_random_seed(777)

# Training data
sentence = " if you want me"
uniqueChar = list(set(sentence))
idx4Char = {c: i for i, c in enumerate(uniqueChar)}

# Hyper parameters
# RNN hidden size
rnn_hidden_size = len(uniqueChar)
print("RNN hidden size: {0}".format(rnn_hidden_size))
# Number of labels
num_labels = len(idx4Char)
print("Number of Labels: {0}".format(num_labels))
# One sentence
batch_size = 1
# How many time steps the RNN is unrolled for
#  In this example, the whole sentence is trained at once.
sequence_length = len(sentence) - 1
print("Sequence length: {0}".format(sequence_length))
# Learning rate
learning_rate = 0.001

# Input and output data
input_idx = [idx4Char[c] for c in sentence]
x_data = [input_idx[:-1]]  # X data:  " if you want m"
y_data = [input_idx[1:]]   # Y label: "if you want me"

# Placeholder for input and label
X = tf.placeholder(tf.int32, [None, sequence_length])  # X data
Y = tf.placeholder(tf.int32, [None, sequence_length])  # Y label

# One hot encoded data
X_one_hot = tf.one_hot(X, num_labels)
print("One hot encoded data for X: {0}".format(X_one_hot.shape))

cell = tf.contrib.rnn.BasicLSTMCell(
    num_units=rnn_hidden_size, state_is_tuple=True)
initial_state = cell.zero_state(batch_size, tf.float32)
rnn_output, _states = tf.nn.dynamic_rnn(
    cell, X_one_hot, initial_state=initial_state, dtype=tf.float32)

# Reshape rnn output data for FCN
output_for_fcn = tf.reshape(rnn_output, [-1, rnn_hidden_size])
print("Reshaped output data: {0}".format(output_for_fcn.shape))

# FCN
affine = tf.contrib.layers.fully_connected(
    inputs=output_for_fcn, num_outputs=num_labels, activation_fn=None)

# Reshape affine for sequence_loss
output = tf.reshape(affine, [batch_size, sequence_length, num_labels])
# Weights for sequence_loss
weights = tf.ones([batch_size, sequence_length])

# Cost function for RNN from TensorFlow
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=output, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Prediction
prediction = tf.argmax(output, axis=2)

# Train
with tf.Session() as sess:
    # Initialize RNN
    sess.run(tf.global_variables_initializer())

    for i in range(1000):
        cost, _op = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
        result = sess.run(prediction, feed_dict={X: x_data})

        # Print predicted characters using uniqueChar
        if i % 100 == 0:
            result_str = [uniqueChar[c] for c in np.squeeze(result)]
            print("Step: {0:5d}, Cost: {1:.6f}, Prediction: {2}".format(
                i, cost, ''.join(result_str)))
RNN hidden size: 12
Number of Labels: 12
Sequence length: 14
One hot encoded data for X: (?, 14, 12)
Reshaped output data: (14, 12)
Step:     0, Cost: 2.482816, Prediction: auaawwwwaawwww
Step:   100, Cost: 2.117091, Prediction:              n
Step:   200, Cost: 1.438025, Prediction: yo y   want me
Step:   300, Cost: 0.947428, Prediction: if yo  want me
Step:   400, Cost: 0.606214, Prediction: if you want me
Step:   500, Cost: 0.363260, Prediction: if you want me
Step:   600, Cost: 0.221725, Prediction: if you want me
Step:   700, Cost: 0.140786, Prediction: if you want me
Step:   800, Cost: 0.097087, Prediction: if you want me
Step:   900, Cost: 0.071458, Prediction: if you want me
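
One caveat about `uniqueChar = list(set(sentence))` in the listing above: Python does not guarantee a stable iteration order for a set of characters, so the character-to-index mapping (and the intermediate predictions) can differ between runs even with the random seed fixed. Sorting the vocabulary makes the mapping reproducible; a small optional tweak, not in the original code:

# Deterministic vocabulary ordering (optional tweak)
uniqueChar = sorted(set(sentence))
idx4Char = {c: i for i, c in enumerate(uniqueChar)}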

Long Data Set Training with Batch

  • Train on a long sentence: "if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea."
  • For a long data set, it is possible to split it into overlapping pieces and train them together as one batch, as the short sketch after this list illustrates.
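Before the full listing, here is a quick illustration (not part of the original code) of how the sliding window produces input and label strings shifted by one character; only the first few windows are shown:

sentence = " if you want to build a ship,"   # beginning of the sentence, for illustration only
sequence_length = 10

for i in range(3):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1:i + sequence_length + 1]
    print(repr(x_str), '->', repr(y_str))

# ' if you wa' -> 'if you wan'
# 'if you wan' -> 'f you want'
# 'f you want' -> ' you want '

The full listing below applies the same split to the whole sentence and feeds all windows as one batch.
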
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn

# Reproducibility
tf.set_random_seed(777)

# Training data
sentence = (" if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")
uniqueChar = list(set(sentence))
idx4Char = {w: i for i, w in enumerate(uniqueChar)}

# Hyper parameters
# Data dimension
data_dim = len(uniqueChar)
# Hidden size
hidden_size = len(uniqueChar)
# Number of labels
num_labels = len(uniqueChar)
# Sequence length
sequence_length = 10
# Learning rate
learning_rate = 0.01
# Number of stacked LSTM cells (layers)
num_cells = 7

# Split the sentence and make input and output data
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]

    # Data translated to index
    x = [idx4Char[c] for c in x_str]
    y = [idx4Char[c] for c in y_str]

    # Store index data to input and output lists
    dataX.append(x)
    dataY.append(y)

# Batch size
batch_size = len(dataX)

# Placeholders for input and output
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])

# One hot encoded data
X_one_hot = tf.one_hot(X, num_labels)

# Make a LSTM cell with hidden_size
def lstm_cell():
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
    return cell

# Make multiple LSTM cells
multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(num_cells)])

# rnn_output shape: [batch_size, sequence_length, hidden_size]
# _states holds the final state of each stacked cell
rnn_output, _states = tf.nn.dynamic_rnn(
    multi_cells, X_one_hot, dtype=tf.float32)

# Reshape rnn output data for FCN
output_for_fc = tf.reshape(rnn_output, [-1, hidden_size])

# FCN
affine = tf.contrib.layers.fully_connected(
    output_for_fc, num_labels, activation_fn=None)

# Reshape affine for sequence_loss
output = tf.reshape(affine, [batch_size, sequence_length, num_labels])

# Weights for sequence_loss: every time step is weighted equally
weights = tf.ones([batch_size, sequence_length])

# Cost function for RNN from TensorFlow
sequence_loss = tf.contrib.seq2seq.sequence_loss(
    logits=output, targets=Y, weights=weights)
# average of cost
mean_loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train_op = tf.train.AdamOptimizer(
    learning_rate=learning_rate).minimize(mean_loss)

# Decode the logits of every window: print the full first window,
# then the last character of each subsequent window.
def printResult(_step, _cost, _results):
    print("Step: {0:4d}, Cost: {1:.6f}, Sentence: ".format(_step, _cost), end='')
    for j, result in enumerate(_results):
        index = np.argmax(result, axis=1)
        # Print all characters of the first window
        if j == 0:
            print(''.join([uniqueChar[t] for t in index]), end='')
        else:
            print(uniqueChar[index[-1]], end='')
    print("")

with tf.Session() as sess:
    # Initialize RNN
    sess.run(tf.global_variables_initializer())

    max_step = 1000
    cost = 0
    # Train
    for i in range(max_step):
        _op, cost, results = sess.run(
            [train_op, mean_loss, output], feed_dict={X: dataX, Y: dataY})
        if i % 100 == 0:
            printResult(i, cost, results)

    # Print the final prediction to check that training worked
    results = sess.run(output, feed_dict={X: dataX})
    printResult(max_step, cost, results)
Step:    0, Cost: 3.218867, Sentence: n,,,,,,nnnnnnnn,,npnnnneeeieeeii..fi nnnnnppiiiiiii   nn nnnnnfooonnnnnnnnsnnnnnnnneppnnnnnhphii.ckfnnoonpppiiepppnn,nnwe,eepnpbnoomnnnnbffnoonnnnnnnnnnnnnnnoo nrse.c.iiimmff.nnnnn
Step:  100, Cost: 2.839131, Sentence:                                                                                                                                                                                     
Step:  200, Cost: 2.398796, Sentence: tt t tot  t t  tot t d dot t d  to t  t to t  t   t        t  t t   t t  t t t d  to t toti t    t  t ti t d  ti tot t      t     t    t  t  t t   t   t t   tot t  toti to t   tot 
Step:  300, Cost: 1.294974, Sentence: tt dododont doe'uild a thipi don't douc dp deodle ths them toe'odlect dood aod don't ausign them tosks and dorkt aut aother thech them toetoep dor the thdless imm ntiiy ap aoe teal
Step:  400, Cost: 0.691762, Sentence: tm fouowant do build a ship, don't d um up people together to collect wood and don't assign them tosks and work, but rather thach them to bong for the tndless immensity of the sea.
Step:  500, Cost: 0.453372, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step:  600, Cost: 0.362509, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step:  700, Cost: 0.318497, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step:  800, Cost: 0.293086, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step:  900, Cost: 0.277740, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 1000, Cost: 0.268900, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
  • The first character, "i", is difficult to learn, but the rest of the prediction matches the training sentence; the short sketch below shows why.
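
When predicting the first character, the network only sees the leading space, and a space is followed by many different characters in the training text. Counting those followers makes the ambiguity visible (a small sketch, not part of the original post):

from collections import Counter

sentence = (" if you want to build a ship, don't drum up people together to "
            "collect wood and don't assign them tasks and work, but rather "
            "teach them to long for the endless immensity of the sea.")

# Count which characters follow a space in the training text
followers = Counter(sentence[i + 1] for i, c in enumerate(sentence[:-1]) if c == ' ')
print(followers.most_common(3))
# 't' is by far the most frequent follower, which matches the leading 't'
# in the predictions printed above.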
