Implementation of RNN
Character Level RNN
- The training sentence is "hihello"
- In this sequence, the character after "h" is sometimes "i" and sometimes "e", so a basic DNN, which only sees the current character, cannot make this prediction reliably (see the short illustration below).
- An RNN, however, keeps the preceding context in its hidden state and can predict the next character correctly.
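- A minimal sketch of that ambiguity (illustration only; the variable names are made up and are not part of the model code below):
# Illustration only: input/target character pairs in "hihello"
sample = "hihello"
pairs = list(zip(sample[:-1], sample[1:]))
print(pairs)  # [('h', 'i'), ('i', 'h'), ('h', 'e'), ('e', 'l'), ('l', 'l'), ('l', 'o')]
# Both 'h' and 'l' map to two different targets, so a model that only sees
# the current character cannot resolve them; an RNN can, because its hidden
# state also carries the preceding context.
targets = {}
for x, y in pairs:
    targets.setdefault(x, set()).add(y)
print(targets)  # e.g. {'h': {'i', 'e'}, 'i': {'h'}, 'e': {'l'}, 'l': {'l', 'o'}}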
import tensorflow as tf
import numpy as np
# Reproducibility
tf.set_random_seed(777)
# Unique characters of "hihello"
uniqueChar = ['h', 'i', 'e', 'l', 'o']
# One hot encoded data
oh = {}
oh['0'] = [0, 0, 0, 0, 0]
for i, c in enumerate(uniqueChar):
oh[c] = oh['0'].copy()
oh[c][i] = 1
# Relationship between input and output
# h -> i
# i -> h
# h -> e
# e -> l
# l -> l
# l -> o
x_data = [[0, 1, 0, 2, 3, 3]] # hihell
x_one_hot = [[oh['h'],
oh['i'],
oh['h'],
oh['e'],
oh['l'],
oh['l']]]
y_data = [[1, 0, 2, 3, 3, 4]] # ihello
num_labels = len(uniqueChar)
# The size of one-hot encoded data
input_dim = len(oh['0'])
# Output from the LSTM. 5 to directly predict one-hot
hidden_size = 5
# One sentence
batch_size = 1
# Number of time steps the RNN is unrolled over
# In this example, the whole input sequence is used:
# len("hihell") == 6
sequence_length = len(y_data[0])
# Learning rate
learning_rate = 0.1
# Input and Answer
X = tf.placeholder(
tf.float32, [None, sequence_length, input_dim])
# Because the whole sequence is predicted at once,
# the last dimension of Y equals sequence_length
Y = tf.placeholder(tf.int32, [None, sequence_length])
# Training cell
# An LSTM cell usually trains better than a basic RNN cell
cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_size)
# Initialization
initial_state = cell.zero_state(batch_size, tf.float32)
# Construct RNN with cell and input data
outputs, _states = tf.nn.dynamic_rnn(
cell, X, initial_state=initial_state, dtype=tf.float32)
# Flatten batch and time dimensions for the fully connected layer
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
# FCN
outputs = tf.contrib.layers.fully_connected(
inputs=X_for_fc, num_outputs=num_labels, activation_fn=None)
# Reshape outputs for sequence_loss
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_labels])
# Weights for sequence_loss
weights = tf.ones([batch_size, sequence_length])
# Cost function for RNN from TensorFlow
# sequence_loss applies softmax internally, so the raw outputs are passed as logits
sequence_loss = tf.contrib.seq2seq.sequence_loss(
logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# Prediction
prediction = tf.argmax(outputs, axis=2)
# Train
with tf.Session() as sess:
# Initialize RNN
sess.run(tf.global_variables_initializer())
for i in range(50):
cost, _ = sess.run([loss, train], feed_dict={X: x_one_hot, Y: y_data})
result = sess.run(prediction, feed_dict={X: x_one_hot})
# Print the predicted characters using uniqueChar
if i % 10 == 0:
result_str = [uniqueChar[c] for c in np.squeeze(result)]
print("Step: {0:2d}, Cost: {1:.6f}, Prediction: {2}".format(
i, cost, ''.join(result_str)))
Step: 0, Cost: 1.643556, Prediction: llllll
Step: 10, Cost: 0.689070, Prediction: ehello
Step: 20, Cost: 0.099703, Prediction: ihello
Step: 30, Cost: 0.010674, Prediction: ihello
Step: 40, Cost: 0.003086, Prediction: ihello
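- For reference, sequence_loss above is the average per-time-step softmax cross-entropy between the logits and the targets. A minimal NumPy sketch of the same computation on made-up logits, assuming all weights are 1 as in the code above:
import numpy as np

def manual_sequence_loss(logits, targets):
    # logits: [sequence_length, num_labels], targets: [sequence_length]
    # Softmax over the label dimension
    e = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = e / e.sum(axis=1, keepdims=True)
    # Negative log-likelihood of the target label at each time step
    nll = -np.log(probs[np.arange(len(targets)), targets])
    # With all weights equal to 1, the loss is the mean over time steps
    return nll.mean()

# Made-up logits for a 2-step, 3-label toy example
toy_logits = np.array([[2.0, 0.1, 0.1],
                       [0.1, 2.0, 0.1]])
print(manual_sequence_loss(toy_logits, [0, 1]))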
Generate Parameters for RNN
- Train on a sentence that consists of several words: "if you want me"
- With Python, it is easy to derive the values the RNN needs (vocabulary, index map, input and target sequences) from the input sentence, as sketched below.
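- For example, a minimal sketch of this preprocessing on a toy sentence (illustration only; the code below does the same with "if you want me"):
# Illustration only: deriving the vocabulary, index map, and shifted x/y data
toy = "you me"
toy_chars = list(set(toy))                         # unique characters
toy_idx = {c: i for i, c in enumerate(toy_chars)}  # character -> index
toy_indices = [toy_idx[c] for c in toy]            # sentence as indices
toy_x = toy_indices[:-1]                           # input:  "you m"
toy_y = toy_indices[1:]                            # target: "ou me"
print(toy_chars, toy_x, toy_y)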
import tensorflow as tf
import numpy as np
# Reproducibility
tf.set_random_seed(777)
# Training data
sentence = " if you want me"
uniqueChar = list(set(sentence))
idx4Char = {c: i for i, c in enumerate(uniqueChar)}
# Hyper parameters
# RNN hidden size
rnn_hidden_size = len(uniqueChar)
print("RNN hidden size: {0}".format(rnn_hidden_size))
# Number of labels
num_labels = len(idx4Char)
print("Number of Labels: {0}".format(num_labels))
# One sentence
batch_size = 1
# Number of unrolled time steps in the RNN
# In this example, the whole sentence (minus the last character) is fed at once.
sequence_length = len(sentence) - 1
print("Sequence length: {0}".format(sequence_length))
# Learning rate
learning_rate = 0.001
# Input and output data
input_idx = [idx4Char[c] for c in sentence]
x_data = [input_idx[:-1]] # X data: " if you want m"
y_data = [input_idx[1:]] # Y label: "if you want me"
# Placeholder for input and label
X = tf.placeholder(tf.int32, [None, sequence_length]) # X data
Y = tf.placeholder(tf.int32, [None, sequence_length]) # Y label
# One hot encoded data
X_one_hot = tf.one_hot(X, num_labels)
print("One hot encoded data for X: {0}".format(X_one_hot.shape))
cell = tf.contrib.rnn.BasicLSTMCell(
num_units=rnn_hidden_size, state_is_tuple=True)
initial_state = cell.zero_state(batch_size, tf.float32)
rnn_output, _states = tf.nn.dynamic_rnn(
cell, X_one_hot, initial_state=initial_state, dtype=tf.float32)
# Reshape rnn output data for FCN
output_for_fcn = tf.reshape(rnn_output, [-1, rnn_hidden_size])
print("Reshaped output data: {0}".format(output_for_fcn.shape))
# FCN
affine = tf.contrib.layers.fully_connected(
inputs=output_for_fcn, num_outputs=num_labels, activation_fn=None)
# Reshape affine for sequence_loss
output = tf.reshape(affine, [batch_size, sequence_length, num_labels])
# Weights for sequence_loss
weights = tf.ones([batch_size, sequence_length])
# Cost function for RNN from TensorFlow
sequence_loss = tf.contrib.seq2seq.sequence_loss(
logits=output, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# Prediction
prediction = tf.argmax(output, axis=2)
# Train
with tf.Session() as sess:
# Initialize RNN
sess.run(tf.global_variables_initializer())
for i in range(1000):
cost, _op = sess.run([loss, train], feed_dict={X: x_data, Y: y_data})
result = sess.run(prediction, feed_dict={X: x_data})
# Print the predicted characters using uniqueChar
if i % 100 == 0:
result_str = [uniqueChar[c] for c in np.squeeze(result)]
print("Step: {0:5d}, Cost: {1:.6f}, Prediction: {2}".format(
i, cost, ''.join(result_str)))
RNN hidden size: 12
Number of Labels: 12
Sequence length: 14
One hot encoded data for X: (?, 14, 12)
Reshaped output data: (14, 12)
Step: 0, Cost: 2.482816, Prediction: auaawwwwaawwww
Step: 100, Cost: 2.117091, Prediction: n
Step: 200, Cost: 1.438025, Prediction: yo y want me
Step: 300, Cost: 0.947428, Prediction: if yo want me
Step: 400, Cost: 0.606214, Prediction: if you want me
Step: 500, Cost: 0.363260, Prediction: if you want me
Step: 600, Cost: 0.221725, Prediction: if you want me
Step: 700, Cost: 0.140786, Prediction: if you want me
Step: 800, Cost: 0.097087, Prediction: if you want me
Step: 900, Cost: 0.071458, Prediction: if you want me
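- In this example, tf.one_hot replaces the hand-built one-hot dictionary of the first example, and the reshape before the fully connected layer folds the batch and time dimensions together, giving batch_size * sequence_length rows of hidden_size values. A minimal sketch of those two shape changes with toy index data (names and sizes are made up for illustration):
import tensorflow as tf
import numpy as np

toy_idx = np.array([[0, 2, 1]])                      # batch_size=1, sequence_length=3
toy_depth = 4                                        # number of labels
toy_one_hot = tf.one_hot(toy_idx, toy_depth)         # shape (1, 3, 4)
toy_flat = tf.reshape(toy_one_hot, [-1, toy_depth])  # shape (3, 4)
with tf.Session() as sess:
    print(sess.run(toy_one_hot).shape)  # (1, 3, 4)
    print(sess.run(toy_flat).shape)     # (3, 4)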
Long Data Set Training with Batch
- Train on a long sentence: "if you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea."
- A long data set can be split into many overlapping pieces, which are then trained together as one batch, as sketched below.
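- For example, the sliding-window split looks like this on a toy string (illustration only; the code below builds dataX and dataY the same way):
# Illustration only: overlapping windows, each target shifted by one character
toy = "hello world"
toy_seq_len = 4
for i in range(0, len(toy) - toy_seq_len):
    print(repr(toy[i:i + toy_seq_len]), "->", repr(toy[i + 1:i + toy_seq_len + 1]))
# 'hell' -> 'ello'
# 'ello' -> 'llo '
# ... each window becomes one row of the training batch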
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
# Reproducibility
tf.set_random_seed(777)
# Training data
sentence = (" if you want to build a ship, don't drum up people together to "
"collect wood and don't assign them tasks and work, but rather "
"teach them to long for the endless immensity of the sea.")
uniqueChar = list(set(sentence))
idx4Char = {w: i for i, w in enumerate(uniqueChar)}
# Hyper parameters
# Data dimension
data_dim = len(uniqueChar)
# Hidden size
hidden_size = len(uniqueChar)
# Number of labels
num_labels = len(uniqueChar)
# Sequence length
sequence_length = 10
# Learning rate
learning_rate = 0.01
# Number of stacked LSTM layers
num_cells = 7
# Split the sentence and make input and output data
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
x_str = sentence[i:i + sequence_length]
y_str = sentence[i + 1: i + sequence_length + 1]
# Data translated to index
x = [idx4Char[c] for c in x_str]
y = [idx4Char[c] for c in y_str]
# Store index data to input and output lists
dataX.append(x)
dataY.append(y)
# Batch size
batch_size = len(dataX)
# Placeholders for input and output
X = tf.placeholder(tf.int32, [None, sequence_length])
Y = tf.placeholder(tf.int32, [None, sequence_length])
# One hot encoded data
X_one_hot = tf.one_hot(X, num_labels)
# Make a LSTM cell with hidden_size
def lstm_cell():
cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True)
return cell
# Make multiple LSTM cells
multi_cells = rnn.MultiRNNCell([lstm_cell() for _ in range(num_cells)])
# Output shape: [batch_size, sequence_length, hidden_size]
# State: the final state of each stacked LSTM layer
rnn_output, _states = tf.nn.dynamic_rnn(
multi_cells, X_one_hot, dtype=tf.float32)
# Reshape rnn output data for FCN
output_for_fc = tf.reshape(rnn_output, [-1, hidden_size])
# FCN
affine = tf.contrib.layers.fully_connected(
output_for_fc, num_labels, activation_fn=None)
# Reshape affine for sequence_loss
output = tf.reshape(affine, [batch_size, sequence_length, num_labels])
# Give every time step an equal weight of 1 in sequence_loss
weights = tf.ones([batch_size, sequence_length])
# Cost function for RNN from TensorFlow
sequence_loss = tf.contrib.seq2seq.sequence_loss(
logits=output, targets=Y, weights=weights)
# average of cost
mean_loss = tf.reduce_mean(sequence_loss)
# Optimizer: Adam
train_op = tf.train.AdamOptimizer(
learning_rate=learning_rate).minimize(mean_loss)
def printResult(_step, _cost, _results):
print("Step: {0:4d}, Cost: {1:.6f}, Sentence: ".format(_step, _cost), end='')
for j, result in enumerate(_results):
index = np.argmax(result, axis=1)
# The first window is printed in full; later windows only add their last (newest) character
if j == 0:
print(''.join([uniqueChar[t] for t in index]), end='')
else:
print(uniqueChar[index[-1]], end='')
print("")
with tf.Session() as sess:
# Initialize RNN
sess.run(tf.global_variables_initializer())
max = 1000
cost = 0
# Train
for i in range(max):
_op, cost, results = sess.run(
[train_op, mean_loss, output], feed_dict={X: dataX, Y: dataY})
if i % 100 == 0:
#print("Step: {0:4d}, Cost: {1:.6f}".format(i, cost))
printResult(i, cost, results)
# Print the final stitched prediction to check that it works
results = sess.run(output, feed_dict={X: dataX})
printResult(max, cost, results)
Step: 0, Cost: 3.218867, Sentence: n,,,,,,nnnnnnnn,,npnnnneeeieeeii..fi nnnnnppiiiiiii nn nnnnnfooonnnnnnnnsnnnnnnnneppnnnnnhphii.ckfnnoonpppiiepppnn,nnwe,eepnpbnoomnnnnbffnoonnnnnnnnnnnnnnnoo nrse.c.iiimmff.nnnnn
Step: 100, Cost: 2.839131, Sentence:
Step: 200, Cost: 2.398796, Sentence: tt t tot t t tot t d dot t d to t t to t t t t t t t t t t t d to t toti t t t ti t d ti tot t t t t t t t t t t tot t toti to t tot
Step: 300, Cost: 1.294974, Sentence: tt dododont doe'uild a thipi don't douc dp deodle ths them toe'odlect dood aod don't ausign them tosks and dorkt aut aother thech them toetoep dor the thdless imm ntiiy ap aoe teal
Step: 400, Cost: 0.691762, Sentence: tm fouowant do build a ship, don't d um up people together to collect wood and don't assign them tosks and work, but rather thach them to bong for the tndless immensity of the sea.
Step: 500, Cost: 0.453372, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 600, Cost: 0.362509, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 700, Cost: 0.318497, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 800, Cost: 0.293086, Sentence: tm you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 900, Cost: 0.277740, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
Step: 1000, Cost: 0.268900, Sentence: tf you want to build a ship, don't drum up people together to collect wood and don't assign them tasks and work, but rather teach them to long for the endless immensity of the sea.
- The first character, "i", is hard to learn because the model has no preceding context for it; the rest of the prediction matches the training sentence. (The printed sentence is stitched from the overlapping windows, as sketched below.)
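- For reference, the stitching done by printResult can be sketched as follows (illustration only, with made-up per-window predictions): the first window is printed in full, and every later window contributes only its last character.
# Illustration only: stitching overlapping window predictions back into text
toy_windows = ["hell", "ello", "llo ", "lo w"]  # made-up per-window predictions
stitched = toy_windows[0] + ''.join(w[-1] for w in toy_windows[1:])
print(stitched)  # "hello w"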