Sequence Modelling

Coding Tutorial
In [ ]:
import tensorflow as tf
print(tf.__version__)

The IMDb Dataset

Load the IMDB review sentiment dataset

In [ ]:
# Import imdb
In [ ]:
# Download and assign the data set using load_data()
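
For example, a minimal sketch of this cell (load_data downloads the data on first use and caches it locally):

In [ ]:
from tensorflow.keras.datasets import imdb

# Download the dataset and unpack the train/test splits
(x_train, y_train), (x_test, y_test) = imdb.load_data()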

Inspect the dataset

In [ ]:
# Inspect the type of the data
In [ ]:
# Inspect the shape of the data
In [ ]:
# Display the first dataset element input
# Notice encoding
In [ ]:
# Display the first dataset element output
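
A sketch of these inspections, assuming the variables from the cell above:

In [ ]:
print(type(x_train), type(y_train))  # both are numpy arrays
print(x_train.shape, y_train.shape)  # (25000,) each: one entry per review
print(x_train[0])  # the first review, encoded as a list of integer word indices
print(y_train[0])  # the first label: 0 (negative) or 1 (positive)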

Load dataset with different options

In [ ]:
# Load the dataset with defaults


# Downloads are cached in ~/.keras/datasets/
In [ ]:
# Limit the vocabulary to the top 500 words using num_words
In [ ]:
# Ignore the top 10 most frequent words using skip_top
In [ ]:
# Limit the sequence lengths to 500 using maxlen
In [ ]:
# Use '1' as the character that indicates the start of a sequence
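
One possible call for each of the four options above (each reloads the dataset from the cache):

In [ ]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=500)  # keep only the top 500 words
(x_train, y_train), (x_test, y_test) = imdb.load_data(skip_top=10)    # replace the 10 most frequent words with the OOV token
(x_train, y_train), (x_test, y_test) = imdb.load_data(maxlen=500)     # discard reviews longer than 500 tokens
(x_train, y_train), (x_test, y_test) = imdb.load_data(start_char=1)   # every sequence begins with token 1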

Explore the dataset word index

In [ ]:
# Load the imdb word index using get_word_index()
In [ ]:
# View the word index as a dictionary,
# accounting for index_from.
In [ ]:
# Retrieve a specific word's index
In [ ]:
# View an input sentence
In [ ]:
# Get the sentiment value
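
A sketch covering these five cells, assuming the load_data defaults (all word indices are shifted by index_from=3):

In [ ]:
imdb_word_index = imdb.get_word_index()

index_from = 3
word_index = {word: index + index_from for word, index in imdb_word_index.items()}

print(word_index['the'])  # retrieve a specific word's index

inv_word_index = {index: word for word, index in word_index.items()}
print(' '.join(inv_word_index.get(token, '?') for token in x_train[0]))  # view an input sentence
print(y_train[0])  # the sentiment value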

Padding and Masking Sequence Data

In [ ]:
# Load the imdb data set

Preprocess the data with padding

In [ ]:
# Inspect the input data shape
In [ ]:
# Pad the inputs to the maximum length using maxlen
In [ ]:
# Inspect the output data shape
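
A sketch using pad_sequences (300 is an arbitrary illustrative length):

In [ ]:
print(x_train.shape)  # (25000,): a ragged collection of sequences

padded_x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=300)
print(padded_x_train.shape)  # (25000, 300)

print(y_train.shape)  # (25000,)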

Create a Masking layer

In [ ]:
# Import numpy 
In [ ]:
# Masking expects to see (batch, sequence, features)
# Create a dummy feature dimension using expand_dims
In [ ]:
# Create a Masking layer 
In [ ]:
# Pass tf_x_train to it
In [ ]:
# Look at the dataset
In [ ]:
# Look at the ._keras_mask for the dataset
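
A sketch of the masking steps, assuming padded_x_train from the previous subsection:

In [ ]:
import numpy as np

# Masking expects (batch, sequence, features), so add a dummy feature axis
tf_x_train = np.expand_dims(padded_x_train, -1)

masking_layer = tf.keras.layers.Masking(mask_value=0)
masked_x_train = masking_layer(tf_x_train)

print(masked_x_train[0])              # the values themselves are unchanged
print(masked_x_train._keras_mask[0])  # False wherever the input was padding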

The Embedding layer

Create and apply an Embedding layer

In [ ]:
# Create an embedding layer using layers.Embedding
# Specify input_dim, output_dim, input_length
In [ ]:
# Inspect an Embedding layer output for a fixed input
# Takes integer inputs of shape (batch, sequence) and
# returns an output of shape (batch, sequence, output_dim)
In [ ]:
# Inspect the Embedding layer weights using get_weights()
In [ ]:
# Get the embedding for the 14th index
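
A sketch of these cells (the vocabulary size, embedding dimension, and input length are arbitrary illustrative choices):

In [ ]:
embedding_layer = tf.keras.layers.Embedding(input_dim=501, output_dim=16, input_length=4)

sequence_of_indices = tf.constant([[0, 5, 14, 22]])
sequence_of_embeddings = embedding_layer(sequence_of_indices)
print(sequence_of_embeddings.shape)  # (1, 4, 16)

embedding_weights = embedding_layer.get_weights()[0]
print(embedding_weights.shape)  # (501, 16)
print(embedding_weights[14])    # the embedding for the 14th index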

Create and apply an Embedding layer that uses mask_zero=True

In [ ]:
# Create a layer that uses the mask_zero kwarg
In [ ]:
# Apply this layer to the sequence and see the _keras_mask property
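
A sketch using mask_zero (vocabulary size again arbitrary):

In [ ]:
masking_embedding_layer = tf.keras.layers.Embedding(input_dim=501, output_dim=16, mask_zero=True)

tokens = tf.constant([[0, 0, 5, 14, 22]])
masked_embeddings = masking_embedding_layer(tokens)
print(masked_embeddings._keras_mask)  # [[False False  True  True  True]]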

The Embedding Projector

Load and preprocess the IMDb data

In [ ]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    from tensorflow.keras.datasets import imdb

    # Load the reviews
    (x_train, y_train), (x_test, y_test) = imdb.load_data(path='imdb.npz',
                                                          num_words=num_words,
                                                          skip_top=0,
                                                          maxlen=maxlen,
                                                          start_char=1,
                                                          oov_char=2,
                                                          index_from=index_from)

    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                            maxlen=None,
                                                            padding='pre',
                                                            truncating='pre',
                                                            value=0)
    
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                           maxlen=None,
                                                           padding='pre',
                                                           truncating='pre',
                                                           value=0)
    return (x_train, y_train), (x_test, y_test)
In [ ]:
# Load the dataset
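
For example, using the helper above (maxlen=250 is an arbitrary choice):

In [ ]:
(x_train, y_train), (x_test, y_test) = get_and_pad_imdb_dataset(maxlen=250)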
In [ ]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    imdb_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    imdb_word_index = {key: value + index_from
                       for key, value in imdb_word_index.items()
                       if value <= num_words - index_from}
    return imdb_word_index
In [ ]:
# Get the word index
In [ ]:
# Swap the keys and values of the word index
In [ ]:
# View the first dataset example sentence
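
A sketch of these three cells, assuming the dataset variables loaded above:

In [ ]:
word_index = get_imdb_word_index()

inv_word_index = {index: word for word, index in word_index.items()}

# Tokens 0, 1 and 2 are the padding, start and out-of-vocabulary markers
print(' '.join(inv_word_index.get(token, '?') for token in x_train[0] if token > 2))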

Build an Embedding layer into a model

In [ ]:
# Get the maximum token value
In [ ]:
# Specify an embedding dimension
In [ ]:
# Build a model using Sequential:
#     1. Embedding layer
#     2. GlobalAveragePooling1D
#     3. Dense
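
A sketch of these three cells, assuming word_index from the previous subsection (the embedding dimension is an arbitrary choice):

In [ ]:
max_index_value = max(word_index.values())
embedding_dim = 16

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1, activation='sigmoid')
])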
In [ ]:
# Functional API refresher: use the Model class to build the same model
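
The same model built with the functional API might look like this:

In [ ]:
inputs = tf.keras.Input(shape=(None,))
h = tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True)(inputs)
h = tf.keras.layers.GlobalAveragePooling1D()(h)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(h)

model = tf.keras.Model(inputs=inputs, outputs=outputs)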
In [ ]:
model.summary()

Compile, train, and evaluate the model

In [ ]:
# Compile the model with a binary cross-entropy loss
In [ ]:
# Train the model using .fit(), saving its history
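
A sketch of the compile and fit calls (the epoch count and batch size are arbitrary; the test split doubles as validation data, which is what the plot below assumes):

In [ ]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    epochs=5, batch_size=32,
                    validation_data=(x_test, y_test))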
In [ ]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);

The TensorFlow embedding projector

The TensorFlow embedding projector can be found at https://projector.tensorflow.org.

In [ ]:
# Retrieve the embedding layer's weights from the trained model
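
For example, assuming the Embedding layer is the model's first layer:

In [ ]:
weights = model.layers[0].get_weights()[0]  # shape (vocabulary_size, embedding_dim)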
In [ ]:
# Save the word embeddings to tsv files
# Two files: 
#     one contains the embedding labels (meta.tsv),
#     one contains the embeddings (vecs.tsv)

import io
from os import path

out_v = io.open(path.join('data', 'vecs.tsv'), 'w', encoding='utf-8')
out_m = io.open(path.join('data', 'meta.tsv'), 'w', encoding='utf-8')

k = 0

for word, token in word_index.items():
    if k != 0:
        out_m.write('\n')
        out_v.write('\n')
    
    out_v.write('\t'.join([str(x) for x in weights[token]]))
    out_m.write(word)
    k += 1
    
out_v.close()
out_m.close()
# beware large collections of embeddings!

Recurrent neural network layers

Initialize and pass an input to a SimpleRNN layer

In [ ]:
# Create a SimpleRNN layer and test it
In [ ]:
# Note that only the final cell output is returned
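
A sketch of these two cells:

In [ ]:
simple_rnn_layer = tf.keras.layers.SimpleRNN(units=16)

# SimpleRNN expects inputs of shape (batch, sequence, features)
sequence = tf.constant([[[1.0], [2.0], [3.0]]])
output = simple_rnn_layer(sequence)
print(output.shape)  # (1, 16): only the final cell output is returned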

Load and transform the IMDB review sentiment dataset

In [ ]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    from tensorflow.keras.datasets import imdb

    # Load the reviews
    (x_train, y_train), (x_test, y_test) = imdb.load_data(path='imdb.npz',
                                                          num_words=num_words,
                                                          skip_top=0,
                                                          maxlen=maxlen,
                                                          start_char=1,
                                                          oov_char=2,
                                                          index_from=index_from)

    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                            maxlen=None,
                                                            padding='pre',
                                                            truncating='pre',
                                                            value=0)
    
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                           maxlen=None,
                                                           padding='pre',
                                                           truncating='pre',
                                                           value=0)
    return (x_train, y_train), (x_test, y_test)
In [ ]:
# Load the dataset
In [ ]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    imdb_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    imdb_word_index = {key: value + index_from
                       for key, value in imdb_word_index.items()
                       if value <= num_words - index_from}
    return imdb_word_index
In [ ]:
# Get the word index using get_imdb_word_index()

Create a recurrent neural network model

In [ ]:
# Get the maximum index value
In [ ]:
# Using Sequential, build the model:
# 1. Embedding.
# 2. LSTM.
# 3. Dense.
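
A sketch of these two cells, assuming the word index from the cell above is bound to imdb_word_index:

In [ ]:
max_index_value = max(imdb_word_index.values())
embedding_dim = 16

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True),
    tf.keras.layers.LSTM(units=16),
    tf.keras.layers.Dense(1, activation='sigmoid')
])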

Compile and fit the model

In [ ]:
# Compile the model with binary cross-entropy loss
In [ ]:
# Fit the model and save its training history
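
For example, assuming the dataset variables loaded above:

In [ ]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    epochs=3, batch_size=32,
                    validation_data=(x_test, y_test))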

Plot learning curves

In [ ]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);

Make predictions with the model

In [ ]:
# View the first test data example sentence
# (invert the word index)
In [ ]:
# Get the model prediction using model.predict()
In [ ]:
# Get the corresponding label
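
A sketch of these three cells, assuming the variable names used earlier in this section:

In [ ]:
inv_imdb_word_index = {index: word for word, index in imdb_word_index.items()}
print(' '.join(inv_imdb_word_index.get(token, '?') for token in x_test[0] if token > 2))

prediction = model.predict(x_test[None, 0])  # predict expects a batch dimension
print(prediction)  # the predicted probability of a positive review
print(y_test[0])   # the true label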

Stacked RNNs and the Bidirectional wrapper

Load and transform the IMDB review sentiment dataset

In [ ]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    from tensorflow.keras.datasets import imdb

    # Load the reviews
    (x_train, y_train), (x_test, y_test) = imdb.load_data(path='imdb.npz',
                                                          num_words=num_words,
                                                          skip_top=0,
                                                          maxlen=maxlen,
                                                          start_char=1,
                                                          oov_char=2,
                                                          index_from=index_from)

    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                            maxlen=None,
                                                            padding='pre',
                                                            truncating='pre',
                                                            value=0)
    
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                           maxlen=None,
                                                           padding='pre',
                                                           truncating='pre',
                                                           value=0)
    return (x_train, y_train), (x_test, y_test)
In [ ]:
# Load the dataset
In [ ]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=10000, index_from=2):
    imdb_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    imdb_word_index = {key: value + index_from
                       for key, value in imdb_word_index.items()
                       if value <= num_words - index_from}
    return imdb_word_index
In [ ]:
# Get the word index using get_imdb_word_index()

Build stacked and bidirectional recurrent models

In [ ]:
# Get the maximum index value and specify an embedding dimension
In [ ]:
# Using Sequential, build a stacked LSTM model via return_sequences=True
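
A stacked model might look like this, assuming the word index above is bound to imdb_word_index (layer sizes are arbitrary):

In [ ]:
max_index_value = max(imdb_word_index.values())
embedding_dim = 16

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True),
    tf.keras.layers.LSTM(units=32, return_sequences=True),  # pass the whole sequence to the next layer
    tf.keras.layers.LSTM(units=32),                         # the final layer returns only its last output
    tf.keras.layers.Dense(1, activation='sigmoid')
])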
In [ ]:
# Using Sequential, build a bidirectional RNN with merge_mode='sum'
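
A bidirectional model that sums the forward and backward outputs, assuming the variables above:

In [ ]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=16), merge_mode='sum'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])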
In [ ]:
# Create a model featuring both stacked recurrent layers and a bidirectional layer
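
And one way to combine the two ideas, again assuming the variables above:

In [ ]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_index_value + 1, output_dim=embedding_dim, mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(units=16, return_sequences=True), merge_mode='concat'),
    tf.keras.layers.GRU(units=16),
    tf.keras.layers.Dense(1, activation='sigmoid')
])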

Compile and fit the model

In [ ]:
# Compile the model
In [ ]:
# Train the model, saving its history
In [ ]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);