In [2]:
import tensorflow as tf
print(tf.__version__)
Data Pipeline¶
In [2]:
import numpy as np
import matplotlib.pyplot as plt
Load the CIFAR-100 Dataset¶
In [3]:
from tensorflow.keras.datasets import cifar100
In [11]:
# Load the CIFAR-100 dataset
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode="fine")
In [12]:
# Confirm that reloading the dataset does not require a download
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode="fine")
Examine the Dataset¶
In [13]:
# Examine the shape of the data.
print(train_images.shape)
print(train_labels.shape)
In [17]:
# Examine one of the images and its corresponding label
plt.imshow(train_images[500])
print(train_labels[500])
In [18]:
# Load the list of labels from a JSON file
import json
with open('data/cifar100_fine_labels.json', 'r') as fine_labels:
    cifar100_fine_labels = json.load(fine_labels)
The list of labels for the CIFAR-100 dataset is available here.
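As a quick sanity check (a sketch only; it assumes the JSON file contains one entry per fine class), the loaded list should have exactly 100 entries:

# CIFAR-100 defines 100 fine-grained classes
assert len(cifar100_fine_labels) == 100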
In [19]:
# Print a few of the labels
print(cifar100_fine_labels[:10])
In [20]:
# Print the corresponding label for the example above
cifar100_fine_labels[41]
Out[20]:
Load the data using different label modes¶
In [21]:
# Display a few examples from category 87 (index 86) and the list of labels
examples = train_images[(train_labels.T == 86)[0]][:3]
fig, ax = plt.subplots(1,3)
ax[0].imshow(examples[0])
ax[1].imshow(examples[1])
ax[2].imshow(examples[2])
Out[21]:
In [35]:
cifar100_fine_labels[86]
Out[35]:
In [22]:
# Reload the data using the 'coarse' label mode
(train_images, train_labels), (test_images, test_labels) = cifar100.load_data(label_mode="coarse")
In [32]:
# Display three images from the dataset with the label 6 (index 5)
examples = train_images[(train_labels.T == 5)[0]][:3]
fig, ax = plt.subplots(1,3)
ax[0].imshow(examples[0])
ax[1].imshow(examples[1])
ax[2].imshow(examples[2])
Out[32]:
In [33]:
# Load the list of coarse labels from a JSON file
with open('data/cifar100_coarse_labels.json', 'r') as coarse_labels:
cifar100_coarse_labels = json.load(coarse_labels)
In [34]:
# Print a few of the labels
print(cifar100_coarse_labels[:10])
In [36]:
# Print the corresponding label for the example above
print(cifar100_coarse_labels[5])
Load the IMDB Dataset¶
In [37]:
from tensorflow.keras.datasets import imdb
In [38]:
# Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data()
In [39]:
# Print an example from the training dataset, along with its corresponding label
print(train_data[0])
print(train_labels[0])
In [41]:
# Get the lengths of the input sequences
sequence_lengths = [len(seq) for seq in train_data]
In [42]:
# Determine the maximum and minimum sequence length
print(np.max(sequence_lengths))
print(np.min(sequence_lengths))
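The reviews vary widely in length, so a common follow-up step (not part of this notebook's pipeline; shown only as a sketch, with maxlen=256 chosen arbitrarily) is to pad or truncate them to a fixed length with pad_sequences:

from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad shorter reviews with zeros and truncate longer ones to 256 tokens (illustrative value)
padded_train_data = pad_sequences(train_data, maxlen=256, padding='post', truncating='post')
print(padded_train_data.shape)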
Using Keyword Arguments¶
In [43]:
# Load the data ignoring the 50 most frequent words, use oov_char=2 (this is the default)
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(skip_top=50, oov_char=2)
In [44]:
# Get the lengths of the input sequences
sequence_lengths = [len(seq) for seq in train_data]
In [59]:
# Determine the maximum and minimum sequence length
print(np.max(sequence_lengths))
print(np.min(sequence_lengths))
In [60]:
# Define functions for filtering the sequences
def remove_oov_char(element):
    ''' Filter function for removing the oov_char. '''
    return [word for word in element if word != 2]

def filter_list(lst):
    ''' Run remove_oov_char on elements in a list. '''
    return [remove_oov_char(element) for element in lst]
In [61]:
# Remove the oov_char from the sequences using the filter_list function
train_data = filter_list(train_data)
In [62]:
# Get the lengths of the input sequences
sequence_lengths = [len(seq) for seq in train_data]
In [63]:
# Determine the maximum and minimum sequence length
print(np.max(sequence_lengths))
print(np.min(sequence_lengths))
In [83]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
Load the UCI Fertility Dataset¶
We will be using the Fertility dataset from the UC Irvine Machine Learning Repository, available at https://archive.ics.uci.edu/ml/datasets/Fertility.
In [84]:
# Load the fertility dataset
headers = ['Season', 'Age', 'Diseases', 'Trauma', 'Surgery', 'Fever', 'Alcohol', 'Smoking', 'Sitting', 'Output']
fertility = pd.read_csv('data/fertility_diagnosis.txt', delimiter=',', header=None, names=headers)
In [86]:
# Print the shape of the DataFrame
print(fertility.shape)
In [87]:
# Show the head of the DataFrame
fertility.head()
Out[87]:
Process the data¶
In [88]:
# Map the 'Output' feature from 'N' to 0 and from 'O' to 1
fertility['Output'] = fertility['Output'].map(lambda x : 0.0 if x=='N' else 1.0)
In [91]:
# Show the head of the DataFrame
fertility.head()
Out[91]:
In [90]:
# Convert the DataFrame so that the features are mapped to floats
fertility = fertility.astype('float32')
In [92]:
# Shuffle the DataFrame
fertility = fertility.sample(frac=1).reset_index(drop=True)
In [93]:
# Show the head of the DataFrame
fertility.head()
Out[93]:
In [94]:
# Convert the field Season to a one-hot encoded vector
fertility = pd.get_dummies(fertility, prefix='Season', columns=['Season'])
In [95]:
# Show the head of the DataFrame
fertility.head()
Out[95]:
In [96]:
# Move the Output column such that it is the last column in the DataFrame
fertility = fertility[[col for col in fertility.columns if col != 'Output'] + ['Output']]
In [99]:
# Show the head of the DataFrame
fertility.head()
In [98]:
# Convert the DataFrame to a numpy array.
fertility = fertility.to_numpy()
Split the Data¶
In [100]:
# Split the dataset into training and validation set
training = fertility[0:70]
validation = fertility[70:100]
In [101]:
# Verify the shape of the training data
training.shape
Out[101]:
In [102]:
# Separate the features and labels for the validation and training data
training_features = training[:,0:-1]
training_labels = training[:,-1]
validation_features = validation[:,0:-1]
validation_labels = validation[:,-1]
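At this point each example should have 12 feature columns (the 8 remaining original features plus the 4 one-hot Season columns), which is why the model below uses an input shape of (12,). A quick check (a sketch):

# Expected shapes: (70, 12) for the training features and (70,) for the training labels
print(training_features.shape)
print(training_labels.shape)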
Create the Generator¶
In [103]:
# Create a function that returns a generator producing inputs and labels
def get_generator(features, labels, batch_size=1):
    for n in range(int(len(features)/batch_size)):
        yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])
In [169]:
# Apply the function to our training features and labels with a batch size of 10
train_generator = get_generator(training_features, training_labels, batch_size=10)
In [177]:
# Test the generator using the next() function
next(train_generator)
Build the model¶
In [113]:
# Create a model using Keras with 3 layers
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Input, BatchNormalization
input_shape = (12,)
output_shape = (1,)
model_input = Input(input_shape)
batch_1 = BatchNormalization(momentum=0.8)(model_input)
dense_1 = Dense(100, activation='relu')(batch_1)
batch_2 = BatchNormalization(momentum=0.8)(dense_1)
output = Dense(1, activation='sigmoid')(batch_2)
model = Model([model_input], output)
In [114]:
# Display the model summary to show the resultant structure
model.summary()
Compile the model¶
In [115]:
# Create the optimizer object
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
In [116]:
# Compile the model with loss function and metric
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
Train and evaluate the model using the generator¶
In [117]:
# Calculate the number of training steps per epoch for the given batch size.
batch_size = 5
train_steps = len(training) // batch_size
In [138]:
# Set the epochs to 3
epochs = 3
In [139]:
# Train the model
for epoch in range(epochs):
    train_generator = get_generator(training_features, training_labels, batch_size=batch_size)
    validation_generator = get_generator(validation_features, validation_labels, batch_size=30)
    model.fit_generator(train_generator, steps_per_epoch=train_steps,
                        validation_data=validation_generator,
                        validation_steps=1)
In [137]:
# Try to run the fit_generator function once more; observe what happens
model.fit_generator(train_generator, steps_per_epoch=train_steps)
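The generator passed in above was already consumed by the training loop in the previous cell, so this call should complain that the input ran out of data: a plain Python generator can only be iterated once. Re-creating the generator (a sketch) restores iteration:

# Re-create the exhausted generator before fitting again
train_generator = get_generator(training_features, training_labels, batch_size=batch_size)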
Make an infinitely looping generator¶
In [140]:
# Create a function that returns an infinitely looping generator
def get_generator_cyclic(features, labels, batch_size=1):
    while True:
        for n in range(int(len(features)/batch_size)):
            yield (features[n*batch_size: (n+1)*batch_size], labels[n*batch_size: (n+1)*batch_size])
        permuted = np.random.permutation(len(features))
        features = features[permuted]
        labels = labels[permuted]
In [141]:
# Create a generator using this function.
train_generator_cyclic = get_generator_cyclic(training_features, training_labels, batch_size=batch_size)
In [142]:
# Assert that the new cyclic generator does not raise a StopIteration
for i in range(2*train_steps):
    next(train_generator_cyclic)
In [143]:
# Generate a cyclic validation generator
validation_generator_cyclic = get_generator_cyclic(validation_features, validation_labels, batch_size=batch_size)
In [147]:
# Train the model
model.fit_generator(train_generator_cyclic, steps_per_epoch=train_steps,
                    validation_data=validation_generator_cyclic, validation_steps=1, epochs=3, verbose=1)
Out[147]:
Evaluate the model and get predictions¶
In [200]:
# Let's obtain a validation data generator.
validation_generator = get_generator(validation_features, validation_labels, batch_size=30)
In [201]:
# Get predictions on the validation data
prediction = model.predict_generator(validation_generator, steps=1)
print(np.round(prediction.T[0]))
In [202]:
# Print the corresponding validation labels
print(validation_labels)
In [208]:
# Obtain a validation data generator
validation_generator = get_generator_cyclic(validation_features, validation_labels, batch_size=30)
In [211]:
# Evaluate the model
model.evaluate_generator(validation_generator, steps=1)
Out[211]:
In [212]:
import matplotlib.pyplot as plt
import numpy as np
Load the CIFAR-10 Dataset¶
In [213]:
from tensorflow.keras.datasets import cifar10
In [214]:
# Load the CIFAR-10 dataset
(training_features, training_labels), (test_features, test_labels) = cifar10.load_data()
In [215]:
# Convert the labels to a one-hot encoding
num_classes = 10
training_labels = tf.keras.utils.to_categorical(training_labels, num_classes)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes)
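to_categorical replaces the integer class indices with one-hot rows; a quick shape check (a sketch):

# CIFAR-10 has 50000 training and 10000 test images, so the expected shapes
# are (50000, 10) and (10000, 10)
print(training_labels.shape)
print(test_labels.shape)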
Create a generator function¶
In [216]:
# Create a function that returns a data generator
def get_generator(features, labels, batch_size=1):
    for n in range(int(len(features)/batch_size)):
        yield (features[n*batch_size:(n+1)*batch_size], labels[n*batch_size:(n+1)*batch_size])
In [217]:
# Use the function we created to get a training data generator with a batch size of 1
training_generator = get_generator(training_features, training_labels)
In [218]:
# Assess the shape of the items generated by training_generator using the `next` function to yield an item.
image, label = next(training_generator)
print(image.shape)
print(label.shape)
In [219]:
# Test the training generator by obtaining an image using the `next` generator function, and then using imshow to plot it.
# Print the corresponding label
from matplotlib.pyplot import imshow
image, label = next(training_generator)
image_unbatched = image[0,:,:,:]
imshow(image_unbatched)
print(label)
In [220]:
# Reset the generator by re-running the `get_generator` function.
train_generator = get_generator(training_features, training_labels)
Create a data augmentation generator¶
In [221]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
In [222]:
# Create a function to convert an image to monochrome
def monochrome(x):
    def func_bw(a):
        average_colour = np.mean(a)
        return [average_colour, average_colour, average_colour]
    x = np.apply_along_axis(func_bw, -1, x)
    return x
In [223]:
# Create an ImageDataGenerator object
image_generator = ImageDataGenerator(preprocessing_function=monochrome,
                                     rotation_range=180,
                                     rescale=(1/255.0))
image_generator.fit(training_features)
Check the documentation for the full list of image data augmentation options.
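For instance, several other arguments can be combined in the same call (a sketch only; the values below are arbitrary and this generator is not used elsewhere in the notebook):

# Hypothetical augmentation settings, for illustration only
extra_augmentation = ImageDataGenerator(width_shift_range=0.1,
                                        height_shift_range=0.1,
                                        horizontal_flip=True,
                                        zoom_range=0.2,
                                        rescale=(1/255.0))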
In [224]:
# Create an iterable generator using the `flow` function
image_generator_iterable = image_generator.flow(training_features, training_labels, batch_size=1, shuffle=False)
In [230]:
# Show a sample from the generator and compare with the original
image, label = next(image_generator_iterable)
image_orig, label_orig = next(train_generator)
figs, axes = plt.subplots(1,2)
axes[0].imshow(image[0,:,:,:])
axes[0].set_title('Transformed')
axes[1].imshow(image_orig[0,:,:,:])
axes[1].set_title('Original')
plt.show()
Flow from directory¶
In [226]:
# Inspect the directory structure
train_path = 'data/flowers-recognition-split/train'
val_path = 'data/flowers-recognition-split/val'
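The cell above only defines the paths; one way to actually inspect the class subdirectories (a sketch, assuming the directories exist on disk) is:

import os

# Each flower class should appear as a subdirectory of the train and val folders
print(sorted(os.listdir(train_path)))
print(sorted(os.listdir(val_path)))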
In [231]:
# Create an ImageDataGenerator object
datagenerator = ImageDataGenerator(rescale=(1/255.0))
In [232]:
classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
In [233]:
# Create a training data generator
train_generator = datagenerator.flow_from_directory(train_path, batch_size=64, classes=classes, target_size=(16,16))
In [242]:
# Create a validation data generator
val_generator = datagenerator.flow_from_directory(val_path, batch_size=64, classes=classes, target_size=(16,16))
In [235]:
# Get and display an image and label from the training generator
x = next(train_generator)
imshow(x[0][4])
print(x[1][4])
In [236]:
# Reset the training generator
train_generator = datagenerator.flow_from_directory(train_path, batch_size=64, classes=classes, target_size=(16,16))
Create a model to train¶
In [237]:
# Build a CNN model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense
model = tf.keras.Sequential()
model.add(Input((16,16,3)))
model.add(Conv2D(8, (8, 8), padding='same', activation='relu'))
model.add(MaxPooling2D((4,4)))
model.add(Conv2D(8, (8, 8), padding='same', activation='relu'))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(4, (4, 4), padding='same', activation='relu'))
model.add(Flatten())
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(5, activation='softmax'))
In [238]:
# Create an optimizer object
optimizer = tf.keras.optimizers.Adam(1e-3)
In [239]:
# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
In [240]:
# Print the model summary
model.summary()
Train the model¶
In [243]:
# Calculate the steps per epoch for the training and validation generators
train_steps_per_epoch = train_generator.n // train_generator.batch_size
val_steps = val_generator.n // val_generator.batch_size
print(train_steps_per_epoch, val_steps)
In [244]:
# Fit the model
model.fit_generator(train_generator, steps_per_epoch=train_steps_per_epoch, epochs=5)
Out[244]:
Evaluate the model¶
In [245]:
# Evaluate the model
model.evaluate_generator(val_generator, steps=val_steps)
Out[245]:
Predict using the generator¶
In [246]:
# Predict labels with the model
predictions = model.predict_generator(val_generator, steps=1)
print(np.round(predictions, 2))
In [8]:
import matplotlib.pyplot as plt
import numpy as np
import os
Create a simple dataset¶
In [9]:
x = np.zeros((100,10,2,2))
In [10]:
# Create a dataset from the tensor x
dataset1 = tf.data.Dataset.from_tensor_slices(x)
In [11]:
# Inspect the Dataset object
print(dataset1)
print(dataset1.element_spec)
In [12]:
x2 = [np.zeros((10,2,2)), np.zeros((5,2,2))]
In [13]:
# Try creating a dataset from the tensor x2 (this raises a ValueError, since the two arrays have different shapes and cannot be stacked into a single tensor)
dataset2 = tf.data.Dataset.from_tensor_slices(x2)
In [18]:
x2 = [np.zeros((10,1)), np.zeros((10,1)), np.zeros((10,1))]
In [19]:
# Create another dataset from the new x2 and inspect the Dataset object
dataset2 = tf.data.Dataset.from_tensor_slices(x2)
In [20]:
# Print the element_spec
print(dataset2.element_spec)
Create a zipped dataset¶
In [21]:
# Combine the two datasets into one larger dataset
dataset_zipped = tf.data.Dataset.zip((dataset1, dataset2))
In [22]:
# Print the element_spec
print(dataset_zipped.element_spec)
In [23]:
# Define a function to find the number of batches in a dataset
def get_batches(dataset):
    iter_dataset = iter(dataset)
    i = 0
    try:
        while next(iter_dataset) is not None:
            i = i + 1
    except StopIteration:
        return i
In [24]:
# Find the number of batches in the zipped Dataset
get_batches(dataset_zipped)
Out[24]:
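As with Python's built-in zip, tf.data.Dataset.zip stops at the end of the shorter dataset, so the count returned above equals the number of elements in the smaller of the two zipped datasets.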
Create a dataset from numpy arrays¶
In [25]:
# Load the MNIST dataset
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.mnist.load_data()
print(type(train_features), type(train_labels))
In [26]:
# Create a Dataset from the MNIST data
mnist_dataset = tf.data.Dataset.from_tensor_slices((train_features, train_labels))
In [27]:
# Inspect the Dataset object
print(mnist_dataset.element_spec)
In [28]:
# Inspect the length of an element using the take method
element = next(iter(mnist_dataset.take(1)))
len(element)
Out[28]:
In [29]:
# Examine the shapes of the data
print(element[0].shape)
print(element[1].shape)
Create a dataset from text data¶
In [30]:
# Print the list of text files
text_files = sorted([f.path for f in os.scandir('data/shakespeare')])
print(text_files)
In [31]:
# Load the first file using Python and print the first 5 lines.
with open(text_files[0], 'r') as fil:
    contents = [fil.readline() for i in range(5)]
for line in contents:
    print(line)
In [33]:
# Load the lines from the files into a dataset using TextLineDataset
shakespeare_dataset = tf.data.TextLineDataset(text_files)
In [35]:
# Use the take method to get and print the first 5 lines of the dataset
first_5_lines_dataset = iter(shakespeare_dataset.take(5))
lines = [line for line in first_5_lines_dataset]
for line in lines:
    print(line)
In [36]:
# Compute the number of lines in the first file
lines = []
with open(text_files[0], 'r') as fil:
    line = fil.readline()
    while line:
        lines.append(line)
        line = fil.readline()
print(len(lines))
In [38]:
# Compute the number of lines in the Shakespeare dataset we created
shakespeare_dataset_iterator = iter(shakespeare_dataset)
lines = [line for line in shakespeare_dataset_iterator]
print(len(lines))
Interleave lines from the text data files¶
In [39]:
# Create a dataset of the text file strings
text_files_dataset = tf.data.Dataset.from_tensor_slices(text_files)
files = [file for file in text_files_dataset]
for file in files:
    print(file)
In [40]:
# Interleave the lines from the text files
interleaved_shakespeare_dataset = text_files_dataset.interleave(tf.data.TextLineDataset, cycle_length=9)
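Here cycle_length controls how many files are read from concurrently. A related argument, block_length, sets how many consecutive lines are taken from each file before moving on to the next; for example (a sketch, not used later in the notebook):

# Take 5 consecutive lines from each of 2 files at a time (illustrative values)
interleaved_in_blocks = text_files_dataset.interleave(tf.data.TextLineDataset,
                                                      cycle_length=2, block_length=5)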
In [41]:
# Print the first 10 elements of the interleaved dataset
lines = [line for line in iter(interleaved_shakespeare_dataset.take(10))]
for line in lines:
    print(line)
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
Load the UCI Bank Marketing Dataset¶
In [4]:
# Load the CSV file into a pandas DataFrame
bank_dataframe = pd.read_csv('data/bank/bank-full.csv', delimiter=';')
In [5]:
# Show the head of the DataFrame
bank_dataframe.head()
Out[5]:
In [6]:
# Print the shape of the DataFrame
print(bank_dataframe.shape)
In [7]:
# Select features from the DataFrame
features = ['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
            'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']
bank_dataframe = bank_dataframe.filter(features + labels)
In [8]:
# Show the head of the DataFrame
bank_dataframe.head()
Out[8]:
Preprocess the data¶
In [9]:
# Convert the categorical features in the DataFrame to one-hot encodings
from sklearn.preprocessing import LabelBinarizer
encoder = LabelBinarizer()
categorical_features = ['default', 'housing', 'job', 'loan', 'education', 'contact', 'poutcome']
for feature in categorical_features:
    bank_dataframe[feature] = tuple(encoder.fit_transform(bank_dataframe[feature]))
In [10]:
# Show the head of the DataFrame
bank_dataframe.head()
Out[10]:
In [11]:
# Shuffle the DataFrame
bank_dataframe = bank_dataframe.sample(frac=1).reset_index(drop=True)
Create the Dataset object¶
In [12]:
# Convert the DataFrame to a Dataset
bank_dataset = tf.data.Dataset.from_tensor_slices(dict(bank_dataframe))
In [13]:
# Inspect the Dataset object
bank_dataset.element_spec
Out[13]:
Filter the Dataset¶
In [14]:
# First check that there are records in the dataset for non-divorced individuals
def check_divorced():
    bank_dataset_iterable = iter(bank_dataset)
    for x in bank_dataset_iterable:
        if x['marital'] != 'divorced':
            print('Found a person with marital status: {}'.format(x['marital']))
            return
    print('No non-divorced people were found!')

check_divorced()
In [15]:
# Filter the Dataset to retain only entries with a 'divorced' marital status
bank_dataset = bank_dataset.filter(lambda x : tf.equal(x['marital'], tf.constant([b'divorced']))[0] )
In [16]:
# Check the records in the dataset again
check_divorced()
Map a function over the dataset¶
In [17]:
# Convert the label ('y') to an integer instead of 'yes' or 'no'
def map_label(x):
    x['y'] = 0 if (x['y'] == tf.constant([b'no'], dtype=tf.string)) else 1
    return x

bank_dataset = bank_dataset.map(map_label)
In [18]:
# Inspect the Dataset object
bank_dataset.element_spec
Out[18]:
In [19]:
# Remove the 'marital' column
bank_dataset = bank_dataset.map(lambda x : {key:val for key,val in x.items() if key != 'marital'})
In [20]:
# Inspect the Dataset object
bank_dataset.element_spec
Out[20]:
Create input and output data tuples¶
In [21]:
# Create an input and output tuple for the dataset
def map_feature_label(x):
    features = [[x['age']], [x['balance']], [x['campaign']], x['contact'], x['default'],
                x['education'], x['housing'], x['job'], x['loan'], [x['pdays']], x['poutcome']]
    return (tf.concat(features, axis=0), x['y'])
In [22]:
# Map this function over the dataset
bank_dataset = bank_dataset.map(map_feature_label)
In [23]:
# Inspect the Dataset object
bank_dataset.element_spec
Out[23]:
Split into a training and a validation set¶
In [24]:
# Determine the length of the Dataset
dataset_length = 0
for _ in bank_dataset:
    dataset_length += 1
print(dataset_length)
In [25]:
# Make training and validation sets from the dataset
training_elements = int(dataset_length * 0.7)
train_dataset = bank_dataset.take(training_elements)
validation_dataset = bank_dataset.skip(training_elements)
Build a classification model¶
Now let's build a model to classify the features.
In [26]:
# Build a classifier model
from tensorflow.keras.layers import Dense, Input, Concatenate, BatchNormalization
from tensorflow.keras import Sequential
model = Sequential()
model.add(Input(shape=(30,)))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(400, activation='relu'))
model.add(BatchNormalization(momentum=0.8))
model.add(Dense(1, activation='sigmoid'))
In [27]:
# Compile the model
optimizer = tf.keras.optimizers.Adam(1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
In [28]:
# Show the model summary
model.summary()
Train the model¶
In [29]:
# Create batched training and validation datasets
train_dataset = train_dataset.batch(20, drop_remainder=True)
validation_dataset = validation_dataset.batch(100)
In [30]:
# Shuffle the training data
train_dataset = train_dataset.shuffle(1000)
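Note that shuffling after batching reorders whole batches rather than individual examples. If per-example shuffling is wanted, the shuffle would normally be applied before batch, e.g. (a sketch of the alternative ordering, not used here):

# Alternative: shuffle individual examples first, then batch
# train_dataset = bank_dataset.take(training_elements).shuffle(1000).batch(20, drop_remainder=True)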
In [31]:
# Fit the model
history = model.fit(train_dataset, validation_data=validation_dataset, epochs=5)
In [33]:
# Plot the training and validation accuracy
plt.plot(history.epoch, history.history["accuracy"], label="training")
plt.plot(history.epoch, history.history["val_accuracy"], label="validation")
plt.legend()
plt.xlabel("Epoch")
Out[33]: