In [32]:
import tensorflow as tf
print(tf.__version__)
Model subclassing and custom training loops
In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Softmax, concatenate
Create a simple model using the model subclassing API
In [21]:
# Build the model
class MyModel(Model):

    def __init__(self):
        super(MyModel, self).__init__()
        self.dense_1 = Dense(64, activation="relu")
        self.dense_2 = Dense(10)
        self.dense_3 = Dense(5)
        self.softmax = Softmax()

    def call(self, inputs, training=True):
        x = self.dense_1(inputs)
        y1 = self.dense_2(x)
        y2 = self.dense_3(y1)
        concat = concatenate([x, y2])
        return self.softmax(concat)
In [22]:
# Print the model summary
mymodel = MyModel()
mymodel(tf.random.uniform([1, 10]))
mymodel.summary()
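Note that a subclassed model only creates its weights when it is first called on data, which is why mymodel is run on a dummy input before summary(); calling summary() on a model that has never been built raises an error.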
In [24]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax
Create custom layers
In [37]:
# Create a custom layer
class MyLayer(Layer):

    def __init__(self, input_dims, units):
        super(MyLayer, self).__init__()
        self.w = self.add_weight(shape=(input_dims, units),
                                 initializer="random_normal")
        self.b = self.add_weight(shape=(units,),
                                 initializer="zeros")

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

dense_layer = MyLayer(5, 3)

x = tf.ones((1, 5))
print(dense_layer(x))
print(dense_layer.weights)
In [39]:
# Specify non-trainable weights via the trainable argument
class MyLayer(Layer):

    def __init__(self, input_dims, units):
        super(MyLayer, self).__init__()
        self.w = self.add_weight(shape=(input_dims, units),
                                 initializer="random_normal",
                                 trainable=False)
        self.b = self.add_weight(shape=(units,),
                                 initializer="zeros",
                                 trainable=False)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

dense_layer = MyLayer(5, 3)
In [40]:
print('trainable weights:', len(dense_layer.trainable_weights))
print('non-trainable weights:', len(dense_layer.non_trainable_weights))
In [44]:
# Create a custom layer that accumulates the running mean of its output values
class MyLayerMean(Layer):

    def __init__(self, input_dims, units):
        super(MyLayerMean, self).__init__()
        self.w = self.add_weight(shape=(input_dims, units),
                                 initializer="random_normal")
        self.b = self.add_weight(shape=(units,),
                                 initializer="zeros")
        self.sum_activation = tf.Variable(initial_value=tf.zeros((units,)),
                                          trainable=False)
        # Counts the number of examples seen (batch sizes are accumulated)
        self.number_call = tf.Variable(initial_value=0,
                                       trainable=False)

    def call(self, inputs):
        activation = tf.matmul(inputs, self.w) + self.b
        self.sum_activation.assign_add(tf.reduce_sum(activation, axis=0))
        self.number_call.assign_add(inputs.shape[0])
        return activation, self.sum_activation / tf.cast(self.number_call, tf.float32)

dense_layer = MyLayerMean(5, 3)
In [55]:
# Test the layer
y, activation_means = dense_layer(tf.ones((1, 5)))
print(activation_means.numpy())
print(dense_layer.number_call)
In [57]:
# Create a Dropout layer as a custom layer
class MyDropout(Layer):

    def __init__(self, rate):
        super(MyDropout, self).__init__()
        self.rate = rate

    def call(self, inputs):
        # Define forward pass for dropout layer
        return tf.nn.dropout(inputs, rate=self.rate)
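Note that this layer applies dropout unconditionally, at inference time as well as in training. A minimal sketch of a training-aware variant (hypothetical, not part of the original notebook), relying on the training keyword that Keras passes through to call:

class MyTrainingAwareDropout(Layer):

    def __init__(self, rate):
        super(MyTrainingAwareDropout, self).__init__()
        self.rate = rate

    def call(self, inputs, training=None):
        # Hypothetical variant: only drop units while training
        if training:
            return tf.nn.dropout(inputs, rate=self.rate)
        # Act as the identity at inference time
        return inputs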
Incorporate the custom layers into a model
In [58]:
# Build the model using custom layers with the model subclassing API
class MyModel(Model):

    def __init__(self, units_1, input_dim_1, units_2, units_3):
        super(MyModel, self).__init__()
        # Define layers
        self.layer_1 = MyLayer(input_dims=input_dim_1, units=units_1)
        self.dropout_1 = MyDropout(0.5)
        self.layer_2 = MyLayer(units_1, units_2)
        self.dropout_2 = MyDropout(0.5)
        self.layer_3 = MyLayer(units_2, units_3)
        self.softmax = Softmax()

    def call(self, inputs):
        # Define forward pass
        x = self.layer_1(inputs)
        x = tf.nn.relu(x)
        x = self.dropout_1(x)
        x = self.layer_2(x)
        x = tf.nn.relu(x)
        x = self.dropout_2(x)
        x = self.layer_3(x)
        x = self.softmax(x)
        return x
In [59]:
# Instantiate a model object
model = MyModel(64, 10000, 64, 46)
print(model(tf.ones((1, 10000))))
model.summary()
In [2]:
import numpy as np
import matplotlib.pyplot as plt
Create synthetic data
In [3]:
# Create data from a noise-contaminated linear model
def MakeNoisyData(m, b, n=20):
    x = tf.random.uniform(shape=(n,))
    noise = tf.random.normal(shape=(len(x),), stddev=0.1)
    y = m * x + b + noise
    return x, y

m = 1
b = 2
x_train, y_train = MakeNoisyData(m, b)
plt.plot(x_train, y_train, 'b.')
Out[3]:
[scatter plot of the noisy training data]
Define a linear regression model
In [4]:
from tensorflow.keras.layers import Layer
In [28]:
# Build a custom layer for the linear regression model
class LinearLayer(Layer):

    def __init__(self):
        super(LinearLayer, self).__init__()
        self.m = self.add_weight(shape=(1,),
                                 initializer="random_normal")
        self.b = self.add_weight(shape=(1,),
                                 initializer="zeros")

    def call(self, inputs):
        return self.m * inputs + self.b

linear_regression = LinearLayer()

print(linear_regression(x_train))
print(linear_regression.weights)
Define the loss function
In [29]:
# Define the mean squared error loss function
def SquaredError(y_pred, y_true):
    return tf.reduce_mean(tf.square(y_pred - y_true))

starting_loss = SquaredError(linear_regression(x_train), y_train)
print("Starting loss", starting_loss.numpy())
Train and plot the model
In [40]:
# Implement a gradient descent training loop for the linear regression model
learning_rate = 0.05
steps = 2500

for i in range(steps):
    with tf.GradientTape() as tape:
        predictions = linear_regression(x_train)
        loss = SquaredError(predictions, y_train)

    gradients = tape.gradient(loss, linear_regression.trainable_variables)

    linear_regression.m.assign_sub(learning_rate * gradients[0])
    linear_regression.b.assign_sub(learning_rate * gradients[1])
    # print("Step %d, Loss %f" % (i, loss.numpy()))
In [41]:
# Plot the learned regression model
print("m: {}, trained m: {}".format(m, linear_regression.m.numpy()))
print("b: {}, trained b: {}".format(b, linear_regression.b.numpy()))

plt.plot(x_train, y_train, 'b.')
x_linear_regression = np.linspace(min(x_train), max(x_train), 50)
plt.plot(x_linear_regression,
         linear_regression.m * x_linear_regression + linear_regression.b, 'r.')
Out[41]:
[training data with the fitted regression line]
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time
Build the model
In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax
In [4]:
# Define the custom layers and model
class MyLayer(Layer):

    def __init__(self, units):
        super(MyLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer="random_normal",
                                 name="kernel")
        self.b = self.add_weight(shape=(self.units,),
                                 initializer="zeros",
                                 name="bias")

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class MyDropout(Layer):

    def __init__(self, rate):
        super(MyDropout, self).__init__()
        self.rate = rate

    def call(self, inputs):
        return tf.nn.dropout(inputs, rate=self.rate)

class MyModel(Model):

    def __init__(self, units_1, units_2, units_3):
        super(MyModel, self).__init__()
        self.layer_1 = MyLayer(units_1)
        self.dropout_1 = MyDropout(0.5)
        self.layer_2 = MyLayer(units_2)
        self.dropout_2 = MyDropout(0.5)
        self.layer_3 = MyLayer(units_3)
        self.softmax = Softmax()

    def call(self, inputs):
        x = self.layer_1(inputs)
        x = tf.nn.relu(x)
        x = self.dropout_1(x)
        x = self.layer_2(x)
        x = tf.nn.relu(x)
        x = self.dropout_2(x)
        x = self.layer_3(x)
        return self.softmax(x)
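Because this version of MyLayer creates its weights in build rather than in __init__, no variables exist until the layer first sees data. A quick sketch illustrating the lazy creation (hypothetical check, not part of the original notebook):

layer = MyLayer(3)
print(len(layer.weights))    # 0: build() has not run yet

_ = layer(tf.ones((2, 5)))   # the first call triggers build()
print([w.shape for w in layer.weights])  # kernel (5, 3) and bias (3,)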
In [5]:
model = MyModel(64, 64, 46)
print(model(tf.ones((1, 10000))))
model.summary()
Load the Reuters dataset and define the class names
In [6]:
# Load the dataset
from tensorflow.keras.datasets import reuters
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

class_names = ['cocoa','grain','veg-oil','earn','acq','wheat','copper','housing','money-supply',
               'coffee','sugar','trade','reserves','ship','cotton','carcass','crude','nat-gas',
               'cpi','money-fx','interest','gnp','meal-feed','alum','oilseed','gold','tin',
               'strategic-metal','livestock','retail','ipi','iron-steel','rubber','heat','jobs',
               'lei','bop','zinc','orange','pet-chem','dlr','gas','silver','wpi','hog','lead']
In [7]:
# Print the class of the first sample
print("Label: {}".format(class_names[train_labels[0]]))
In [8]:
train_labels[0]
Out[8]:
3
Get the dataset word index
In [9]:
# Load the Reuters word index
word_to_index = reuters.get_word_index()
invert_word_index = dict([(value, key) for (key, value) in word_to_index.items()])
text_news = ' '.join([invert_word_index.get(i - 3, '?') for i in train_data[0]])
In [11]:
# Print the first data example sentence
print(text_news)
Preprocess the data
In [12]:
# Define a function that encodes the data into a 'bag of words' representation
def bag_of_words(text_samples, elements=10000):
    output = np.zeros((len(text_samples), elements))
    # Each sample is a list of word indices; set those positions to 1
    for i, word_indices in enumerate(text_samples):
        output[i, word_indices] = 1.
    return output

x_train = bag_of_words(train_data)
x_test = bag_of_words(test_data)

print("Shape of x_train:", x_train.shape)
print("Shape of x_test:", x_test.shape)
Define the loss function and optimizer
In [17]:
# Define the sparse categorical cross entropy loss and the Adam optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

def loss(model, x, y, wd):
    # Collect the kernel (weight matrix) variables, skipping biases
    kernel_variables = []
    for l in model.layers:
        for w in l.weights:
            if 'kernel' in w.name:
                kernel_variables.append(w)
    # L2 weight decay penalty on the kernels
    wd_penalty = wd * tf.reduce_sum([tf.reduce_sum(tf.square(k)) for k in kernel_variables])
    y_ = model(x)
    return loss_object(y_true=y, y_pred=y_) + wd_penalty

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
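Written out, the objective minimised here is the sparse categorical cross entropy plus an L2 penalty on the kernel matrices only (biases are left unregularised), with wd playing the role of $\lambda$:

$$\mathcal{L} = \mathrm{CE}(y, \hat{y}) + \lambda \sum_{k} \lVert W_k \rVert_2^2$$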
Train the model
In [18]:
# Define a function to compute the forward and backward pass
def grad(model, inputs, targets, wd):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, wd)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)
In [21]:
# Implement the training loop
from tensorflow.keras.utils import to_categorical

start_time = time.time()

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, train_labels))
train_dataset = train_dataset.batch(32)

# Keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = 10
weight_decay = 0.005

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for x, y in train_dataset:
        # Forward and backward pass, then parameter update
        loss_value, grads = grad(model, x, y, weight_decay)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Track progress
        epoch_loss_avg(loss_value)
        epoch_accuracy(to_categorical(y), model(x))

    # End of epoch: record the metrics
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    print("Epoch {:03d} Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss_avg.result(),
                                                               epoch_accuracy.result()))

print("Duration: {:.3f}".format(time.time() - start_time))
Evaluate the model
In [22]:
# Create a Dataset object for the test set
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, test_labels))
test_dataset = test_dataset.batch(32)
In [23]:
# Collect average loss and accuracy
epoch_loss_avg = tf.keras.metrics.Mean()
epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()
In [24]:
# Loop over the test set and print scores
from tensorflow.keras.utils import to_categorical
for x, y in test_dataset:
    # Compute the current loss
    loss_value = loss(model, x, y, weight_decay)
    epoch_loss_avg(loss_value)
    # Compare predicted labels to actual labels
    epoch_accuracy(to_categorical(y), model(x))

print("Test loss: {:.3f}".format(epoch_loss_avg.result().numpy()))
print("Test accuracy: {:.3%}".format(epoch_accuracy.result().numpy()))
Plot the learning curves
In [25]:
# Plot the training loss and accuracy
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')
axes[0].set_ylabel("Loss", fontsize=14)
axes[0].plot(train_loss_results)
axes[1].set_ylabel("Accuracy", fontsize=14)
axes[1].set_xlabel("Epoch", fontsize=14)
axes[1].plot(train_accuracy_results)
plt.show()
Predict from the model
In [26]:
# Get the model prediction for an example input
predicted_label = np.argmax(model(x_train[np.newaxis, 0]), axis=1)[0]
print("Prediction: {}".format(class_names[predicted_label]))
print(" Label: {}".format(class_names[train_labels[0]]))
In [31]:
model(x_train[np.newaxis,0])
Out[31]:
[1 × 46 tensor of class probabilities]
In [33]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import reuters
import numpy as np
import matplotlib.pyplot as plt
import time
Build the model
In [45]:
# Initialize a new model
model = MyModel(64, 64, 46)
Redefine the grad function using the @tf.function decorator
In [46]:
# Use the @tf.function decorator
@tf.function
def grad(model, inputs, targets, wd):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, wd)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)
Train the model
In [47]:
# Re-run the training loop
from tensorflow.keras.utils import to_categorical

start_time = time.time()

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, train_labels))
train_dataset = train_dataset.batch(32)

# Keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = 10
weight_decay = 0.005

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()

    for x, y in train_dataset:
        loss_value, grads = grad(model, x, y, weight_decay)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        epoch_loss_avg(loss_value)
        epoch_accuracy(to_categorical(y), model(x))

    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())

    print("Epoch {:03d} Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss_avg.result(),
                                                               epoch_accuracy.result()))

print("Duration: {:.3f}".format(time.time() - start_time))
Print the autograph code
In [ ]:
# Use tf.autograph.to_code to see the generated code
print(tf.autograph.to_code(grad.python_function))
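As a closing aside (hypothetical illustration, not part of the original notebook): a tf.function is traced once per input signature, and the traced graph can be retrieved as a concrete function:

@tf.function
def square(x):
    return x * x

# Tracing happens on the first call for each new input signature
concrete = square.get_concrete_function(tf.TensorSpec(shape=None, dtype=tf.float32))
print(concrete(tf.constant(3.0)).numpy())  # 9.0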