Model subclassing and custom training loops

目录

<!DOCTYPE html>

Coding_Tutorial
In [32]:
import tensorflow as tf
print(tf.__version__)
2.0.0

Model subclassing and custom training loops


Model subclassing

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Softmax, concatenate

Create a simple model using the model subclassing API

In [21]:
# Build the model
class MyModel(Model):
    """Small subclassed model with a concatenated output.

    A 64-unit ReLU layer feeds a 10-unit and then a 5-unit linear layer.
    The 64-unit activations and the 5-unit output are concatenated and
    normalised with a softmax.
    """

    def __init__(self):
        super().__init__()
        self.dense_1 = Dense(64, activation="relu")
        self.dense_2 = Dense(10)
        self.dense_3 = Dense(5)
        self.softmax = Softmax()

    def call(self, inputs, training=True):
        # `training` is part of the signature but is not used by this model.
        hidden = self.dense_1(inputs)
        branch = self.dense_3(self.dense_2(hidden))
        merged = concatenate([hidden, branch])
        return self.softmax(merged)
In [22]:
# Print the model summary

mymodel = MyModel()
# Call the model once on a dummy (1, 10) batch before asking for the summary.
mymodel(tf.random.uniform([1, 10]))
mymodel.summary()
Model: "my_model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_20 (Dense)             multiple                  704       
_________________________________________________________________
dense_21 (Dense)             multiple                  650       
_________________________________________________________________
dense_22 (Dense)             multiple                  55        
_________________________________________________________________
softmax (Softmax)            multiple                  0         
=================================================================
Total params: 1,409
Trainable params: 1,409
Non-trainable params: 0
_________________________________________________________________

Custom layers

In [24]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax

Create custom layers

In [37]:
# Create a custom layer
class MyLayer(Layer):
    """Affine layer computing ``inputs @ w + b``.

    Both weights are created in the constructor, so the input width must be
    given up front.

    Args:
        input_dims: size of the last axis of the inputs.
        units: number of output units.
    """

    def __init__(self, input_dims, units):
        super().__init__()
        kernel_shape = (input_dims, units)
        bias_shape = (units,)
        self.w = self.add_weight(shape=kernel_shape, initializer="random_normal")
        self.b = self.add_weight(shape=bias_shape, initializer="zeros")

    def call(self, inputs):
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

# Layer with input_dims=5 and units=3, applied to a single all-ones sample
dense_layer = MyLayer(5, 3)
x = tf.ones((1, 5))
print(dense_layer(x))
# Inspect the kernel and bias variables held by the layer
print(dense_layer.weights)
tf.Tensor([[-0.04967789 -0.19962817  0.11406139]], shape=(1, 3), dtype=float32)
[<tf.Variable 'Variable:0' shape=(5, 3) dtype=float32, numpy=
array([[ 0.0234185 , -0.02780751, -0.02603882],
       [ 0.00773788, -0.0663674 ,  0.07986604],
       [-0.02095985, -0.02214642,  0.00174421],
       [-0.07695527, -0.03400336,  0.00114597],
       [ 0.01708087, -0.04930348,  0.05734399]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]
In [39]:
# Specify trainable weights
class MyLayer(Layer):
    """Affine layer (``inputs @ w + b``) whose weights are frozen.

    Both the kernel and the bias are registered with ``trainable=False``.

    Args:
        input_dims: size of the last axis of the inputs.
        units: number of output units.
    """

    def __init__(self, input_dims, units):
        super().__init__()
        frozen = {"trainable": False}
        self.w = self.add_weight(shape=(input_dims, units),
                                 initializer="random_normal",
                                 **frozen)
        self.b = self.add_weight(shape=(units,),
                                 initializer="zeros",
                                 **frozen)

    def call(self, inputs):
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

# Instantiate the layer with input_dims=5 and units=3
dense_layer = MyLayer(5, 3)
In [40]:
# Count how many of the layer's weights are trainable vs. non-trainable
print('trainable weights:', len(dense_layer.trainable_weights))
print('non-trainable weights:', len(dense_layer.non_trainable_weights))
trainable weights: 0
non-trainable weights: 2
In [44]:
# Create a custom layer to accumulate means of output values

class MyLayerMean(Layer):
    """Affine layer that also tracks the running mean of its outputs.

    Besides ``inputs @ w + b`` the layer accumulates the sum of every
    activation it has produced and the number of samples it has seen, and
    returns the per-unit mean over all samples processed so far.

    Args:
        input_dims: size of the last axis of the inputs.
        units: number of output units.
    """

    def __init__(self, input_dims, units):
        super(MyLayerMean, self).__init__()
        self.w = self.add_weight(shape=(input_dims, units),
                                 initializer="random_normal")
        self.b = self.add_weight(shape=(units,),
                                 initializer="zeros")
        # Running statistics, updated on every call and excluded from training.
        self.sum_activation = tf.Variable(initial_value=tf.zeros((units,)),
                                          trainable=False)
        # Despite the name, this counts samples (rows of the input), not calls.
        self.number_call = tf.Variable(initial_value=0,
                                       trainable=False)

    def call(self, inputs):
        """Return ``(activation, running_mean)`` for a batch of inputs."""
        activation = tf.matmul(inputs, self.w) + self.b
        self.sum_activation.assign_add(tf.reduce_sum(activation, axis=0))
        # Use the dynamic batch size: the static `inputs.shape[0]` is None
        # whenever the batch dimension is not known ahead of time.
        batch_size = tf.shape(inputs)[0]
        self.number_call.assign_add(tf.cast(batch_size, self.number_call.dtype))
        return activation, self.sum_activation / tf.cast(self.number_call, tf.float32)

# Instantiate the mean-tracking layer with input_dims=5 and units=3
dense_layer = MyLayerMean(5, 3)
In [55]:
# Test the layer

# The layer returns the activations together with the running per-unit mean
y, activation_means = dense_layer(tf.ones((1, 5)))
print(activation_means.numpy())
# Sample counter kept by the layer; it keeps growing each time this cell is re-run
print(dense_layer.number_call)
[ 0.20949477  0.07882436 -0.17136861]
<tf.Variable 'Variable:0' shape=() dtype=int32, numpy=26>
In [57]:
# Create a Dropout layer as a custom layer

class MyDropout(Layer):
    """Dropout implemented as a custom layer.

    Args:
        rate: fraction of the input units to drop.
    """

    def __init__(self, rate):
        super(MyDropout, self).__init__()
        self.rate = rate

    def call(self, inputs, training=True):
        """Apply dropout to ``inputs`` when ``training`` is truthy.

        Args:
            inputs: tensor to apply dropout to.
            training: Python bool. When falsy (including ``None``) the inputs
                are returned unchanged, so the layer can be disabled for
                evaluation or prediction. Defaults to ``True`` so callers
                that do not pass it keep the original always-on behaviour.

        Returns:
            The inputs with dropout applied, or the inputs themselves.
        """
        if not training:
            return inputs
        # Define forward pass for dropout layer
        return tf.nn.dropout(inputs, rate=self.rate)

Implement the custom layers into a model

In [58]:
# Build the model using custom layers with the model subclassing API

class MyModel(Model):
    """Three-layer classifier assembled from the custom layers.

    Pipeline: MyLayer -> ReLU -> MyDropout -> MyLayer -> ReLU -> MyDropout
    -> MyLayer -> Softmax.

    Args:
        units_1: width of the first hidden layer.
        input_dim_1: size of the last axis of the model inputs.
        units_2: width of the second hidden layer.
        units_3: number of output classes.
    """

    def __init__(self, units_1, input_dim_1, units_2, units_3):
        super().__init__()
        drop_rate = 0.5
        # NOTE(review): MyLayer is resolved when the model is instantiated.
        # The captured summary reports 0 trainable parameters, which suggests
        # the non-trainable variant was in scope -- confirm this is intended.
        self.layer_1 = MyLayer(input_dims=input_dim_1, units=units_1)
        self.dropout_1 = MyDropout(drop_rate)
        self.layer_2 = MyLayer(units_1, units_2)
        self.dropout_2 = MyDropout(drop_rate)
        self.layer_3 = MyLayer(units_2, units_3)
        self.softmax = Softmax()

    def call(self, inputs):
        hidden_stages = (
            (self.layer_1, self.dropout_1),
            (self.layer_2, self.dropout_2),
        )
        hidden = inputs
        for dense, dropout in hidden_stages:
            hidden = dropout(tf.nn.relu(dense(hidden)))
        logits = self.layer_3(hidden)
        return self.softmax(logits)
In [59]:
# Instantiate a model object

# units_1=64, input_dim_1=10000, units_2=64, units_3=46
model = MyModel(64,10000,64,46)
# Run a single all-ones sample through the model and print the class probabilities
print(model(tf.ones((1, 10000))))
model.summary()
tf.Tensor(
[[0.02489853 0.01084028 0.00708136 0.011239   0.00647628 0.01125843
  0.02465697 0.02715545 0.0106438  0.01812194 0.00729887 0.02460364
  0.01560491 0.03345494 0.03605006 0.00696433 0.01502816 0.0085166
  0.03674844 0.01680389 0.01668386 0.07529215 0.01180458 0.01297116
  0.00303056 0.04027725 0.01722521 0.02785044 0.02322018 0.01318904
  0.01779605 0.0271759  0.02585552 0.01091689 0.01024993 0.01401119
  0.02018325 0.02149687 0.01418947 0.14319772 0.00700873 0.02422531
  0.01921816 0.0080635  0.02735423 0.01406699]], shape=(1, 46), dtype=float32)
Model: "my_model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
my_layer_6 (MyLayer)         multiple                  640064    
_________________________________________________________________
my_dropout (MyDropout)       multiple                  0         
_________________________________________________________________
my_layer_7 (MyLayer)         multiple                  4160      
_________________________________________________________________
my_dropout_1 (MyDropout)     multiple                  0         
_________________________________________________________________
my_layer_8 (MyLayer)         multiple                  2990      
_________________________________________________________________
softmax_1 (Softmax)          multiple                  0         
=================================================================
Total params: 647,214
Trainable params: 0
Non-trainable params: 647,214
_________________________________________________________________

Automatic differentiation

In [2]:
import numpy as np
import matplotlib.pyplot as plt

Create synthetic data

In [3]:
# Create data from a noise contaminated linear model

def MakeNoisyData(m, b, n=20):
    """Sample ``n`` points from the line ``y = m * x + b`` with added noise.

    Args:
        m: slope of the line.
        b: intercept of the line.
        n: number of points to generate.

    Returns:
        Tuple ``(x, y)``: ``x`` is drawn with ``tf.random.uniform`` and ``y``
        is the line evaluated at ``x`` plus normal noise (stddev 0.1).
    """
    inputs = tf.random.uniform(shape=(n,))
    jitter = tf.random.normal(shape=(len(inputs),), stddev=0.1)
    targets = m * inputs + b + jitter
    return inputs, targets

# Slope and intercept of the underlying line
m=1
b=2
x_train, y_train = MakeNoisyData(m,b)
# Scatter plot of the noisy samples
plt.plot(x_train, y_train, 'b.')
Out[3]:
[<matplotlib.lines.Line2D at 0x7ff3d076a5c0>]

Define a linear regression model

In [4]:
from tensorflow.keras.layers import Layer
In [28]:
# Build a custom layer for the linear regression model
class LinearLayer(Layer):
    """Scalar linear model ``m * inputs + b``.

    ``m`` starts from a random normal value and ``b`` starts at zero; both
    are single-element weights.
    """

    def __init__(self):
        super().__init__()
        scalar_shape = (1, )
        self.m = self.add_weight(shape=scalar_shape, initializer="random_normal")
        self.b = self.add_weight(shape=scalar_shape, initializer="zeros")

    def call(self, inputs):
        scaled = self.m * inputs
        return scaled + self.b
# Instantiate the model and look at its untrained predictions and weights
linear_regression = LinearLayer()
print(linear_regression(x_train))
print(linear_regression.weights)
tf.Tensor(
[0.0010017  0.00195054 0.00159761 0.0023566  0.00039557 0.00025922
 0.00203139 0.00104975 0.00176174 0.00250651 0.00065012 0.00088429
 0.00039591 0.00068689 0.00120224 0.00130667 0.00098007 0.00133596
 0.00120854 0.00209736], shape=(20,), dtype=float32)
[<tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.00298285], dtype=float32)>, <tf.Variable 'Variable:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

Define the loss function

In [29]:
# Define the mean squared error loss function

def SquaredError(y_pred, y_true):
    """Return the mean of the element-wise squared differences."""
    residual = y_pred - y_true
    return tf.reduce_mean(tf.square(residual))

# Loss of the untrained model, for comparison with the value after training
starting_loss = SquaredError(linear_regression(x_train), y_train)
print("Starting loss", starting_loss.numpy())
Starting loss 6.0469003

Train and plot the model

In [40]:
# Implement a gradient descent training loop for the linear regression model
learning_rate = 0.05
steps = 2500

for i in range(steps):
    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        predictions = linear_regression(x_train)
        loss = SquaredError(predictions, y_train)

    gradients = tape.gradient(loss, linear_regression.trainable_variables)

    # trainable_variables lists m then b (their creation order), so each
    # gradient is paired with the variable it was computed for.
    for variable, gradient in zip(linear_regression.trainable_variables, gradients):
        variable.assign_sub(learning_rate * gradient)

    #print("Step %d, Loss %f" % (i, loss.numpy()))
In [41]:
# Plot the learned regression model

# Compare the true parameters with the learned ones
print("m:{},  trained m:{}".format(m,linear_regression.m.numpy()))
print("b:{},  trained b:{}".format(b,linear_regression.b.numpy()))

# Training samples
plt.plot(x_train, y_train, 'b.')

# Learned line evaluated at 50 evenly spaced points across the observed x range
x_linear_regression=np.linspace(min(x_train), max(x_train),50)
plt.plot(x_linear_regression, linear_regression.m*x_linear_regression+linear_regression.b, 'r.')
#plt.plot(linear_regression.m*x_linear_regression+linear_regression.b, 'r.')
m:1,  trained m:[1.015685]
b:2,  trained b:[2.0114112]
Out[41]:
[<matplotlib.lines.Line2D at 0x7ff39414cba8>]

Custom training loops

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time

Build the model

In [3]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax
In [4]:
# Define the custom layers and model

class MyLayer(Layer):
    """Affine layer (``inputs @ w + b``) whose weights are created lazily.

    The input width is taken from the first input the layer sees, so only
    the number of output units has to be given.

    Args:
        units: number of output units.
    """

    def __init__(self, units):
        super(MyLayer, self).__init__()
        self.units = units

    def build(self, input_shape):
        # The weight names matter: the weight-decay term in `loss` picks out
        # variables whose name contains 'kernel'.
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer="random_normal",
                                 name="kernel")
        self.b = self.add_weight(shape=(self.units, ),
                                 initializer="zeros",
                                 name="bias")

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b


class MyDropout(Layer):
    """Dropout implemented as a custom layer.

    Args:
        rate: fraction of the input units to drop.
    """

    def __init__(self, rate):
        super(MyDropout, self).__init__()
        self.rate = rate

    def call(self, inputs, training=True):
        """Apply dropout when ``training`` is truthy, else pass inputs through.

        ``training`` is expected to be a Python bool. It defaults to ``True``
        so callers that do not pass it keep the always-on behaviour.
        """
        if not training:
            return inputs
        return tf.nn.dropout(inputs, rate=self.rate)


class MyModel(Model):
    """Three-layer classifier: two ReLU + dropout stages and a softmax output.

    Args:
        units_1: width of the first hidden layer.
        units_2: width of the second hidden layer.
        units_3: number of output classes.
    """

    def __init__(self, units_1, units_2, units_3):
        super(MyModel, self).__init__()
        self.layer_1 = MyLayer(units_1)
        self.dropout_1 = MyDropout(0.5)
        self.layer_2 = MyLayer(units_2)
        self.dropout_2 = MyDropout(0.5)
        self.layer_3 = MyLayer(units_3)
        self.softmax = Softmax()

    def call(self, inputs, training=True):
        """Run the forward pass.

        Args:
            inputs: batch of input features.
            training: Python bool forwarded to both dropout layers. Pass
                ``training=False`` for evaluation or prediction to get
                deterministic outputs. Defaults to ``True`` so existing
                ``model(x)`` calls behave exactly as before.

        Returns:
            Class probabilities, one row per input sample.
        """
        x = self.layer_1(inputs)
        x = tf.nn.relu(x)
        x = self.dropout_1(x, training=training)
        x = self.layer_2(x)
        x = tf.nn.relu(x)
        x = self.dropout_2(x, training=training)
        x = self.layer_3(x)
        return self.softmax(x)
        
In [5]:
# Two hidden layers of 64 units and 46 output classes
model = MyModel(64, 64, 46)
# Call the model on one all-ones sample of width 10000, then print the summary
print(model(tf.ones((1, 10000))))
model.summary()
tf.Tensor(
[[0.01772136 0.02747736 0.02107654 0.02190472 0.02839988 0.01724724
  0.01409584 0.01121495 0.01428777 0.01504129 0.02005377 0.01554544
  0.01977161 0.00902303 0.01555876 0.01219317 0.01500386 0.02096696
  0.0205863  0.01422443 0.01804048 0.02644591 0.02294829 0.0499467
  0.03822591 0.03323207 0.0130417  0.01527712 0.00718926 0.01329936
  0.02423214 0.04488055 0.02256041 0.04332861 0.01037556 0.01695264
  0.02960963 0.01081825 0.04905513 0.02775375 0.02934232 0.03288709
  0.02131436 0.0105742  0.01693919 0.02033512]], shape=(1, 46), dtype=float32)
Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
my_layer (MyLayer)           multiple                  640064    
_________________________________________________________________
my_dropout (MyDropout)       multiple                  0         
_________________________________________________________________
my_layer_1 (MyLayer)         multiple                  4160      
_________________________________________________________________
my_dropout_1 (MyDropout)     multiple                  0         
_________________________________________________________________
my_layer_2 (MyLayer)         multiple                  2990      
_________________________________________________________________
softmax (Softmax)            multiple                  0         
=================================================================
Total params: 647,214
Trainable params: 647,214
Non-trainable params: 0
_________________________________________________________________

Load the reuters dataset and define the class_names

In [6]:
# Load the dataset

from tensorflow.keras.datasets import reuters

# num_words=10000 matches the 10000-wide bag-of-words encoding used later on
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# Topic names, looked up by integer label (class_names[label])
class_names = ['cocoa','grain','veg-oil','earn','acq','wheat','copper','housing','money-supply',
   'coffee','sugar','trade','reserves','ship','cotton','carcass','crude','nat-gas',
   'cpi','money-fx','interest','gnp','meal-feed','alum','oilseed','gold','tin',
   'strategic-metal','livestock','retail','ipi','iron-steel','rubber','heat','jobs',
   'lei','bop','zinc','orange','pet-chem','dlr','gas','silver','wpi','hog','lead']
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
2113536/2110848 [==============================] - 0s 0us/step
In [7]:
# Print the class of the first sample

print("Label: {}".format(class_names[train_labels[0]]))
Label: earn
In [8]:
train_labels[0]
Out[8]:
3

Get the dataset word index

In [9]:
# Load the Reuters word index

word_to_index = reuters.get_word_index()

# Flip the mapping so that an index can be turned back into its word
invert_word_index = {index: word for word, index in word_to_index.items()}
# Stored indices are offset by 3 relative to the word index; anything that
# cannot be looked up is rendered as '?'
text_news = ' '.join(invert_word_index.get(i - 3, '?') for i in train_data[0])
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters_word_index.json
557056/550378 [==============================] - 0s 0us/step
In [11]:
# Print the first data example sentence

print(text_news)
? ? ? said as a result of its december acquisition of space co it expects earnings per share in 1987 of 1 15 to 1 30 dlrs per share up from 70 cts in 1986 the company said pretax net should rise to nine to 10 mln dlrs from six mln dlrs in 1986 and rental operation revenues to 19 to 22 mln dlrs from 12 5 mln dlrs it said cash flow per share this year should be 2 50 to three dlrs reuter 3

Preprocess the data

In [12]:
# Define a function that encodes the data into a 'bag of words' representation

def bag_of_words(text_samples, elements=10000):
    """Encode sequences of word indices as multi-hot rows.

    Args:
        text_samples: sequence of samples, each a sequence of integer word
            indices smaller than ``elements``.
        elements: width of the encoding (vocabulary size).

    Returns:
        Array of shape ``(len(text_samples), elements)`` holding 1.0 at every
        index that occurs in the corresponding sample and 0.0 elsewhere.
    """
    n_samples = len(text_samples)
    encoded = np.zeros((n_samples, elements))
    for row, token_ids in enumerate(text_samples):
        # Repeated indices simply set the same cell again
        encoded[row, token_ids] = 1.0
    return encoded

# Encode both splits with the default vocabulary width of 10000
x_train = bag_of_words(train_data)
x_test = bag_of_words(test_data)

print("Shape of x_train:", x_train.shape)
print("Shape of x_test:", x_test.shape)
Shape of x_train: (8982, 10000)
Shape of x_test: (2246, 10000)

Define the loss function and optimizer

In [17]:
# Define the sparse categorical cross entropy loss (targets are integer class
# labels, not one-hot vectors) and the Adam optimizer

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

def loss(model, x, y, wd):
    """Cross-entropy loss plus an L2 penalty on the model's kernels.

    Args:
        model: model whose layers expose their weights via ``layers``.
        x: batch of inputs.
        y: integer class labels for the batch.
        wd: weight-decay coefficient multiplying the summed squared kernels.

    Returns:
        Scalar tensor: ``loss_object(y, model(x)) + wd * sum(kernel ** 2)``.
    """
    # Run the forward pass first: layers that create their weights in build()
    # have no kernels until the model has been called once, so collecting
    # them beforehand would silently drop the penalty on that first call.
    y_ = model(x)
    # Only weights whose name contains 'kernel' are regularised.
    kernel_variables = [
        weight
        for layer in model.layers
        for weight in layer.weights
        if 'kernel' in weight.name
    ]
    wd_penalty = wd * tf.reduce_sum([tf.reduce_sum(tf.square(k)) for k in kernel_variables])
    return loss_object(y_true=y, y_pred=y_) + wd_penalty

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

Train the model

In [18]:
# Define a function to compute the forward and backward pass

def grad(model, inputs, targets, wd):
    """Compute the loss and its gradients for one batch.

    Returns:
        Tuple ``(loss_value, gradients)`` where the gradients are taken with
        respect to ``model.trainable_variables``, in that order.
    """
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, wd)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
In [21]:
# Implement the training loop

from tensorflow.keras.utils import to_categorical

start_time = time.time()
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, train_labels))
train_dataset = train_dataset.batch(32)

# Keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = 10
weight_decay = 0.005
for epoch in range(num_epochs):
    # Fresh metric objects each epoch so the reported values cover that epoch only
    epoch_loss_avg = tf.keras.metrics.Mean()
    # The labels are integer class ids, so compare them directly with the
    # sparse metric. One-hot encoding each batch with to_categorical produced
    # a width that depended on the labels present in that batch.
    epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    for x, y in train_dataset:
        # Forward and backward pass, then one optimizer step
        loss_value, grads = grad(model, x, y, weight_decay)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        epoch_loss_avg(loss_value)
        # Accuracy comes from a second forward pass, made after the update
        epoch_accuracy(y, model(x))
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())
    print("Epoch {:03d} Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss_avg.result(),
                                                              epoch_accuracy.result()))

print("Duration :{:.3f}".format(time.time() - start_time))
WARNING:tensorflow:Layer my_model is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.

If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.

To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 000 Loss: 3.308, Accuracy: 48.130%
Epoch 001 Loss: 1.899, Accuracy: 61.801%
Epoch 002 Loss: 1.803, Accuracy: 66.310%
Epoch 003 Loss: 1.764, Accuracy: 68.326%
Epoch 004 Loss: 1.734, Accuracy: 69.060%
Epoch 005 Loss: 1.732, Accuracy: 69.851%
Epoch 006 Loss: 1.722, Accuracy: 69.918%
Epoch 007 Loss: 1.709, Accuracy: 70.029%
Epoch 008 Loss: 1.697, Accuracy: 70.207%
Epoch 009 Loss: 1.694, Accuracy: 70.597%
Duration :236.378

Evaluate the model

In [22]:
# Create a Dataset object for the test set

# Pair the encoded test inputs with their labels and serve them in batches of 32
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, test_labels))
test_dataset = test_dataset.batch(32)
In [23]:
# Collect average loss and accuracy

# Fresh metric objects so the test scores do not include training batches
epoch_loss_avg = tf.keras.metrics.Mean()
epoch_accuracy = tf.keras.metrics.CategoricalAccuracy()
In [24]:
# Loop over the test set and print scores

from tensorflow.keras.utils import to_categorical

for x, y in test_dataset:
    # Compute the loss (including the weight-decay term) for this batch;
    # no parameter update happens here
    loss_value = loss(model, x, y, weight_decay)    
    # Accumulate the running mean of the loss
    epoch_loss_avg(loss_value)  
    # Compare predicted label to actual label
    # NOTE(review): model(x) is called without disabling dropout, so these
    # forward passes are stochastic -- confirm this is intended for evaluation
    epoch_accuracy(to_categorical(y), model(x))

print("Test loss: {:.3f}".format(epoch_loss_avg.result().numpy()))
print("Test accuracy: {:.3%}".format(epoch_accuracy.result().numpy()))
Test loss: 1.853
Test accuracy: 67.409%

Plot the learning curves

In [25]:
# Plot the training loss and accuracy

# Two stacked panels sharing the epoch axis: loss on top, accuracy below
fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))
fig.suptitle('Training Metrics')

axes[0].set_ylabel("Loss", fontsize=14)
axes[0].plot(train_loss_results)

axes[1].set_ylabel("Accuracy", fontsize=14)
axes[1].set_xlabel("Epoch", fontsize=14)
axes[1].plot(train_accuracy_results)
plt.show()

Predict from the model

In [26]:
# Get the model prediction for an example input

# x_train[np.newaxis,0] keeps a leading batch axis of size 1; argmax over the
# class axis picks the most probable class for that single sample
predicted_label = np.argmax(model(x_train[np.newaxis,0]),axis=1)[0]
print("Prediction: {}".format(class_names[predicted_label]))
print("     Label: {}".format(class_names[train_labels[0]]))
Prediction: earn
     Label: earn
In [31]:
model(x_train[np.newaxis,0])
Out[31]:
<tf.Tensor: id=807244, shape=(1, 46), dtype=float32, numpy=
array([[2.0535091e-04, 2.1657012e-03, 1.9819100e-04, 9.6147978e-01,
        5.1521608e-03, 1.6181401e-04, 3.6535074e-04, 1.4510546e-04,
        1.3037691e-03, 1.4215536e-04, 4.7751077e-04, 1.5286735e-04,
        2.7043189e-04, 1.1501311e-03, 1.8601213e-04, 1.4800340e-04,
        1.7261537e-02, 5.9980171e-04, 3.1137478e-04, 7.3267973e-04,
        9.7757834e-04, 4.1972424e-04, 8.5123749e-05, 4.2101150e-04,
        2.9234416e-04, 1.8532608e-03, 9.1949471e-05, 7.0792237e-05,
        1.5166773e-04, 1.6312358e-04, 2.6926893e-04, 2.8232252e-04,
        1.1763624e-04, 1.2518957e-04, 3.3679346e-04, 6.0613282e-05,
        2.3652115e-04, 1.1878930e-04, 2.2053282e-04, 2.8367169e-04,
        8.2094120e-05, 3.4605098e-04, 8.8187262e-05, 1.1700453e-04,
        7.4783129e-05, 1.0418057e-04]], dtype=float32)>

tf.function decorator

In [33]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Softmax
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import reuters
import numpy as np
import matplotlib.pyplot as plt
import time

Build the model

In [45]:
# Initialize a new model
model = MyModel(64, 64, 46)

Redefine the grad function using the @tf.function decorator

In [46]:
# Use the @tf.function decorator
@tf.function
def grad(model, inputs, targets, wd):
    """Compute the loss and its gradients for one batch (wrapped in tf.function).

    Returns:
        Tuple ``(loss_value, gradients)`` where the gradients are taken with
        respect to ``model.trainable_variables``, in that order.
    """
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, wd)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients

Train the model

In [47]:
# Re-run the training loop

from tensorflow.keras.utils import to_categorical

start_time = time.time()
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, train_labels))
train_dataset = train_dataset.batch(32)

# Keep results for plotting
train_loss_results = []
train_accuracy_results = []

num_epochs = 10
weight_decay = 0.005
for epoch in range(num_epochs):
    # Fresh metric objects each epoch so the reported values cover that epoch only
    epoch_loss_avg = tf.keras.metrics.Mean()
    # The labels are integer class ids, so compare them directly with the
    # sparse metric. One-hot encoding each batch with to_categorical produced
    # a width that depended on the labels present in that batch.
    epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    for x, y in train_dataset:
        # Forward and backward pass, then one optimizer step
        loss_value, grads = grad(model, x, y, weight_decay)
        # NOTE(review): `optimizer` is not re-created in this cell, so it is
        # presumably the instance used for the previous model and carries its
        # state over -- confirm this is intended for the timing comparison
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        epoch_loss_avg(loss_value)
        # Accuracy comes from a second forward pass, made after the update
        epoch_accuracy(y, model(x))
    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())
    print("Epoch {:03d} Loss: {:.3f}, Accuracy: {:.3%}".format(epoch, epoch_loss_avg.result(),
                                                              epoch_accuracy.result()))

print("Duration :{:.3f}".format(time.time() - start_time))
WARNING:tensorflow:Layer my_model_4 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.

If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.

To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

WARNING:tensorflow:Layer my_model_4 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.

If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.

To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 000 Loss: 2.442, Accuracy: 54.643%
Epoch 001 Loss: 1.960, Accuracy: 62.625%
Epoch 002 Loss: 1.855, Accuracy: 65.954%
Epoch 003 Loss: 1.826, Accuracy: 67.257%
Epoch 004 Loss: 1.770, Accuracy: 68.526%
Epoch 005 Loss: 1.754, Accuracy: 69.105%
Epoch 006 Loss: 1.729, Accuracy: 68.849%
Epoch 007 Loss: 1.725, Accuracy: 69.094%
Epoch 008 Loss: 1.711, Accuracy: 69.639%
Epoch 009 Loss: 1.719, Accuracy: 69.684%
Duration :188.342
In [ ]:
# Use tf.autograph.to_code to see the generated code

print(tf.autograph.to_code(grad.python_function))
In [ ]: