from matplotlib import pyplot as plt
import numpy as np
from imageio import imread
import pandas as pd
from time import time as timer
import tensorflow as tf
%matplotlib inline
from matplotlib import animation
from IPython.display import HTML
The following creates a 'model'. It is an object containing the ML model itself (a simple fully connected neural network: a flatten layer followed by a single dense softmax layer), the optimization parameters, as well as the interface for model training.
# Build the classifier layer by layer: flatten each 28x28 input into a vector,
# then map it to 10 softmax outputs.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Attach the optimizer, the loss and the metric reported during training.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
The model summary provides information about the model's layers and trainable parameters:
# Print an overview of the model's layers and its trainable parameters.
model.summary()
The `fit` function is the interface for model training.
Here one can specify the training and validation datasets, the minibatch size, and the number of training epochs.
We will also save the state of the trainable variables after each epoch:
# Fashion-MNIST loader bundled with Keras; load_data() yields a (train, test)
# pair, each holding images and their integer labels.
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Rescale pixel intensities by 1/255 (assumes 8-bit images, giving values in [0, 1]).
x_train, x_test = x_train / 255, x_test / 255

# Human-readable names, indexed by the integer class label.
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]
# Re-create the classifier so that training below starts from a new model
# instance: a flatten layer feeding a single 10-unit softmax layer.
model = tf.keras.models.Sequential(
    layers=[
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(units=10, activation='softmax'),
    ]
)

# Labels are plain integers, hence the sparse variant of the cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer / parameter overview of the freshly built model.
model.summary()
During training we also save a checkpoint of the model's weights after each epoch:
# Checkpoint path template; the `{epoch}` placeholder yields one file per epoch.
# NOTE(review): newer Keras releases may require weights-only checkpoints to end
# in `.weights.h5` — confirm against the installed version before upgrading.
save_path = 'save/mnist_{epoch}.ckpt'
save_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=save_path,
    save_weights_only=True,
)

# Train on the training split and validate on the test split after every epoch;
# the returned History object keeps the per-epoch losses and metrics.
hist = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=128,
    validation_data=(x_test, y_test),
    callbacks=[save_callback],
)
# Two side-by-side panels: loss on the left, accuracy on the right.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

# (training key, validation key, legend labels) for each panel, in panel order.
panels = (
    ('loss', 'val_loss', ('training loss', 'validation loss')),
    ('accuracy', 'val_accuracy', ('training accuracy', 'validation accuracy')),
)
for ax, (train_key, val_key, labels) in zip(axs, panels):
    ax.plot(hist.epoch, hist.history[train_key])
    ax.plot(hist.epoch, hist.history[val_key])
    ax.legend(labels, loc='lower right')

plt.show()
Current model performance can be evaluated on a dataset:
# Evaluate the compiled loss and metrics of the current weights on the test split.
model.evaluate(x=x_test, y=y_test, verbose=2)
We can test the trained model on an image:
# Index of the test image to inspect.
im_id = 0
# Run the model on the whole test set; each row holds the 10 softmax outputs.
y_pred = model(x_test)
# The predicted class is the output with the highest pseudo-probability.
y_pred_most_probable = np.argmax(y_pred[im_id])
# Report both labels with their class names, in the same format as the
# misclassification report further below.
print('true label: ', y_test[im_id],
      f'({class_names[y_test[im_id]]})',
      '; predicted: ', y_pred_most_probable,
      f'({class_names[y_pred_most_probable]})')
# Trailing semicolon suppresses the notebook echo of the returned image object.
plt.imshow(x_test[im_id], cmap='gray');
We can also inspect the samples on which the model fails:
# Predicted class per test sample: index of the largest softmax output.
y_pred_most_probable_all = np.argmax(y_pred, axis=1)
# Boolean mask of misclassified samples and their positions in the test set.
wrong_pred_map = y_pred_most_probable_all != y_test
wrong_pred_idx = np.flatnonzero(wrong_pred_map)

# Inspect the first misclassified sample.
im_id = wrong_pred_idx[0]
y_pred_most_probable = y_pred_most_probable_all[im_id]
print('true label: ', y_test[im_id],
      f'({class_names[y_test[im_id]]})',
      '; predicted: ', y_pred_most_probable,
      f'({class_names[y_pred_most_probable]})')
# Trailing semicolon suppresses the notebook echo of the returned image object.
plt.imshow(x_test[im_id], cmap='gray');
# Restore the weights saved after selected epochs and evaluate each on the test
# split. The model is left holding the weights loaded last ('save/mnist_18.ckpt').
checkpoint_paths = (
    'save/mnist_1.ckpt',
    'save/mnist_12.ckpt',
    'save/mnist_18.ckpt',
)
for checkpoint_path in checkpoint_paths:
    model.load_weights(checkpoint_path)
    model.evaluate(x_test, y_test, verbose=2)
We can obtain the trained variables from model layers:
# Layer 1 is the dense layer of the model defined above (layer 0 is the flatten layer).
l = model.get_layer(index=1)
# Convert the layer's kernel and bias variables to NumPy arrays.
w, b = (variable.numpy() for variable in l.weights)
print(w.shape, b.shape)
# Split the flattened input axis back into 28x28 and move the output axis to
# the front, giving one 28x28 weight image per output unit.
w = w.reshape(28, 28, -1).transpose(2, 0, 1)
Let's visualize the first 10 (one weight image per output class):
# Number of weight images to show, one panel each.
n = 10
fig, axs = plt.subplots(nrows=1, ncols=n, figsize=(4.1 * n, 4))
# Pair every panel with its weight image and the matching class name.
for ax, weight_img, label in zip(axs, w[:n], class_names):
    ax.imshow(weight_img, cmap='gray')
    ax.set_title(label)
We can also evaluate the gradients of each output with respect to an input:
# Training image used as the point at which the gradients are evaluated.
idx = 112
inp_v = x_train[idx:idx + 1]  # slice (not index) to keep the batch axis
inp = tf.constant(inp_v)      # constant tensor holding that single-image batch

# Record the forward pass so the outputs can be differentiated w.r.t. the input.
with tf.GradientTape() as tape:
    tape.watch(inp)           # constants are not tracked unless watched explicitly
    preds = model(inp)        # model outputs for the watched input

# d preds[i] / d inp[j] for every output / input-pixel pair.
grads = tape.jacobian(preds, inp)
shape_legend = '<- (Batch_preds, preds[i], Batch_inp, inp[y], inp[x])'
print(grads.shape, shape_legend)

# Drop the two batch axes, leaving one gradient image per model output.
grads = grads.numpy()[0, :, 0]
predicted_class = np.argmax(preds[0])
print('prediction:', predicted_class)
# One panel for the raw input plus one per model output (derived from `grads`
# instead of a hard-coded count, so the figure follows the number of outputs).
n_panels = len(grads) + 1
fig, axs = plt.subplots(1, n_panels, figsize=(4.1 * n_panels, 4))
axs[0].imshow(inp_v[0])
axs[0].set_title('raw')
# Shared colour limits so gradient magnitudes are comparable across panels.
vmin, vmax = grads.min(), grads.max()
for i, g in enumerate(grads):
    axs[i+1].imshow(g, cmap='gray', vmin=vmin, vmax=vmax)
    # The outputs are Fashion-MNIST class probabilities, so label them as
    # classes rather than digits.
    axs[i+1].set_title(r'$\frac{\partial\;P(class\,%d)}{\partial\;input}$' % i, fontdict={'size': 16})
Make a deeper model, with wider layers. Remember to use the `'softmax'` activation in the last layer, as required for the classification task to encode pseudoprobabilities. In the other layers you could use `'relu'`.
Try to achieve 90% accuracy. Does your model overfit?
# 1. create model
# 2. train the model
# 3. plot the loss and accuracy evolution during training
# 4. evaluate model in best point (before overfitting)