In [1]:
# code for loading the format for the notebook
import os

# path : store the current path to convert back to it later
path = os.getcwd()
os.chdir(os.path.join('..', 'notebook_format'))

from formats import load_style
load_style(plot_style=False)
Out[1]:
In [2]:
os.chdir(path)

# 1. magic to print version
# 2. magic so that the notebook will reload external python modules
%load_ext watermark
%load_ext autoreload 
%autoreload 2

import numpy as np
import pandas as pd
from keras.datasets import mnist
from keras.utils import np_utils
from keras.optimizers import RMSprop
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation

%watermark -a 'Ethen' -d -t -v -p numpy,pandas,keras
Using TensorFlow backend.
Ethen 2018-08-26 22:56:05 

CPython 3.6.4
IPython 6.4.0

numpy 1.14.1
pandas 0.23.0
keras 2.2.2

Keras Basics

This notebook walks through the basic Keras API to build a simple multi-layer neural network.

In [3]:
n_classes = 10
n_features = 784 # mnist is a 28 * 28 image

# load the dataset and apply some standard preprocessing steps
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, n_features)
X_test = X_test.reshape(10000, n_features)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# pixel values range from 0 to 255; we can normalize them
# to the [0, 1] range by dividing every value by 255
X_train /= 255
X_test /= 255

print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
60000 train samples
10000 test samples
In [4]:
# convert class vectors to binary class matrices (one-hot encoding)
# note: you HAVE to do this step, since the categorical_crossentropy loss used later expects one-hot targets
Y_train = np_utils.to_categorical(y_train, n_classes)
Y_test = np_utils.to_categorical(y_test , n_classes)
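
To make the encoding concrete, here is a quick illustration (added for clarity, not part of the original run) of what np_utils.to_categorical does to a single integer label:

# the label 3 becomes a length-10 vector with a 1 in position 3
print(np_utils.to_categorical([3], n_classes))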

Basics of training a model:

The easiest way to build models in Keras is to use the Sequential model and its .add() method to stack layers together, one after another, to build up our network.

  • We start with Dense (fully-connected) layers, where we specify how many nodes we want in the layer. Since the first layer we add receives the input, we have to make sure its input_dim parameter matches the number of features (columns) in the training set. After the first layer, we no longer need to specify the input size, as Keras infers it from the previous layer.
  • Then we specify the Activation function for that layer, and add a Dropout layer if we wish.
  • For the last Dense and Activation layers, we set the output size to the number of classes and use a softmax activation so the model outputs a probability for each class (a more compact way of writing the same stack is sketched after the cell below).
In [5]:
# define the model
model = Sequential()
model.add(Dense(512, input_dim = n_features))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(n_classes))
model.add(Activation('softmax'))
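
As a side note (a sketch that is not part of the original notebook), the same architecture can be written more compactly by passing the activation directly to each Dense layer instead of adding separate Activation layers:

# equivalent, more compact definition of the same network
model_alt = Sequential()
model_alt.add(Dense(512, input_dim = n_features, activation = 'relu'))
model_alt.add(Dropout(0.2))
model_alt.add(Dense(512, activation = 'relu'))
model_alt.add(Dropout(0.2))
model_alt.add(Dense(n_classes, activation = 'softmax'))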
In [6]:
# we can check the summary to check the number of parameters
model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
activation_2 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
=================================================================
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________

Once our model looks good, we can configure its learning process with .compile(), where we specify the optimizer to use, the loss function (categorical_crossentropy is the typical choice for multi-class classification), and the metrics to track.

Finally, we .fit() the model by passing in the training set, the validation set, the number of epochs, and the batch size. The batch size is typically chosen to be a power of 2 for computational efficiency.

In [7]:
model.compile(loss = 'categorical_crossentropy', optimizer = RMSprop(), metrics = ['accuracy'])

n_epochs = 10
batch_size = 128 
history = model.fit(
    X_train, 
    Y_train,
    batch_size = batch_size, 
    epochs = n_epochs,
    verbose = 1,  # set it to 0 if we don't want to see progress bars
    validation_data = (X_test, Y_test)
)
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
60000/60000 [==============================] - 5s 76us/step - loss: 0.2464 - acc: 0.9243 - val_loss: 0.1205 - val_acc: 0.9610
Epoch 2/10
60000/60000 [==============================] - 4s 73us/step - loss: 0.1029 - acc: 0.9691 - val_loss: 0.0954 - val_acc: 0.9684
Epoch 3/10
60000/60000 [==============================] - 4s 72us/step - loss: 0.0741 - acc: 0.9774 - val_loss: 0.0746 - val_acc: 0.9776
Epoch 4/10
60000/60000 [==============================] - 4s 71us/step - loss: 0.0610 - acc: 0.9812 - val_loss: 0.0724 - val_acc: 0.9798
Epoch 5/10
60000/60000 [==============================] - 4s 73us/step - loss: 0.0508 - acc: 0.9846 - val_loss: 0.0709 - val_acc: 0.9815
Epoch 6/10
60000/60000 [==============================] - 5s 79us/step - loss: 0.0429 - acc: 0.9876 - val_loss: 0.0687 - val_acc: 0.9810
Epoch 7/10
60000/60000 [==============================] - 4s 73us/step - loss: 0.0380 - acc: 0.9888 - val_loss: 0.0728 - val_acc: 0.9827
Epoch 8/10
60000/60000 [==============================] - 4s 72us/step - loss: 0.0347 - acc: 0.9900 - val_loss: 0.0888 - val_acc: 0.9815
Epoch 9/10
60000/60000 [==============================] - 4s 73us/step - loss: 0.0308 - acc: 0.9907 - val_loss: 0.0765 - val_acc: 0.9831
Epoch 10/10
60000/60000 [==============================] - 5s 75us/step - loss: 0.0294 - acc: 0.9915 - val_loss: 0.0831 - val_acc: 0.9834
In [8]:
# history attribute stores the training and validation score and loss
history.history
Out[8]:
{'val_loss': [0.12054000333249569,
  0.09538325125724077,
  0.0746443172362633,
  0.07235066388248233,
  0.07087009320242796,
  0.06869937015355099,
  0.07279114324423717,
  0.08881157057585878,
  0.07649361667371704,
  0.08308388180260735],
 'val_acc': [0.961,
  0.9684,
  0.9776,
  0.9798,
  0.9815,
  0.981,
  0.9827,
  0.9815,
  0.9831,
  0.9834],
 'loss': [0.24640614926020304,
  0.1029205946157376,
  0.07406270666122436,
  0.06099520227760077,
  0.0507756207327048,
  0.042891469335804386,
  0.03797459708166619,
  0.03471902108440796,
  0.030829096661073467,
  0.029378291251541427],
 'acc': [0.9242833333015442,
  0.9690666666666666,
  0.9773999999682108,
  0.9811666666666666,
  0.9846333333015442,
  0.9875666666348776,
  0.9887666666984558,
  0.9899833333651225,
  0.9906999999682109,
  0.9914833333015441]}
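
Since pandas is already imported, one convenient way (an added sketch, not part of the original run) to inspect these numbers is to wrap the dictionary in a DataFrame, with one row per epoch:

# each key of history.history becomes a column, one row per epoch
df_history = pd.DataFrame(history.history)
print(df_history.tail())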
In [10]:
# .evaluate gives the loss and metric scores for the dataset;
# here the result matches the last epoch of the validation history above
print('metrics: ', model.metrics_names)
score = model.evaluate(X_test, Y_test, verbose = 0)
score
metrics:  ['loss', 'acc']
Out[10]:
[0.08308388501826912, 0.9834]
In [11]:
# .get_weights() returns the weights of the model as a list;
# note that the length is 6 because we have 3 dense layers
# and each one has its associated bias term
weights = model.get_weights()
print(len(weights))

# W1 should have shape (784, 512): 784 for the
# feature columns and 512 for the number
# of dense nodes that we've specified
W1, b1, W2, b2, W3, b3 = weights
print(W1.shape)
print(b1.shape)
6
(784, 512)
(512,)
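
These shapes line up with the parameter counts reported by model.summary(); as a quick sanity check (added here), the first dense layer's parameters are just the weight matrix plus its bias vector:

# 784 * 512 weights + 512 biases = 401,920, matching dense_1 in the summary
print(W1.size + b1.size)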
In [12]:
# predict the class labels and compute the accuracy
y_pred = model.predict_classes(X_test, verbose = 0)
accuracy = np.sum(y_test == y_pred) / X_test.shape[0]
print('valid accuracy: %.2f' % (accuracy * 100))
valid accuracy: 98.34
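
predict_classes is a convenience method on Sequential models; an equivalent way (sketched here for reference) is to take the argmax of the probabilities returned by predict:

# predict returns an (n_samples, n_classes) array of class probabilities
y_prob = model.predict(X_test, verbose = 0)
y_pred_alt = np.argmax(y_prob, axis = 1)
print(np.array_equal(y_pred, y_pred_alt))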

Saving and loading the models

It is not recommended to use pickle or cPickle to save a Keras model. By saving it as an HDF5 file instead, we preserve both the configuration (architecture) and the weights of the model.

In [13]:
model.save('my_model.h5')  # creates an HDF5 file 'my_model.h5'
del model  # deletes the existing model

# returns a compiled model
# identical to the previous one
model = load_model('my_model.h5')
In [14]:
# sanity check: compute the accuracy again using the loaded model
y_pred = model.predict_classes(X_test, verbose = 0)
accuracy = np.sum(y_test == y_pred) / X_test.shape[0]
print('valid accuracy: %.2f' % (accuracy * 100))
valid accuracy: 98.34
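
If we prefer to keep the architecture and the weights in separate files, Keras also exposes them individually (a sketch under that assumption, not part of the original notebook):

from keras.models import model_from_json

# save the architecture as a JSON string and the weights as HDF5
json_string = model.to_json()
model.save_weights('my_model_weights.h5')

# rebuild the model from the two pieces; note that the restored model
# has to be compiled again before training or evaluating
restored_model = model_from_json(json_string)
restored_model.load_weights('my_model_weights.h5')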

Reference