from scipy.stats import mode
import numpy as np
#from mnist import MNIST
from time import time
import pandas as pd
import os
import matplotlib.pyplot as matplot
import matplotlib
%matplotlib inline
import random
matplot.rcdefaults()
from IPython.display import display, HTML
from itertools import chain
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import seaborn as sb
from sklearn.model_selection import ParameterGrid
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data/')
train = mnist.train.images
validation = mnist.validation.images
test = mnist.test.images
trlab = mnist.train.labels
vallab = mnist.validation.labels
tslab = mnist.test.labels
train = np.concatenate((train, validation), axis=0)
trlab = np.concatenate((trlab, vallab), axis=0)
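Note: the tensorflow.examples.tutorials module only exists in TensorFlow 1.x and was removed in TensorFlow 2, so the cell above requires TF 1.x. A minimal sketch of an equivalent load via tensorflow.keras.datasets (an alternative I'm assuming here, not the original setup); Keras ships 60,000 training images, which matches the train+validation concatenation above:

# Sketch: load MNIST through Keras, flatten the 28x28 images to 784 features,
# and rescale pixel values to [0, 1] to match the input_data format above.
from tensorflow.keras.datasets import mnist as keras_mnist
(x_tr, y_tr), (x_te, y_te) = keras_mnist.load_data()
train = x_tr.reshape(-1, 784).astype('float32') / 255.0
test = x_te.reshape(-1, 784).astype('float32') / 255.0
trlab, tslab = y_tr, y_te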
mlp = MLPClassifier()
mlp.fit(train, trlab)
accuracy_score(tslab, mlp.predict(test)) # Test Accuracy
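Since confusion_matrix and seaborn are already imported above, a quick optional sketch (my addition, not part of the original tuning) shows which digits the default model confuses:

# Heatmap of the default MLP's confusion matrix on the test set.
cm = confusion_matrix(tslab, mlp.predict(test))
matplot.subplots(figsize=(8, 6))
sb.heatmap(cm, annot=True, fmt='d', cmap='Blues')
matplot.xlabel("Predicted digit")
matplot.ylabel("True digit")
matplot.show()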
We choose Alpha and Max_iter as the parameters to tune, and select the best combination of the two.
According to the scikit-learn MLPClassifier documentation:
Alpha is the L2 (ridge) penalty, i.e. the regularization term.
Max_iter is the maximum number of iterations; the solver iterates until convergence or until this limit is reached.
These two are sensible choices, as the accuracy changes noticeably while tuning them.
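Schematically, alpha enters the training objective as an L2 penalty on the weights (a sketch; scikit-learn additionally scales the penalty by the number of samples):

$$\min_{W}\;\mathcal{L}(W) + \frac{\alpha}{2}\,\lVert W \rVert_2^2$$

where $\mathcal{L}(W)$ is the cross-entropy loss, so a larger alpha shrinks the weights more strongly and reduces overfitting.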
i = 0
df = pd.DataFrame(columns=['alpha', 'max_iter', 'train_acc', 'test_acc', 'train_time'])
for a in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]:
    for mi in [10, 100, 200, 500, 1000, 2000]:
        st = time()
        mlp = MLPClassifier(alpha=a, max_iter=mi)
        mlp.fit(train, trlab)
        end = time() - st
        acc_tr = accuracy_score(trlab, mlp.predict(train))  # Train Accuracy
        acc = accuracy_score(tslab, mlp.predict(test))      # Test Accuracy
        df.loc[i] = [a, mi, acc_tr, acc, end]
        i = i + 1
df # Results
As we can see, model number 26 gives the best test accuracy, so the parameters we select are:
Parameters:
Alpha = 0.1
Max Iterations = 200
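Rather than reading the winning row off the table by eye, it can also be picked programmatically from the df built above (a small sketch):

# Row of the hyperparameter grid with the highest test accuracy.
best = df.loc[df['test_acc'].idxmax()]
best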
acc = []
acc_tr = []
timelog = []
for l in [10, 20, 50, 100, 200, 500, 1000]:
    t = time()
    mlp = MLPClassifier(alpha=0.1, max_iter=200, hidden_layer_sizes=(l,))
    mlp.fit(train, trlab)
    endt = time() - t
    a_tr = accuracy_score(trlab, mlp.predict(train))  # Train Accuracy
    a = accuracy_score(tslab, mlp.predict(test))      # Test Accuracy
    acc_tr.append(a_tr)
    acc.append(a)
    timelog.append(endt)
l = [10,20,50,100,200,500,1000]
N = len(l)
l2 = np.arange(N)
matplot.subplots(figsize=(10, 5))
matplot.plot(l2, acc, label="Testing Accuracy")
matplot.plot(l2, acc_tr, label="Training Accuracy")
matplot.xticks(l2,l)
matplot.grid(True)
matplot.xlabel("Hidden Layer Nodes")
matplot.ylabel("Accuracy")
matplot.legend()
matplot.title('Accuracy versus Nodes in the Hidden Layer for MLPClassifier', fontsize=12)
matplot.show()
l = [10,20,50,100,200,500,1000]
N = len(l)
l2 = np.arange(N)
matplot.subplots(figsize=(10, 5))
matplot.plot(l2, timelog, label="Training time in s")
matplot.xticks(l2,l)
matplot.grid(True)
matplot.xlabel("Hidden Layer Nodes")
matplot.ylabel("Time (s)")
matplot.legend()
matplot.title('Training Time versus Nodes in the Hidden Layer for MLPClassifier', fontsize=12)
matplot.show()
Final model parameters for highest test accuracy:
Alpha = 0.1
Max Iterations = 200
Hidden Layer Nodes = 500
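For completeness, a sketch of refitting with the selected hyperparameters (exact accuracy will vary slightly between runs, since the weights are randomly initialized):

# Refit the final model with the chosen parameters and report test accuracy.
final_mlp = MLPClassifier(alpha=0.1, max_iter=200, hidden_layer_sizes=(500,))
final_mlp.fit(train, trlab)
accuracy_score(tslab, final_mlp.predict(test))  # Test Accuracy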
The accuracy listed for each model corresponds to its best parameter setting.
Model | Accuracy | Training Time
---|---|---
MLP | ~98.2% | ~180 s (~3 min)
kNN | ~97.17% | 24 min
NB | 81.49% | 12 min
Log Reg | ~87.77% | 2 hr
SVM | ~96.4% | 18 min
*Note: the training times should be taken with a grain of salt, since the previous models were run on different systems and the reported figures are either average or best-case times for those models.*
Even allowing for those measurement inconsistencies, MLP is well ahead of all the other models: roughly an order of magnitude faster to train, and the most accurate.
The MLP Classifier performs best overall, giving the highest test accuracy (~98.2%) while training in under 180 seconds, i.e. about 3 minutes.