import os
import gzip
import math
import operator
import sklearn.model_selection
import random
import matplotlib.pyplot as matplot
import matplotlib
%matplotlib inline
import pandas as pd
import numpy as np
import pickle as cPickle
from time import time
from itertools import chain
from collections import Counter
from PIL import Image
from scipy.stats import mode
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST via the TensorFlow tutorial helper, pointed at the MNIST_data/ directory.
mnist = input_data.read_data_sets('MNIST_data/')
# Pull out the image matrices and label vectors of the three predefined splits.
train = mnist.train.images
validation = mnist.validation.images
test = mnist.test.images
trlab = mnist.train.labels
vallab = mnist.validation.labels
tslab = mnist.test.labels
# Fold the validation split into the training data (images and labels alike).
train = np.concatenate((train, validation), axis=0)
trlab = np.concatenate((trlab, vallab), axis=0)
# Bare expressions: they only produce output when they end a notebook cell.
train.shape
trlab.shape
test.shape
# Preview the first training image as a 28x28 greyscale picture.
x = np.reshape(train[0], [28,28])
matplot.imshow(x, cmap='Greys_r')
0 = Black ; 1 = White
# Histogram of the pixel intensities of the first training image.
matplot.subplots(figsize=(12, 8))
matplot.hist(train[0])
matplot.show()
# Draw a grid of randomly chosen training images: one column per digit class,
# `samples` rows per column, with the class name as the title of the top row.
classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(classes)
samples = 8
# A bare figure is enough here: every panel is created by matplot.subplot()
# below, so no initial full-size Axes should be left underneath the grid.
matplot.figure(figsize=(15, 10))
for y, cls in enumerate(classes):
    # Vectorised label match instead of a Python-level comparison per label.
    idxs = np.flatnonzero(trlab == y)
    idxs = np.random.choice(idxs, samples, replace=False)
    for i, idx in enumerate(idxs):
        # Subplot numbers run row by row: row i, column y (1-based).
        plt_idx = i * num_classes + y + 1
        matplot.subplot(samples, num_classes, plt_idx)
        matplot.imshow(train[idx].reshape((28, 28)))
        matplot.axis("off")
        if i == 0:
            matplot.title(cls)
matplot.show()
We can see that the classifier could easily confuse 1 with 7; similarly, 9 looks very close to 4, 0 to 8, and in some instances 2 looks very close to 7. We can expect prediction errors in these cases.
def knn(train, train_label, test, test_label, k):
    """Classify every row of ``test`` with a brute-force k-nearest-neighbour vote.

    Neighbours are ranked by squared Euclidean distance (the square root is
    skipped because it does not change the ranking).

    Parameters
    ----------
    train : array-like, one training sample per row.
    train_label : array-like, label of each training row.
    test : array-like, one query sample per row (same width as ``train``).
    test_label : array-like, true label of each query row.
    k : int, number of neighbours that vote.

    Returns
    -------
    list
        ``[err, cm, pred]``: the overall error rate (1 - accuracy), the
        confusion matrix as a DataFrame (rows = true label, columns =
        predicted label) and the list of predicted labels.
    """
    train = np.asarray(train)
    test = np.asarray(test)
    train_label = np.asarray(train_label)
    # NumPy refuses `-` on boolean arrays, so thresholded (bool) images are
    # promoted to float before distances are computed.
    if train.dtype == bool:
        train = train.astype(np.float32)
    if test.dtype == bool:
        test = test.astype(np.float32)

    pred = []
    for query in test:
        diff = train - query
        # Row-wise dot product of diff with itself == squared distance.
        dist = np.einsum('ij, ij->i', diff, diff)
        nearest_lbs = train_label[np.argsort(dist)[:k]]
        # Majority vote. np.unique returns the labels sorted ascending and
        # argmax takes the first maximum, so ties go to the smallest label
        # (the rule scipy.stats.mode uses) without depending on the shape of
        # mode()'s return value, which differs between SciPy versions.
        values, counts = np.unique(nearest_lbs, return_counts=True)
        pred.append(values[np.argmax(counts)])

    cm = pd.DataFrame(confusion_matrix(test_label, pred))
    err = 1 - accuracy_score(test_label, pred)
    return [err, cm, pred]
# Time a full 1-NN run over the test set and report the results.
st = time()
Q1 = knn(train,trlab,test,tslab,1)
t = time() - st
print(Q1[0]) #Error Rate
print(t) #time taken (in seconds) for classifying all test images
# Divide by the actual number of test images rather than a hard-coded count.
print(t/len(test)) #Query time (in seconds) for 1 image classification
print(1-Q1[0]) #Accuracy
print(Q1[1]) #Confusion Matrix: Original Label VS Predicted Value
0 - 0.007173
1 - 0.005286
2 - 0.038760
3 - 0.039604
4 - 0.038697
5 - 0.035874
6 - 0.014614
7 - 0.035019
8 - 0.055441
9 - 0.041625
- After implementing the 1NN we get a very good error rate of 3.09 %
- The error is particularly high for 2, 3, 4, 5, 7, 8 and 9; the highest among them is 8
- There are mispredictions between 7 and 1; between 8 and 3, and 5 and 3; between 9 and 4; and between 7 and 2
def knncv(data, label, klist):
    """Leave-one-out cross-validation of the k-NN classifier for several k.

    Each sample is classified in turn from all the other samples, once for
    every ``k`` in ``klist``.

    Parameters
    ----------
    data : array-like, one sample per row.
    label : array-like, label of each row of ``data``.
    klist : iterable of int, the neighbour counts to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per entry of ``klist`` (columns are
        numbered 0..len(klist)-1); a cell is 1 when the sample was
        misclassified for that k and 0 otherwise, so the column means are the
        leave-one-out error rates.
    """
    data = np.asarray(data)
    # NumPy refuses `-` on boolean arrays; promote thresholded images.
    if data.dtype == bool:
        data = data.astype(np.float32)
    label = np.asarray(label)
    klist = list(klist)
    n_samples = len(label)

    # Fill a plain integer array and wrap it once at the end: chained
    # `df.iloc[p][i] = ...` assignment is slow and is not guaranteed to write
    # through to the frame.
    errors = np.zeros((n_samples, len(klist)), dtype=int)
    for p in range(n_samples):
        diff = data - data[p]
        # Row-wise dot product of diff with itself == squared distance.
        dis = np.einsum('ij, ij->i', diff, diff)
        # Rank once per sample (not once per k) and drop the held-out sample
        # itself instead of copying the whole data set without it.
        order = np.argsort(dis)
        ranked_labels = label[order[order != p]]
        for i, k in enumerate(klist):
            # Majority vote; ties go to the smallest label.
            values, counts = np.unique(ranked_labels[:k], return_counts=True)
            if values[np.argmax(counts)] != label[p]:
                errors[p, i] = 1

    return pd.DataFrame(errors, index=range(n_samples), columns=range(len(klist)))
# Leave-one-out CV over the training data for k = 1..20, then plot the mean
# error of each k.
Q2 = knncv(train, trlab, range(1,21))
# print(np.mean(Q2, axis = 0)) # Error rate for K values (1 to 20)
matplot.subplots(figsize=(20, 10))
matplot.plot(np.mean(Q2, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value')
matplot.show()
# Time a full 3-NN run over the test set.
st = time()
Q2a = knn(train,trlab,test,tslab,3)
t = time()-st
# Divide by the actual number of test images rather than a hard-coded count.
print(t/len(test)) #Query time (in seconds) for 1 image classification for 3NN
print(Q2a[0]*100) #Error Rate (in percent)
print(Q2a[1]) #Confusion Matrix
0 - 0.006122
1 - 0.001762
2 - 0.034884
3 - 0.033663
4 - 0.032587
5 - 0.036996
6 - 0.014614
7 - 0.035992
8 - 0.061602
9 - 0.040634
- We improved a bit
- After implementing the 3NN we get a better error rate of 2.95 %
- The error decreased a little for 2, 3, 4, 5, 7 and 9; the highest error is still for 8, where it actually increased
- There are still some mispredictions between 7 and 1; between 8 and 3, and 5 and 3; between 9 and 4; and between 7 and 2
Importing data
import os
import gzip
import math
import operator
import sklearn.model_selection
import random
import matplotlib.pyplot as matplot
import matplotlib
%matplotlib inline
import pandas as pd
import numpy as np
import pickle as cPickle
from time import time
from itertools import chain
from collections import Counter
from PIL import Image
from scipy.stats import mode
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST via the TensorFlow tutorial helper, pointed at the MNIST_data/ directory.
mnist = input_data.read_data_sets('MNIST_data/')
# Pull out the image matrices and label vectors of the three predefined splits.
train = mnist.train.images
validation = mnist.validation.images
test = mnist.test.images
trlab = mnist.train.labels
vallab = mnist.validation.labels
tslab = mnist.test.labels
# Fold the validation split into the training data (images and labels alike).
train = np.concatenate((train, validation), axis=0)
trlab = np.concatenate((trlab, vallab), axis=0)
KNN function
def knn(train, train_label, test, test_label, k):
    """Classify every row of ``test`` with a brute-force k-nearest-neighbour vote.

    Neighbours are ranked by squared Euclidean distance (the square root is
    skipped because it does not change the ranking).

    Parameters
    ----------
    train : array-like, one training sample per row.
    train_label : array-like, label of each training row.
    test : array-like, one query sample per row (same width as ``train``).
    test_label : array-like, true label of each query row.
    k : int, number of neighbours that vote.

    Returns
    -------
    list
        ``[err, cm, pred]``: the overall error rate (1 - accuracy), the
        confusion matrix as a DataFrame (rows = true label, columns =
        predicted label) and the list of predicted labels.
    """
    train = np.asarray(train)
    test = np.asarray(test)
    train_label = np.asarray(train_label)
    # NumPy refuses `-` on boolean arrays, so thresholded (bool) images are
    # promoted to float before distances are computed.
    if train.dtype == bool:
        train = train.astype(np.float32)
    if test.dtype == bool:
        test = test.astype(np.float32)

    pred = []
    for query in test:
        diff = train - query
        # Row-wise dot product of diff with itself == squared distance.
        dist = np.einsum('ij, ij->i', diff, diff)
        nearest_lbs = train_label[np.argsort(dist)[:k]]
        # Majority vote. np.unique returns the labels sorted ascending and
        # argmax takes the first maximum, so ties go to the smallest label
        # (the rule scipy.stats.mode uses) without depending on the shape of
        # mode()'s return value, which differs between SciPy versions.
        values, counts = np.unique(nearest_lbs, return_counts=True)
        pred.append(values[np.argmax(counts)])

    cm = pd.DataFrame(confusion_matrix(test_label, pred))
    err = 1 - accuracy_score(test_label, pred)
    return [err, cm, pred]
Defining the error rate for the kNN to display in a tabulated format
def error_rate(confusion_matrix):
    """Return the per-class error rate derived from a confusion matrix.

    Parameters
    ----------
    confusion_matrix : square DataFrame or array with true labels along the
        rows and predicted labels along the columns (any number of classes).

    Returns
    -------
    pandas.DataFrame
        A single column named ``'Error rate'`` with one row per class:
        ``1 - correct / row_total``. A class with an empty row yields NaN.
    """
    matrix = np.asarray(confusion_matrix, dtype=float)
    # Row totals = number of samples whose true label is that class.
    totals = matrix.sum(axis=1)
    # An empty row is 0/0; let it become NaN quietly instead of warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        rates = 1 - np.diag(matrix) / totals
    return pd.DataFrame({'Error rate': rates})
KNN with Leave-one-out Cross validation
def knncv(data, label, klist):
    """Leave-one-out cross-validation of the k-NN classifier for several k.

    Each sample is classified in turn from all the other samples, once for
    every ``k`` in ``klist``.

    Parameters
    ----------
    data : array-like, one sample per row.
    label : array-like, label of each row of ``data``.
    klist : iterable of int, the neighbour counts to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per entry of ``klist`` (columns are
        numbered 0..len(klist)-1); a cell is 1 when the sample was
        misclassified for that k and 0 otherwise, so the column means are the
        leave-one-out error rates.
    """
    data = np.asarray(data)
    # NumPy refuses `-` on boolean arrays; promote thresholded images.
    if data.dtype == bool:
        data = data.astype(np.float32)
    label = np.asarray(label)
    klist = list(klist)
    n_samples = len(label)

    # Fill a plain integer array and wrap it once at the end: chained
    # `df.iloc[p][i] = ...` assignment is slow and is not guaranteed to write
    # through to the frame.
    errors = np.zeros((n_samples, len(klist)), dtype=int)
    for p in range(n_samples):
        diff = data - data[p]
        # Row-wise dot product of diff with itself == squared distance.
        dis = np.einsum('ij, ij->i', diff, diff)
        # Rank once per sample (not once per k) and drop the held-out sample
        # itself instead of copying the whole data set without it.
        order = np.argsort(dis)
        ranked_labels = label[order[order != p]]
        for i, k in enumerate(klist):
            # Majority vote; ties go to the smallest label.
            values, counts = np.unique(ranked_labels[:k], return_counts=True)
            if values[np.argmax(counts)] != label[p]:
                errors[p, i] = 1

    return pd.DataFrame(errors, index=range(n_samples), columns=range(len(klist)))
# Draw a random subset of 6000 training images for the (expensive)
# leave-one-out runs. Sampling is done WITHOUT replacement: a duplicated image
# would stay in the training set when its twin is held out, giving a
# zero-distance neighbour and an optimistically biased error estimate.
seq = np.random.choice(len(train), 6000, replace=False)
train_samp = train[seq]
trlab_samp = trlab[seq]
# Bare expressions: they only produce output when they end a notebook cell.
train_samp.shape
trlab_samp.shape
# Compare the label distribution of the subset (left) with the full set (right).
fig, ax = matplot.subplots(1,2, figsize=(15,7))
ax[0].hist(trlab_samp)
ax[1].hist(trlab)
matplot.show()
def downsamples(n, data):
    """Keep every ``n``-th feature (column) of ``data``, starting at column 0.

    Parameters
    ----------
    n : int, positive stride between the columns that are kept.
    data : 2-D array, one flattened image per row (any width).

    Returns
    -------
    numpy.ndarray
        A new array holding columns 0, n, 2n, ... of ``data``.

    Raises
    ------
    ValueError
        If ``n`` is not a positive integer.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    data = np.asarray(data)
    # Take the width from the data instead of assuming 784 pixels per row.
    columns = np.arange(0, data.shape[1], n)
    return data[:, columns]
# Downsampling experiment, n=4: keep every 4th feature of the flattened images
# for the sampled training subset, the full training set and the test set.
q3tr = downsamples(4, train_samp)
q3tr_full = downsamples(4, train)
q3ts = downsamples(4, test)
# Leave-one-out CV on the sampled subset for k = 1..20.
q3n4 = knncv(q3tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q3n4, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for downsampling n=4)')
matplot.show()
# Time the full train/test run with k=1.
st = time()
q3a = knn(q3tr_full, trlab, q3ts, tslab, 1)
end = time() - st
t3a = end/len(tslab) # Query time: elapsed seconds per test image
print(q3a[0]) #Error rate
q3a[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q3a[1]) #Individual digit errors
# Downsampling experiment, n=7: keep every 7th feature of the flattened images
# for the sampled training subset, the full training set and the test set.
q3btr = downsamples(7, train_samp)
q3btr_full = downsamples(7, train)
q3bts = downsamples(7, test)
# Leave-one-out CV on the sampled subset for k = 1..20.
q3n7 = knncv(q3btr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q3n7, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for downsampling n=7)')
matplot.show()
# Time the full train/test run with k=5.
st = time()
q3b = knn(q3btr_full, trlab, q3bts, tslab, 5)
end = time() - st
t3b=end/len(tslab) # Query time: elapsed seconds per test image
print(q3b[0]) #Error rate
q3b[1] #Confusion Matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q3b[1]) #Individual digit errors
# Downsampling experiment, n=14: keep every 14th feature of the flattened
# images for the sampled training subset, the full training set and the test set.
q3ctr = downsamples(14, train_samp)
q3ctr_full = downsamples(14, train)
q3cts = downsamples(14, test)
# Leave-one-out CV on the sampled subset for k = 1..20.
q3n14 = knncv(q3ctr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q3n14, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for downsampling n=14)')
matplot.show()
# Time the full train/test run with k=9.
st = time()
q3c = knn(q3ctr_full, trlab, q3cts, tslab, 9)
end = time() - st
t3c = end/len(tslab) # Query time: elapsed seconds per test image
print(q3c[0]) #Error rate
q3c[1] #Confusion Matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q3c[1]) #Individual digit errors
# Downsampling experiment, n=2: keep every 2nd feature of the flattened images
# for the sampled training subset, the full training set and the test set.
q3dtr = downsamples(2, train_samp)
q3dtr_full = downsamples(2, train)
q3dts = downsamples(2, test)
# Leave-one-out CV on the sampled subset for k = 1..20.
q3n2 = knncv(q3dtr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q3n2, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for downsampling n=2)')
matplot.show()
# Time the full train/test run with k=1.
st = time()
q3d = knn(q3dtr_full, trlab, q3dts, tslab, 1)
end = time() - st
t3d = end/len(tslab) # Query time: elapsed seconds per test image
print(q3d[0]) #Error rate
q3d[1] #Confusion Matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q3d[1])
# Summary of the downsampling runs: error rate and per-image query time
# against the stride n. The lists are ordered to match n = 2, 4, 7, 14.
n = [2,4,7,14]
err = [q3d[0],q3a[0],q3b[0],q3c[0]]
t = [t3d,t3a,t3b,t3c]
matplot.subplots(figsize=(20, 10))
# Both curves share one y axis (error is a fraction, time is in seconds).
matplot.plot(n, err, label="Error") # Blue
matplot.plot(n, t, label="Query time in s") # Orange
matplot.xticks(n,n)
matplot.grid(True)
matplot.legend()
matplot.title('N vs Error (Blue) & N vs Query time in s (Orange)')
matplot.show()
# The k passed to knn() in each of the runs above, plotted against n.
n = [2,4,7,14]
k_val = [1,1,5,9]
matplot.subplots(figsize=(10, 5))
matplot.plot(n, k_val, label="K")
matplot.xticks(n,n)
matplot.legend()
matplot.grid(True)
matplot.title('N vs K-value')
matplot.show()
def downsample2(n, data):
    """Shrink square images by summing the pixels of each ``n`` x ``n`` tile.

    Every row of ``data`` is a flattened square image (28x28 for MNIST). The
    image is cut into non-overlapping ``n`` x ``n`` tiles and each tile is
    replaced by the sum of its pixels; the tile sums are returned flattened
    row by row.

    Parameters
    ----------
    n : int, tile edge length; must divide the image side length.
    data : 2-D array, one flattened square image per row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), (side // n) ** 2)``. Floating-point
        input is returned as float64. Sums are computed in one vectorised
        pass, so for float input the last bits may differ from a tile-by-tile
        summation.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, its rows are not square images, or ``n`` does
        not divide the side length.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be 2-D: one flattened image per row")
    n_images, n_pixels = data.shape
    # Take the side length from the data instead of assuming 28.
    side = int(round(np.sqrt(n_pixels)))
    if side * side != n_pixels:
        raise ValueError("each row must be a flattened square image")
    if n < 1 or side % n != 0:
        raise ValueError("n must be a positive divisor of the image side length")

    tiles = side // n
    # Axes: (image, tile row, row inside tile, tile column, column inside
    # tile). Summing the two "inside tile" axes leaves one value per tile.
    sums = data.reshape(n_images, tiles, n, tiles, n).sum(axis=(2, 4))
    if np.issubdtype(sums.dtype, np.floating):
        sums = sums.astype(np.float64)
    return sums.reshape(n_images, tiles * tiles)
# Smart-sampling experiment, n=4: replace each 4x4 pixel tile by its sum, for
# the sampled training subset, the full training set and the test set.
q4atr = downsample2(4, train_samp)
q4atr_full = downsample2(4, train)
q4ats = downsample2(4, test)
Smartsampled image
# Preview the first tile-summed training image (28/4 = 7 tiles per side).
x1 = np.reshape(q4atr_full[0], [7,7])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled subset for k = 1..20.
q4n4 = knncv(q4atr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q4n4, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for smartsampling n=4)')
matplot.show()
# Time the full train/test run with k=1.
st = time()
q4a = knn(q4atr_full, trlab, q4ats, tslab, 1)
end = time() - st
t4a = end/len(tslab) # Query time: elapsed seconds per test image
print(q4a[0]) # Error rate
q4a[1] # Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q4a[1])
# Smart-sampling experiment, n=7: replace each 7x7 pixel tile by its sum, for
# the sampled training subset, the full training set and the test set.
q4btr = downsample2(7, train_samp)
q4btr_full = downsample2(7, train)
q4bts = downsample2(7, test)
# Preview the first tile-summed training image (28/7 = 4 tiles per side).
x1 = np.reshape(q4btr_full[0], [4,4])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled subset for k = 1..20.
q4n7 = knncv(q4btr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q4n7, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for smartsampling n=7)')
matplot.show()
# Time the full train/test run with k=7.
st = time()
q4b = knn(q4btr_full, trlab, q4bts, tslab, 7)
end = time() - st
t4b = end/len(tslab) # Query time: elapsed seconds per test image
print(q4b[0]) # Error rate
q4b[1] # Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q4b[1])
# Smart-sampling experiment, n=14: replace each 14x14 pixel tile by its sum,
# for the sampled training subset, the full training set and the test set.
q4ctr = downsample2(14, train_samp)
q4ctr_full = downsample2(14, train)
q4cts = downsample2(14, test)
# Preview the first tile-summed training image (28/14 = 2 tiles per side).
x1 = np.reshape(q4ctr_full[0], [2,2])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled subset for k = 1..20.
q4n14 = knncv(q4ctr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q4n14, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for smartsampling n=14)')
matplot.show()
# Time the full train/test run with k=20.
st = time()
q4c = knn(q4ctr_full, trlab, q4cts, tslab, 20)
end = time() - st
t4c = end/len(tslab) # Query time: elapsed seconds per test image
print(q4c[0]) # Error rate
q4c[1] # Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q4c[1])
# Smart-sampling experiment, n=2: replace each 2x2 pixel tile by its sum, for
# the sampled training subset, the full training set and the test set.
q4dtr = downsample2(2, train_samp)
q4dtr_full = downsample2(2, train)
q4dts = downsample2(2, test)
# Preview the first tile-summed training image (28/2 = 14 tiles per side).
x1 = np.reshape(q4dtr_full[0], [14,14])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled subset for k = 1..20.
q4n2 = knncv(q4dtr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q4n2, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for smartsampling n=2)')
matplot.show()
# Time the full train/test run with k=1.
st = time()
q4d = knn(q4dtr_full, trlab, q4dts, tslab, 1)
end = time() - st
t4d = end/len(tslab) # Query time: elapsed seconds per test image
print(q4d[0]) # Error rate
q4d[1] # Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q4d[1])
# Summary of the smart-sampling runs: error rate and per-image query time
# against the tile size n. The lists are ordered to match n = 2, 4, 7, 14.
n = [2,4,7,14]
err = [q4d[0],q4a[0],q4b[0],q4c[0]]
t = [t4d,t4a,t4b,t4c]
matplot.subplots(figsize=(20, 10))
# Both curves share one y axis (error is a fraction, time is in seconds).
matplot.plot(n, err, label="Error") # Blue
matplot.plot(n, t, label="Query time in s") # Orange
matplot.xticks(n,n)
matplot.grid(True)
matplot.legend()
matplot.title('N vs Error (Blue) & N vs Query time in s (Orange)')
matplot.show()
# The k passed to knn() in each of the runs above, plotted against n.
n = [2,4,7,14]
k_val = [1,1,7,20]
matplot.subplots(figsize=(10, 5))
matplot.plot(n, k_val, label="K")
matplot.xticks(n,n)
# Put the y ticks exactly on the k values that were used.
matplot.yticks(k_val, k_val)
matplot.legend()
matplot.grid(True)
matplot.title('N vs K-value')
matplot.show()
# Extreme case, n=28: the single 28x28 tile covers the whole image, so every
# image is reduced to one value (the sum of all its pixels).
q5tr = downsample2(28, train_samp)
q5tr_full = downsample2(28, train)
q5ts = downsample2(28, test)
# Preview the first reduced training image (a single 1x1 cell).
x1 = np.reshape(q5tr_full[0], [1,1])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled subset for k = 1..20.
q5k = knncv(q5tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q5k, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (for smartsampling n=28)')
matplot.show()
# Time the full train/test run with k=1.
st = time()
q5 = knn(q5tr_full, trlab, q5ts, tslab, 1)
end = time() - st
print(end/len(tslab)) # Query time: elapsed seconds per test image
print(q5[0]) # Error rate
q5[1] # Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q5[1])
Importing data
import os
import gzip
import math
import operator
import sklearn.model_selection
import random
import matplotlib.pyplot as matplot
import matplotlib
%matplotlib inline
import pandas as pd
import numpy as np
import pickle as cPickle
from time import time
from itertools import chain
from collections import Counter
from PIL import Image
from scipy.stats import mode
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST via the TensorFlow tutorial helper, pointed at the MNIST_data/ directory.
mnist = input_data.read_data_sets('MNIST_data/')
# Pull out the image matrices and label vectors of the three predefined splits.
train = mnist.train.images
validation = mnist.validation.images
test = mnist.test.images
trlab = mnist.train.labels
vallab = mnist.validation.labels
tslab = mnist.test.labels
# Fold the validation split into the training data (images and labels alike).
train = np.concatenate((train, validation), axis=0)
trlab = np.concatenate((trlab, vallab), axis=0)
KNN function
def knn(train, train_label, test, test_label, k):
    """Classify every row of ``test`` with a brute-force k-nearest-neighbour vote.

    Neighbours are ranked by squared Euclidean distance (the square root is
    skipped because it does not change the ranking).

    Parameters
    ----------
    train : array-like, one training sample per row.
    train_label : array-like, label of each training row.
    test : array-like, one query sample per row (same width as ``train``).
    test_label : array-like, true label of each query row.
    k : int, number of neighbours that vote.

    Returns
    -------
    list
        ``[err, cm, pred]``: the overall error rate (1 - accuracy), the
        confusion matrix as a DataFrame (rows = true label, columns =
        predicted label) and the list of predicted labels.
    """
    train = np.asarray(train)
    test = np.asarray(test)
    train_label = np.asarray(train_label)
    # NumPy refuses `-` on boolean arrays, so thresholded (bool) images are
    # promoted to float before distances are computed.
    if train.dtype == bool:
        train = train.astype(np.float32)
    if test.dtype == bool:
        test = test.astype(np.float32)

    pred = []
    for query in test:
        diff = train - query
        # Row-wise dot product of diff with itself == squared distance.
        dist = np.einsum('ij, ij->i', diff, diff)
        nearest_lbs = train_label[np.argsort(dist)[:k]]
        # Majority vote. np.unique returns the labels sorted ascending and
        # argmax takes the first maximum, so ties go to the smallest label
        # (the rule scipy.stats.mode uses) without depending on the shape of
        # mode()'s return value, which differs between SciPy versions.
        values, counts = np.unique(nearest_lbs, return_counts=True)
        pred.append(values[np.argmax(counts)])

    cm = pd.DataFrame(confusion_matrix(test_label, pred))
    err = 1 - accuracy_score(test_label, pred)
    return [err, cm, pred]
Defining the error rate for the kNN to display in a tabulated format
def error_rate(confusion_matrix):
    """Return the per-class error rate derived from a confusion matrix.

    Parameters
    ----------
    confusion_matrix : square DataFrame or array with true labels along the
        rows and predicted labels along the columns (any number of classes).

    Returns
    -------
    pandas.DataFrame
        A single column named ``'Error rate'`` with one row per class:
        ``1 - correct / row_total``. A class with an empty row yields NaN.
    """
    matrix = np.asarray(confusion_matrix, dtype=float)
    # Row totals = number of samples whose true label is that class.
    totals = matrix.sum(axis=1)
    # An empty row is 0/0; let it become NaN quietly instead of warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        rates = 1 - np.diag(matrix) / totals
    return pd.DataFrame({'Error rate': rates})
KNN with Leave-one-out Cross validation
def knncv(data, label, klist):
    """Leave-one-out cross-validation of the k-NN classifier for several k.

    Each sample is classified in turn from all the other samples, once for
    every ``k`` in ``klist``.

    Parameters
    ----------
    data : array-like, one sample per row.
    label : array-like, label of each row of ``data``.
    klist : iterable of int, the neighbour counts to evaluate.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per entry of ``klist`` (columns are
        numbered 0..len(klist)-1); a cell is 1 when the sample was
        misclassified for that k and 0 otherwise, so the column means are the
        leave-one-out error rates.
    """
    data = np.asarray(data)
    # NumPy refuses `-` on boolean arrays; promote thresholded images.
    if data.dtype == bool:
        data = data.astype(np.float32)
    label = np.asarray(label)
    klist = list(klist)
    n_samples = len(label)

    # Fill a plain integer array and wrap it once at the end: chained
    # `df.iloc[p][i] = ...` assignment is slow and is not guaranteed to write
    # through to the frame.
    errors = np.zeros((n_samples, len(klist)), dtype=int)
    for p in range(n_samples):
        diff = data - data[p]
        # Row-wise dot product of diff with itself == squared distance.
        dis = np.einsum('ij, ij->i', diff, diff)
        # Rank once per sample (not once per k) and drop the held-out sample
        # itself instead of copying the whole data set without it.
        order = np.argsort(dis)
        ranked_labels = label[order[order != p]]
        for i, k in enumerate(klist):
            # Majority vote; ties go to the smallest label.
            values, counts = np.unique(ranked_labels[:k], return_counts=True)
            if values[np.argmax(counts)] != label[p]:
                errors[p, i] = 1

    return pd.DataFrame(errors, index=range(n_samples), columns=range(len(klist)))
Generating random sample of the data
# Draw random subsets for the (expensive) experiments: 6000 training images
# and 1000 test images. Sampling is done WITHOUT replacement: a duplicated
# training image would stay in the training set when its twin is held out in
# leave-one-out CV, giving a zero-distance neighbour and an optimistically
# biased error estimate.
seq = np.random.choice(len(train), 6000, replace=False)
train_samp = train[seq]
trlab_samp = trlab[seq]
# Bare expressions: they only produce output when they end a notebook cell.
train_samp.shape
trlab_samp.shape
seq = np.random.choice(len(test), 1000, replace=False)
ts_samp = test[seq]
tslab_samp = tslab[seq]
Comparing sampled data distribution with the original
# Label histogram of the sampled training subset (left) next to the full
# training set (right). The bare `fig.show` attribute reference was dropped:
# it was never called, and matplot.show() already renders the figure.
fig, ax = matplot.subplots(1,2, figsize=(10,5))
ax[0].hist(trlab_samp)
ax[1].hist(trlab)
matplot.show()
Comparing test samples as well
# Label histogram of the sampled test subset (left) next to the full test set
# (right). The bare `fig.show` attribute reference was dropped: it was never
# called, and matplot.show() already renders the figure.
fig, ax = matplot.subplots(1,2, figsize=(8,3))
ax[0].hist(tslab_samp)
ax[1].hist(tslab)
matplot.show()
We see that all the images are in greyscale. We can apply a threshold and convert the individual pixel values into 0 and 1, which effectively renders the images in black and white. Let's try this with one image as an example and look at the resulting distribution.
# Threshold every training pixel at 0.75 (elementwise comparison), then plot
# the histogram of the first thresholded image.
temp = train[:,]>0.75
matplot.hist(temp[0])
matplot.show()
Now that we have seen the transformation works, we apply it to the whole training and testing sets. One thing to note: the black-and-white image is kept as float (though it only holds 0 and 1), because the original data was float.
def bnw(threshold, data):
    """Binarise ``data``: values above ``threshold`` become 1, all others 0.

    Parameters
    ----------
    threshold : float, cut-off; a value must be strictly greater to map to 1.
    data : array-like of pixel intensities.

    Returns
    -------
    numpy.ndarray
        Array of the same shape holding only 0.0 and 1.0. Floating-point
        input keeps its dtype; any other input is returned as float64. A
        float result (rather than a boolean mask) keeps arithmetic such as
        the ``-`` used for distance computations working, since NumPy does
        not support subtraction on boolean arrays.
    """
    data = np.asarray(data)
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.floating) else np.float64
    return (data > threshold).astype(out_dtype)
# Black-and-white experiment, threshold 0.75: binarise the full training set,
# the sampled test subset and the sampled training subset.
q6tr_full = bnw(0.75, train)
q6ts = bnw(0.75, ts_samp)
q6tr = bnw(0.75, train_samp)
# Preview the first binarised training image (q6tr_full is only used here).
x1 = np.reshape(q6tr_full[0], [28,28])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled training subset for k = 1..20.
q6k = knncv(q6tr, trlab_samp, range(1,21))
print(np.mean(q6k, axis = 0))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q6k, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (b n w threshold = 0.75)')
matplot.show()
# Time a 1-NN run of the sampled training subset against the sampled test subset.
st = time()
q6a = knn(q6tr, trlab_samp, q6ts, tslab_samp, 1)
end = time() - st
t6a = end/len(tslab_samp) # Query time: elapsed seconds per test image
print(q6a[0]) #Error rate
q6a[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q6a[1])
# Black-and-white experiment, threshold 0.40: binarise the full training set,
# the sampled test subset and the sampled training subset.
q6tr_full = bnw(0.40, train)
q6ts = bnw(0.40, ts_samp)
q6tr = bnw(0.40, train_samp)
# Preview the first binarised training image (q6tr_full is only used here).
x1 = np.reshape(q6tr_full[0], [28,28])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled training subset for k = 1..20.
q6b1 = knncv(q6tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q6b1, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (b n w threshold = 0.40)')
matplot.show()
# Time a 1-NN run of the sampled training subset against the sampled test subset.
st = time()
q6b = knn(q6tr, trlab_samp, q6ts, tslab_samp, 1)
end = time() - st
t6b = end/len(tslab_samp) # Query time: elapsed seconds per test image
print(q6b[0]) #Error rate
q6b[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q6b[1])
# Black-and-white experiment, threshold 0.50: binarise the full training set,
# the sampled test subset and the sampled training subset.
q6tr_full = bnw(0.50, train)
q6ts = bnw(0.50, ts_samp)
q6tr = bnw(0.50, train_samp)
# Preview the first binarised training image (q6tr_full is only used here).
x1 = np.reshape(q6tr_full[0], [28,28])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled training subset for k = 1..20.
q6c1 = knncv(q6tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q6c1, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (b n w threshold = 0.50)')
matplot.show()
# Time a 1-NN run of the sampled training subset against the sampled test subset.
st = time()
q6c = knn(q6tr, trlab_samp, q6ts, tslab_samp, 1)
end = time() - st
t6c = end/len(tslab_samp) # Query time: elapsed seconds per test image
print(q6c[0]) #Error rate
q6c[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q6c[1])
# Black-and-white experiment, threshold 0.60: binarise the full training set,
# the sampled test subset and the sampled training subset.
q6tr_full = bnw(0.60, train)
q6ts = bnw(0.60, ts_samp)
q6tr = bnw(0.60, train_samp)
# Preview the first binarised training image (q6tr_full is only used here).
x1 = np.reshape(q6tr_full[0], [28,28])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled training subset for k = 1..20.
q6d1 = knncv(q6tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q6d1, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
matplot.title('Error rate vs K value (b n w threshold = 0.60)')
matplot.show()
# Time a 1-NN run of the sampled training subset against the sampled test subset.
st = time()
q6d = knn(q6tr, trlab_samp, q6ts, tslab_samp, 1)
end = time() - st
t6d = end/len(tslab_samp) # Query time: elapsed seconds per test image
print(q6d[0]) #Error rate
q6d[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q6d[1])
# Black-and-white experiment, threshold 0.90: binarise the full training set,
# the sampled test subset and the sampled training subset.
q6tr_full = bnw(0.90, train)
q6ts = bnw(0.90, ts_samp)
q6tr = bnw(0.90, train_samp)
# Preview the first binarised training image (q6tr_full is only used here).
x1 = np.reshape(q6tr_full[0], [28,28])
matplot.imshow(x1, cmap='Greys_r')
matplot.show()
# Leave-one-out CV on the sampled training subset for k = 1..20.
q6e1 = knncv(q6tr, trlab_samp, range(1,21))
matplot.subplots(figsize=(20, 10))
# Column means of the CV result = error rate for each k.
matplot.plot(np.mean(q6e1, axis = 0))
# Column positions 0..19 are relabelled with the k values 1..20.
matplot.xticks(range(0,20), range(1,21))
matplot.grid(True)
# The title now names the threshold actually used in this run (0.90); it
# previously repeated the 0.50 label from an earlier run.
matplot.title('Error rate vs K value (b n w threshold = 0.90)')
matplot.show()
# Time a 1-NN run of the sampled training subset against the sampled test subset.
st = time()
q6e = knn(q6tr, trlab_samp, q6ts, tslab_samp, 1)
end = time() - st
t6e = end/len(tslab_samp) # Query time: elapsed seconds per test image
print(q6e[0]) #Error rate
q6e[1] #Confusion matrix (bare expression; shown only at the end of a notebook cell)
error_rate(q6e[1])
# Summary of the black-and-white runs: error rate and per-image query time
# against the threshold. The lists are ordered to match thres (note that the
# 0.75 run is stored in q6a / t6a).
thres = [0.40,0.50,0.60,0.75,0.90]
err = [q6b[0],q6c[0],q6d[0],q6a[0],q6e[0]]
t = [t6b,t6c,t6d,t6a,t6e]
matplot.subplots(figsize=(20, 10))
# Both curves share one y axis (error is a fraction, time is in seconds).
matplot.plot(thres, err, label="Error") # Blue
matplot.plot(thres, t, label="Query time in s") # Orange
matplot.xticks(thres,thres)
matplot.grid(True)
matplot.legend()
matplot.title('Threshold vs Error (Blue) & Threshold vs Query time in s (Orange)')
matplot.show()