Generalization is central to learning problems: when we train a model, we don't want it to perform well only on the training data. We want it to perform well on as many unseen cases as possible.
Overfitting means fitting the regression so closely to the training data that the model takes on the shape of that particular sample, capturing its noise and idiosyncrasies rather than the underlying pattern. Such a model has high variance and predicts new test data poorly.
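To make this concrete, a common diagnostic is the gap between training error and test error: an overfit model scores much better on the data it was trained on than on held-out data. Below is a minimal, self-contained sketch of that diagnostic (the degrees, noise level, and split are illustrative choices, not part of the main example):
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
rng = np.random.RandomState(0)
x_demo = np.sort(rng.uniform(0, 15, 60))
y_demo = x_demo * np.cos(x_demo) + rng.normal(scale=2.0, size=60)
X_train, X_test, y_train, y_test = train_test_split(
    x_demo[:, np.newaxis], y_demo, test_size=0.3, random_state=0)
for degree in (2, 5, 12):
    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    model.fit(X_train, y_train)
    train_mse = mean_squared_error(y_train, model.predict(X_train))
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    # a widening train/test gap as the degree grows signals overfitting
    print("degree %2d: train MSE %.2f, test MSE %.2f" % (degree, train_mse, test_mse))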
Here we illustrate overfitting using linear regression with polynomial basis functions. Note that as we increase the degree of the polynomial, the overfitting becomes increasingly prominent.
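Concretely, a polynomial basis expansion of degree d maps a scalar input x to the feature vector (1, x, x^2, ..., x^d), and an ordinary linear model is then fit on those expanded features. A quick illustration with scikit-learn's PolynomialFeatures, which the code below also uses:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
# degree-3 expansion of the single value x = 2: (1, x, x^2, x^3)
print(PolynomialFeatures(degree=3).fit_transform(np.array([[2.0]])))
# -> [[1. 2. 4. 8.]]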
We can mitigate overfitting with regularization techniques such as Ridge (L2), Lasso (L1), and Elastic-Net (a combination of both). Here we use Elastic-Net regularization to combat the overfitting; its effect on polynomials of various degrees is shown below.
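For reference, the objective that scikit-learn's ElasticNet minimizes combines an L1 and an L2 penalty on the weight vector $w$, with overall strength $\alpha$ (default 1.0) and mixing parameter $\rho$ = l1_ratio (default 0.5):

$$\min_w \; \frac{1}{2 n_{\text{samples}}} \lVert y - Xw \rVert_2^2 \;+\; \alpha \rho \lVert w \rVert_1 \;+\; \frac{\alpha (1 - \rho)}{2} \lVert w \rVert_2^2$$

With $\rho = 1$ this reduces to the Lasso penalty, and with $\rho = 0$ to a pure L2 (Ridge-style) penalty. The code below uses the defaults for both parameters.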
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as matplot
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
# true function to learn: f(x) = x * cos(x)
def f(x):
    return x * np.cos(x)
# generate points used to plot
x_plot = np.linspace(0, 15, 100)
# generate points and keep a subset of them
x = np.linspace(0, 15, 100)
rng = np.random.RandomState(0)
rng.shuffle(x)
x = np.sort(x[:90])
y = f(x)
# reshape to 2-D column vectors, as scikit-learn estimators expect
X = x[:, np.newaxis]
X_plot = x_plot[:, np.newaxis]
matplot.subplots(3, 3, figsize=(15, 15))
for i, degree in enumerate([0, 2, 3, 4, 5, 6, 7, 8, 9]):  # leading 0 is a placeholder; panel 1 shows the raw points
    if i == 0:
        # first panel: the training points alone, for reference
        l1 = matplot.subplot(3, 3, 1)
        l1.scatter(x, y, color='orange', s=30, marker='o', label="training points")
        l1.set_xticks(())
        l1.set_yticks(())
        l1.legend()
    else:
        # fit an unregularized polynomial of the given degree and plot its predictions
        model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
        model.fit(X, y)
        y_plot = model.predict(X_plot)
        l1 = matplot.subplot(3, 3, i + 1)
        l1.scatter(x, y, color='orange', s=30, marker='o')
        l1.plot(x_plot, y_plot, linewidth=2, label="degree %d poly" % degree)
        l1.set_xticks(())
        l1.set_yticks(())
        l1.legend()
matplot.suptitle('Polynomial degree fit for Linear Model', fontsize=16)
matplot.show()
####### ----------------------------------------------------------------------
matplot.subplots(3, 3, figsize=(15, 15))
for i, degree in enumerate([0, 2, 3, 4, 5, 6, 7, 8, 9]):
    if i == 0:
        # first panel: the training points alone, for reference
        l1 = matplot.subplot(3, 3, 1)
        l1.scatter(x, y, color='orange', s=30, marker='o', label="training points")
        l1.set_xticks(())
        l1.set_yticks(())
        l1.legend()
    else:
        # same polynomial fits as above, now with Elastic-Net regularization
        model = make_pipeline(PolynomialFeatures(degree), ElasticNet())
        model.fit(X, y)
        y_plot = model.predict(X_plot)
        l1 = matplot.subplot(3, 3, i + 1)
        l1.scatter(x, y, color='orange', s=30, marker='o')
        l1.plot(x_plot, y_plot, linewidth=2, label="degree %d poly" % degree)
        l1.set_xticks(())
        l1.set_yticks(())
        l1.legend()
matplot.suptitle('Polynomial degree fit for Linear Model with Elastic-Net (L1 and L2) Regularization', fontsize=16)
matplot.show()
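The plots above use ElasticNet with its default parameters. In practice the regularization strength is usually tuned rather than left at its default; a minimal sketch using scikit-learn's ElasticNetCV, which selects alpha by cross-validation (the cv value and fixed l1_ratio here are illustrative):
from sklearn.linear_model import ElasticNetCV
# cross-validate alpha over ElasticNetCV's default grid for a fixed l1_ratio
cv_model = make_pipeline(PolynomialFeatures(8), ElasticNetCV(l1_ratio=0.5, cv=5))
cv_model.fit(X, y)
print("selected alpha:", cv_model.named_steps['elasticnetcv'].alpha_)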
matplot.subplots(1,3, figsize=(15,5))
l12 = matplot.subplot(1, 3, 1)
l12.scatter(x, y, color='orange', s=30, marker='o', label="training points")
l12.set_xticks(())
l12.set_yticks(())
l12.set_xlabel('Data Points')
l12.legend()
#-----------------------
model = make_pipeline(PolynomialFeatures(8), LinearRegression())
model.fit(X, y)
y_plot = model.predict(X_plot)
l12 = matplot.subplot(1, 3, 2)
l12.scatter(x, y, color='orange', s=30, marker='o')
l12.plot(x_plot, y_plot, linewidth=2, label="degree 8 poly")
l12.set_xticks(())
l12.set_yticks(())
l12.set_xlabel('Linear Model')
l12.legend()
#-----------------------
model = make_pipeline(PolynomialFeatures(8), ElasticNet())
model.fit(X, y)
y_plot = model.predict(X_plot)
l12 = matplot.subplot(1, 3, 3)
l12.scatter(x, y, color='orange', s=30, marker='o')
l12.plot(x_plot, y_plot, linewidth=2, label="degree 8 poly")
l12.set_xticks(())
l12.set_yticks(())
l12.set_xlabel('Linear Model with Regularization')
l12.legend()
#-----------------------
matplot.suptitle('Elastic-Net Regularization to avoid Over-fitting', fontsize=16)
matplot.show()
Here we can clearly see the effect of regularization in avoiding overfitting: the Elastic-Net fit is far smoother than the unregularized degree-8 fit and can be expected to generalize better to new data.
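One way to see where that smoothing comes from is to compare the fitted coefficients directly; Elastic-Net's penalty shrinks them toward zero, and its L1 term can zero some out entirely. A minimal sketch (the named_steps keys are the lowercased class names that make_pipeline assigns):
lin = make_pipeline(PolynomialFeatures(8), LinearRegression()).fit(X, y)
enet = make_pipeline(PolynomialFeatures(8), ElasticNet()).fit(X, y)
# coefficient magnitudes are typically far smaller under the penalty
print("max |coef|, unregularized:", np.abs(lin.named_steps['linearregression'].coef_).max())
print("max |coef|, Elastic-Net:  ", np.abs(enet.named_steps['elasticnet'].coef_).max())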
References:
scikit-learn documentation, "Linear Models": http://scikit-learn.org/stable/modules/linear_model.html