%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_boston

# Load the Boston house-price dataset: 506 samples, 13 numeric features.
boston = load_boston()
X = boston.data
y = boston.target
X.shape
(506, 13)
X[0]
array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
4.980e+00])
boston.feature_names
array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3)
import time
from sklearn.linear_model import LinearRegression

model = LinearRegression()

# time.clock() is the timer used in this notebook's environment; it is
# deprecated since Python 3.3 and removed in 3.8, where time.perf_counter()
# is the replacement.
start = time.clock()
model.fit(X_train, y_train)

train_score = model.score(X_train, y_train)   # R^2 on the training set
cv_score = model.score(X_test, y_test)        # R^2 on the held-out test set
print('elapsed: {0:.6f}; train_score: {1:0.6f}; cv_score: {2:.6f}'.format(time.clock()-start, train_score, cv_score))
elapsed: 0.014008; train_score: 0.723941; cv_score: 0.794958
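The plain linear model clearly underfits: its training R^2 is only about 0.72. As an illustrative aside not in the original notebook, the learned weights can be paired with the feature names to see which columns the fitted model leans on:

# Illustrative only: print each feature name with its fitted coefficient.
for name, coef in zip(boston.feature_names, model.coef_):
    print('{0:>8}: {1:+.4f}'.format(name, coef))
print('intercept: {0:.4f}'.format(model.intercept_))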
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

def polynomial_model(degree=1):
    # Expand the raw features into polynomial terms, then fit a
    # normalized linear regression on the expanded feature set.
    polynomial_features = PolynomialFeatures(degree=degree,
                                             include_bias=False)
    linear_regression = LinearRegression(normalize=True)
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])
    return pipeline
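As a quick illustration (not part of the original notebook), the degree-2 expansion with include_bias=False turns the 13 original columns into 104 features: the 13 linear terms plus 91 squared and pairwise-product terms.

from sklearn.preprocessing import PolynomialFeatures

# Illustrative only: inspect how many features the degree-2 expansion produces.
pf = PolynomialFeatures(degree=2, include_bias=False)
print(pf.fit_transform(X_train).shape)   # expected: (404, 104)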
model = polynomial_model(degree=2)

start = time.clock()
model.fit(X_train, y_train)

train_score = model.score(X_train, y_train)
cv_score = model.score(X_test, y_test)
print('elapsed: {0:.6f}; train_score: {1:0.6f}; cv_score: {2:.6f}'.format(time.clock()-start, train_score, cv_score))
elapsed: 0.064957; train_score: 0.930547; cv_score: 0.860465
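The degree-2 model fits the training data much better and also scores higher on the held-out split. A single 80/20 split can be noisy, though; as a hedged sketch (not in the original notebook), cross_val_score can average the R^2 over several shuffled splits:

from sklearn.model_selection import ShuffleSplit, cross_val_score

# Illustrative only: average R^2 over 10 shuffled 80/20 splits.
scores = cross_val_score(polynomial_model(degree=2), X, y,
                         cv=ShuffleSplit(n_splits=10, test_size=0.2, random_state=0))
print('mean cv score: {0:.6f} (+/- {1:.6f})'.format(scores.mean(), scores.std()))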
from common.utils import plot_learning_curve
from sklearn.model_selection import ShuffleSplit

# 10 random 80/20 splits for each point on the learning curve.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
plt.figure(figsize=(18, 4))
title = 'Learning Curves (degree={0})'
degrees = [1, 2, 5]

start = time.clock()
plt.figure(figsize=(18, 4), dpi=200)
for i in range(len(degrees)):
    plt.subplot(1, 3, i + 1)
    plot_learning_curve(plt, polynomial_model(degrees[i]), title.format(degrees[i]),
                        X, y, ylim=(0.01, 1.01), cv=cv)

print('elapsed: {0:.6f}'.format(time.clock()-start))
D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  if np.issubdtype(train_sizes_abs.dtype, np.float):
elapsed: 70.732592
[Figure: learning curves for polynomial models of degree 1, 2, and 5]
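plot_learning_curve is imported from the author's common/utils.py, which is not reproduced here. Assuming it follows the standard scikit-learn learning-curve recipe (an assumption, since the actual helper isn't shown), a minimal sketch compatible with the call above could look like this:

import numpy as np
from sklearn.model_selection import learning_curve

def plot_learning_curve(plt, estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(0.1, 1.0, 5)):
    # Assumed helper: plot mean train/cv scores against training-set size.
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    # Shade one standard deviation around each curve.
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o--', color="r", label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")
    plt.legend(loc="best")
    return plt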