%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset shipped with scikit-learn and split it into
# the feature matrix (X) and the label vector (y).
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Summarise the data: overall shape plus how many samples carry label 1
# (reported as "positive") and label 0 (reported as "negative").
n_positive = y[y == 1].shape[0]
n_negative = y[y == 0].shape[0]
print('data shape: {0}; no. positive: {1}; no. negative: {2}'.format(
    X.shape, n_positive, n_negative))
data shape: (569, 30); no. positive: 357; no. negative: 212
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set. No random_state is given, so the
# partition (and every score derived from it) changes from run to run.
split_parts = train_test_split(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts
高斯核函数
from sklearn.svm import SVC

# Train an RBF-kernel SVM with a fixed gamma of 0.1 on the training split.
clf = SVC(C=1.0, kernel='rbf', gamma=0.1)
clf.fit(X_train, y_train)

# Compare accuracy on the data the model was fitted on with accuracy on the
# held-out split; a large gap between the two indicates overfitting.
train_score, test_score = (
    clf.score(X_train, y_train),
    clf.score(X_test, y_test),
)
print('train score: {0}; test score: {1}'.format(train_score, test_score))
train score: 1.0; test score: 0.6140350877192983
from common.utils import plot_param_curve
from sklearn.model_selection import GridSearchCV

# Candidate gamma values: 30 evenly spaced points covering [0, 0.0003].
gammas = np.linspace(0, 0.0003, num=30)
param_grid = dict(gamma=gammas)

# Exhaustive 5-fold cross-validated search over gamma, run on the full dataset
# with an otherwise default SVC.
clf = GridSearchCV(SVC(), param_grid, cv=5)
clf.fit(X, y)

best_params, best_score = clf.best_params_, clf.best_score_
print("best param: {0}\nbest score: {1}".format(best_params, best_score))

# Plot the cross-validation results against gamma using the project helper.
plt.figure(figsize=(10, 4), dpi=144)
# The trailing semicolon keeps the notebook from echoing the call's return value.
plot_param_curve(plt, gammas, clf.cv_results_, xlabel='gamma');
best param: {'gamma': 0.00011379310344827585}
best score: 0.9367311072056239
import time
from common.utils import plot_learning_curve
from sklearn.model_selection import ShuffleSplit

# Ten random 80/20 splits; random_state is fixed so the splits are repeatable.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
title = 'Learning Curves for Gaussian Kernel'

# time.clock() was deprecated in Python 3.3 and removed in 3.8;
# time.perf_counter() is the documented replacement for measuring elapsed time.
start = time.perf_counter()

# Draw the learning curve of an RBF-kernel SVC (gamma=0.01) via the project
# helper, then report how long the whole plot took to produce.
plt.figure(figsize=(10, 4), dpi=144)
plot_learning_curve(
    plt,
    SVC(C=1.0, kernel='rbf', gamma=0.01),
    title,
    X,
    y,
    ylim=(0.5, 1.01),
    cv=cv,
)

print('elaspe: {0:.6f}'.format(time.perf_counter() - start))
D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
if np.issubdtype(train_sizes_abs.dtype, np.float):
elaspe: 5.527530
多项式核函数
from sklearn.svm import SVC

# Train a degree-2 polynomial-kernel SVM on the training split.
clf = SVC(C=1.0, kernel='poly', degree=2)
clf.fit(X_train, y_train)

# Accuracy on the training split versus the held-out split.
train_score, test_score = (
    clf.score(X_train, y_train),
    clf.score(X_test, y_test),
)
print('train score: {0}; test score: {1}'.format(train_score, test_score))
train score: 0.978021978021978; test score: 0.9824561403508771
import time
from common.utils import plot_learning_curve
from sklearn.model_selection import ShuffleSplit

# Five random 80/20 splits; random_state is fixed so the splits are repeatable.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
title = 'Learning Curves with degree={0}'
degrees = [1, 2]

# time.clock() was deprecated in Python 3.3 and removed in 3.8;
# time.perf_counter() is the documented replacement for measuring elapsed time.
start = time.perf_counter()

plt.figure(figsize=(12, 4), dpi=144)
# One subplot per polynomial degree, laid out side by side; subplot indices
# are 1-based, hence enumerate(..., start=1).
for position, degree in enumerate(degrees, start=1):
    plt.subplot(1, len(degrees), position)
    plot_learning_curve(
        plt,
        SVC(C=1.0, kernel='poly', degree=degree),
        title.format(degree),
        X,
        y,
        ylim=(0.8, 1.01),
        cv=cv,
        n_jobs=4,
    )

# Total time for all learning curves, reported once after the loop.
print('elaspe: {0:.6f}'.format(time.perf_counter() - start))
D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
if np.issubdtype(train_sizes_abs.dtype, np.float):
D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
if np.issubdtype(train_sizes_abs.dtype, np.float):
elaspe: 359.281419