乳腺癌支持向量机

    xiaoxiao2022-07-03  131

    %matplotlib inline import matplotlib.pyplot as plt import numpy as np # 载入数据 from sklearn.datasets import load_breast_cancer cancer = load_breast_cancer() X = cancer.data y = cancer.target print('data shape: {0}; no. positive: {1}; no. negative: {2}'.format( X.shape, y[y==1].shape[0], y[y==0].shape[0])) data shape: (569, 30); no. positive: 357; no. negative: 212 from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    高斯核函数

    # Notebook cell: Gaussian (RBF) kernel SVM -- first a single fit with fixed
    # hyper-parameters, then a cross-validated grid search over gamma.
    from sklearn.svm import SVC
    from sklearn.model_selection import GridSearchCV
    from common.utils import plot_param_curve

    # Single fit on the train/test split produced by the previous cell.
    clf = SVC(C=1.0, kernel='rbf', gamma=0.1)
    clf.fit(X_train, y_train)
    train_score = clf.score(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    score_report = 'train score: {0}; test score: {1}'
    print(score_report.format(train_score, test_score))
    # Recorded output:
    #   train score: 1.0; test score: 0.6140350877192983
    # The large train/test gap in the recorded run suggests gamma=0.1 overfits.

    # 5-fold grid search over 30 evenly spaced gamma values on the full data.
    # NOTE(review): the grid starts at gamma=0 -- confirm the installed
    # scikit-learn version accepts that value.
    gammas = np.linspace(0, 0.0003, 30)
    param_grid = {'gamma': gammas}
    clf = GridSearchCV(SVC(), param_grid, cv=5)
    clf.fit(X, y)
    print("best param: {0}\nbest score: {1}".format(clf.best_params_,
                                                    clf.best_score_))

    # Plot the cross-validation results against gamma; the trailing semicolon
    # keeps the notebook from echoing the call's return value.
    plt.figure(figsize=(10, 4), dpi=144)
    plot_param_curve(plt, gammas, clf.cv_results_, xlabel='gamma');
    # Recorded output:
    #   best param: {'gamma': 0.00011379310344827585}
    #   best score: 0.9367311072056239

    # Notebook cell: draw the learning curve of an RBF-kernel SVM (gamma=0.01)
    # over 10 shuffled 80/20 splits and report how long the plotting took.
    import time
    from common.utils import plot_learning_curve
    from sklearn.model_selection import ShuffleSplit

    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
    title = 'Learning Curves for Gaussian Kernel'

    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    plt.figure(figsize=(10, 4), dpi=144)
    plot_learning_curve(plt, SVC(C=1.0, kernel='rbf', gamma=0.01),
                        title, X, y, ylim=(0.5, 1.01), cv=cv)
    print('elaspe: {0:.6f}'.format(time.perf_counter() - start))
    # Recorded output (from the original run, which used time.clock()):
    #   D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811:
    #   FutureWarning: Conversion of the second argument of issubdtype from
    #   `float` to `np.floating` is deprecated. In future, it will be treated
    #   as `np.float64 == np.dtype(float).type`.
    #     if np.issubdtype(train_sizes_abs.dtype, np.float):
    #   elaspe: 5.527530

    多项式核函数

    # Notebook cell: polynomial-kernel SVM -- a single degree-2 fit on the
    # train/test split, then learning curves for degree 1 and degree 2.
    import time
    from sklearn.svm import SVC
    from sklearn.model_selection import ShuffleSplit
    from common.utils import plot_learning_curve

    clf = SVC(C=1.0, kernel='poly', degree=2)
    clf.fit(X_train, y_train)
    train_score = clf.score(X_train, y_train)
    test_score = clf.score(X_test, y_test)
    print('train score: {0}; test score: {1}'.format(train_score, test_score))
    # Recorded output:
    #   train score: 0.978021978021978; test score: 0.9824561403508771

    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    title = 'Learning Curves with degree={0}'
    degrees = [1, 2]

    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    plt.figure(figsize=(12, 4), dpi=144)
    # One subplot per polynomial degree, laid out side by side
    # (subplot positions are 1-based).
    for position, degree in enumerate(degrees, start=1):
        plt.subplot(1, len(degrees), position)
        plot_learning_curve(plt, SVC(C=1.0, kernel='poly', degree=degree),
                            title.format(degree), X, y,
                            ylim=(0.8, 1.01), cv=cv, n_jobs=4)
    print('elaspe: {0:.6f}'.format(time.perf_counter() - start))
    # Recorded output (from the original run, which used time.clock()); the
    # FutureWarning below was emitted twice:
    #   D:\anaconda\lib\site-packages\sklearn\model_selection\_validation.py:811:
    #   FutureWarning: Conversion of the second argument of issubdtype from
    #   `float` to `np.floating` is deprecated. In future, it will be treated
    #   as `np.float64 == np.dtype(float).type`.
    #     if np.issubdtype(train_sizes_abs.dtype, np.float):
    #   elaspe: 359.281419

    最新回复(0)