Logistic Regression in Python
The workflow has four parts: first, import the libraries and the data; second, preprocess the data; third, build the model; finally, evaluate the model.
Step 1: import the libraries and the data.

from sklearn import datasets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# make matplotlib render Chinese labels and minus signs correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# read the data from the Excel file and preview the first five rows
data = pd.read_excel('F:\\Desktop\\数据.xlsx')
data[:5]
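Before preprocessing, it is worth checking what was actually read in. The lines below are a minimal sanity-check sketch; the exact columns and types depend on 数据.xlsx.

# quick sanity checks on the loaded DataFrame
print(data.shape)           # number of rows and columns
print(data.dtypes)          # data type of each column
print(data.isnull().sum())  # missing values per column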
Step 2: preprocess the data.

# add empty 起点 (origin) and 终点 (destination) columns
data = pd.concat([data, pd.DataFrame(columns=['起点', '终点'])], sort=True)

# split each 起终点 (origin-destination) string on '-'
n1 = []
for i in list(data['起终点'].values):
    i = i.split('-')
    n1.append(i)
for i in range(len(n1)):
    data.loc[i, '起点'] = n1[i][0]
    data.loc[i, '终点'] = n1[i][1]

# map the destination labels to integer codes
class_mapping = {label: k for k, label in enumerate(np.unique(data['终点']))}
data['终点'] = data['终点'].map(class_mapping)
data[:5]
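The split-and-assign loop above can also be written with pandas' vectorised string methods, and the manual class_mapping can be replaced by LabelEncoder, which likewise encodes the sorted unique labels as 0, 1, 2, ... The following is an equivalent sketch, assuming every 起终点 value contains exactly one '-':

# split "起点-终点" strings into the two new columns in one step
data[['起点', '终点']] = data['起终点'].str.split('-', expand=True)

# integer-encode the destination labels
from sklearn.preprocessing import LabelEncoder
data['终点'] = LabelEncoder().fit_transform(data['终点'])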
Step 3: build the model.

# keep the target column (乘车方式) and the candidate features
data1 = data[['乘车方式', '终点', '里程距离', '行驶时间', '是否合心意', '可支配收入', '是否报销', '购票价格', '是否舒适', '是否注重时间成本']]
X = data1.iloc[:, 1:]
y = data1.iloc[:, 0]

# split into 70% training and 30% test data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# standardise the features, fitting the scaler on the training set only
from sklearn.preprocessing import StandardScaler
stdsc = StandardScaler()
X_train_std = stdsc.fit_transform(X_train)
X_test_std = stdsc.transform(X_test)

# fit the logistic regression model
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=10)
lr.fit(X_train_std, y_train)
print('Training accuracy:', lr.score(X_train_std, y_train))
print('Test accuracy:', lr.score(X_test_std, y_test))

# fitted weights and intercept
lr.coef_
lr.intercept_
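lr.coef_ and lr.intercept_ are the weights w and bias b of the fitted model; in the binary case the predicted probability of the positive class is sigmoid(w·x + b) = 1 / (1 + exp(-(w·x + b))). The sketch below recomputes the probabilities from these parameters and compares them with predict_proba; it assumes a binary target, where coef_ has a single row.

import numpy as np

def sigmoid(z):
    # logistic function: maps a linear score to a probability in (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

# linear score w·x + b for each standardised test sample (binary case)
scores = X_test_std @ lr.coef_[0] + lr.intercept_[0]
manual_proba = sigmoid(scores)

# should agree with the second column of predict_proba
print(np.allclose(manual_proba, lr.predict_proba(X_test_std)[:, 1]))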
Finally, evaluate the model.

# confusion matrix on the test set
from sklearn.metrics import confusion_matrix
y_pred = lr.predict(X_test_std)
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)

# plot the confusion matrix: rows are true classes, columns are predicted classes
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
    for j in range(confmat.shape[1]):
        ax.text(x=j, y=i, s=confmat[i, j], va='center', ha='center')
plt.xlabel('预测类标')
plt.ylabel('真实类标')
plt.show()
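In this plot, row i is the true class and column j is the predicted class, so the diagonal cells are the correctly classified samples. If the target is binary with classes 0 and 1, the four cells can be unpacked as follows (a sketch under that assumption):

# sklearn orders the 2x2 confusion matrix as [[TN, FP], [FN, TP]]
tn, fp, fn, tp = confmat.ravel()
print('TN=%d  FP=%d  FN=%d  TP=%d' % (tn, fp, fn, tp))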
from sklearn.metrics import precision_score, recall_score, f1_score
print('Precision: %.4f' % precision_score(y_true=y_test, y_pred=y_pred))
print('Recall: %.4f' % recall_score(y_true=y_test, y_pred=y_pred))
print('F1: %.4f' % f1_score(y_true=y_test, y_pred=y_pred))
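These scores follow directly from the confusion-matrix counts: precision = TP / (TP + FP), recall = TP / (TP + FN), and F1 is their harmonic mean. A cross-check computed by hand (binary case only):

# recompute the metrics from the confusion matrix counts
tn, fp, fn, tp = confmat.ravel()
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
print('Precision: %.4f  Recall: %.4f  F1: %.4f' % (precision, recall, f1))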
from sklearn.metrics import roc_curve, auc

fig = plt.figure(figsize=(7, 5))

# refit on the standardised training data and get class probabilities for the test set
probas = lr.fit(X_train_std, y_train).predict_proba(X_test_std)

# ROC curve and AUC for the positive class (label 1)
fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1], pos_label=1)
roc_auc = auc(fpr, tpr)

# interpolate the TPR onto a fixed FPR grid; this is only needed when averaging
# several curves (e.g. over cross-validation folds), here there is a single curve
mean_fpr = np.linspace(0, 1, 100)
mean_tpr = np.interp(mean_fpr, fpr, tpr)
mean_tpr[0] = 0.0
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)

plt.plot(fpr, tpr, lw=1, label='ROC (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], linestyle='--', color=(0.6, 0.6, 0.6), label='random guessing')
plt.plot([0, 0, 1], [0, 1, 1], lw=2, linestyle=':', color='black', label='perfect performance')
plt.xlim([-0.05, 1.05])
plt.ylim([-0.05, 1.05])
plt.xlabel('假正率')
plt.ylabel('真正率')
plt.legend(loc="lower right")
plt.show()
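The area under the curve can also be obtained directly with roc_auc_score; the one-liner below is a sketch that should agree with the auc(fpr, tpr) value computed above (binary case, using the probabilities of class 1).

from sklearn.metrics import roc_auc_score
print('AUC: %.4f' % roc_auc_score(y_test, probas[:, 1]))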