随机森林算法python实现

    xiaoxiao2023-11-24  154

    随机森林算法python实现

    目录:瞎BB | 代码 | 导入数据 | 切分训练集测试集 | 找到最有用的几个属性 | 根据上面的代码更改属性 | 参数组合遍历找最优 | 随机森林 | 样本数据

    瞎BB

    1. 实现根据样本数据(用眼距离 distance、最长持续用眼时长 duration、总用眼时长 total_time、户外运动时长 outdoor、用眼角度 angle、健康环境光照用眼比例 proportion)判别是否需要近视预警。
    2. 样本实在太少,结果还行,原理都是一样的。

    代码

    导入数据

    import pandas

    # Load the sample data set from data.csv in the current working directory.
    patients = pandas.read_csv("data.csv")
    # Preview the first five rows (the value is displayed when run as the
    # last expression of a notebook cell; it has no effect in a plain script).
    patients.head(5)

    切分训练集测试集

    from sklearn.model_selection import train_test_split

    # Feature matrix: every column from 'distance' through 'proportion'
    # (label-based .loc slicing includes both end points).
    patients_data = patients.loc[:, 'distance':'proportion']
    # Target vector: the 'warning' column.
    patients_target = patients.loc[:, 'warning']

    # Hold out 10% of the rows as the test set; random_state is fixed so the
    # split is the same on every run.
    data_train, data_test, target_train, target_test = train_test_split(
        patients_data,
        patients_target,
        test_size=0.1,
        random_state=42,
    )

    找到最有用的几个属性

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.feature_selection import SelectKBest, f_classif

    # Names of the candidate feature columns, used as x-axis labels below.
    predictors = ["distance", "duration", "total_time", "outdoor", "angle", "proportion"]

    # Score each feature against the target with the ANOVA F-test.
    selector = SelectKBest(f_classif, k=5)
    selector.fit(data_train, target_train)

    # Convert p-values to -log10(p) so that a taller bar means a smaller
    # p-value for that feature.
    scores = -np.log10(selector.pvalues_)

    # Bar chart of the per-feature scores, one bar per entry in `predictors`.
    plt.bar(range(len(predictors)), scores)
    plt.xticks(range(len(predictors)), predictors, rotation='vertical')
    plt.show()

    根据上面的代码更改属性

    # Features kept for modelling (chosen by hand after inspecting the
    # feature-score bar chart).
    predictors_best = ["distance", "total_time", "angle", "proportion"]

    # Restrict both the training and the test frames to the kept columns.
    data_train = data_train[predictors_best]
    data_test = data_test[predictors_best]

    参数组合遍历找最优

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV, KFold

    # Hyper-parameter combinations to try: every pairing of these values.
    tree_param_grid = {
        'min_samples_split': [2, 3, 4],
        'n_estimators': [3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
    }

    # 3-fold cross-validation splitter. It is defined here so this step can be
    # run on its own, without relying on names created by a later snippet.
    kf = KFold(n_splits=3)

    # GridSearchCV(estimator, parameter grid as a dict, cross-validation splitter).
    # random_state is fixed so that repeated searches give the same result.
    grid = GridSearchCV(
        RandomForestClassifier(random_state=1),
        param_grid=tree_param_grid,
        cv=kf,
    )
    # Fit on the training set only.
    grid.fit(data_train, target_train)

    # Per-combination results, best parameter combination, best mean CV score
    # (displayed when run as the last expression of a notebook cell).
    grid.cv_results_, grid.best_params_, grid.best_score_

    随机森林

    from sklearn import model_selection
    from sklearn.ensemble import RandomForestClassifier

    # Random forest with fixed hyper-parameters; random_state is set so the
    # model is reproducible.
    rf = RandomForestClassifier(
        random_state=1,
        n_estimators=35,
        min_samples_split=2,
        min_samples_leaf=2,
    )

    # Cross-validation: 3 folds over the training set.
    kf = model_selection.KFold(n_splits=3)
    scores = model_selection.cross_val_score(rf, data_train, target_train, cv=kf)

    # Report the mean score across the folds.
    print(scores.mean())

    样本数据

    sampledistancedurationtotal_timeoutdoorangleproportionwarning(1 yes 0 no)120723441481181123468263135750132598357321264143765291157889053415116216918631630178259146325017203513437236808391111698745209224426513614761103915121914025501121179184641860112254124171167211318171286131358911432332361022950115201332261241781116171482366632751173411121457588018248516315514321193216527614633521202512435917133700213151167472547022316335258224412316581644513730242937326104336812534471975956602636123185165267002725126171452333128318498373051129309215311414480302917827814627451
    最新回复(0)