李宏毅PM2.5

xiaoxiao2022-07-06 228

李宏毅PM2.5

本文只使用了 PM2.5 这一项特征，没有考虑其他因素来预测。关于数据的处理：因为测试数据是用 9 天的数据来预测第 10 天的 PM2.5，所以我对训练数据进行切割，以 9 天为特征、第 10 天为标记，并且循环后移，例如 1-9 为特征、10 为标记，2-10 为特征、11 为标记。按照这样的切割方式，数据变成 3600*9 的训练数据。

# Linear regression on the PM2.5 rows of the course data set.
#
# Each training row is cut into sliding windows: WINDOW consecutive readings
# are the features and the reading that follows them is the label.
import os

import numpy as np
import pandas as pd

dir_path = r"G:\python3code\DeepLearn\机器学习\回归\week1"

# Number of consecutive readings used as the features of one sample.
WINDOW = 9


def build_windows(values, window=WINDOW):
    """Cut every row of *values* into sliding feature/label samples.

    For each offset ``i`` the columns ``i .. i + window - 1`` become the
    features and column ``i + window`` becomes the label.  Samples are
    ordered offset by offset (all rows for offset 0, then all rows for
    offset 1, ...).

    :param values: 2-D array-like of readings, one series per row
    :param window: number of readings used as features
    :return: ``(features, labels)`` with shapes ``(rows * k, window)`` and
        ``(rows * k,)`` where ``k = columns - window``
    :raises ValueError: if a row is too short to hold one window and a label
    """
    values = np.asarray(values, dtype=np.float64)
    n_offsets = values.shape[1] - window
    if n_offsets <= 0:
        raise ValueError(
            "need more than %d columns, got %d" % (window, values.shape[1])
        )
    features = np.vstack(
        [values[:, i:i + window] for i in range(n_offsets)]
    )
    labels = np.concatenate(
        [values[:, i + window] for i in range(n_offsets)]
    )
    return features, labels


class LinearRegression(object):
    """Least-squares linear regression with helpers for scaling and scoring."""

    def __init__(self, epochs=200, lr=0.01, b=0, w=0):
        """
        :param epochs: number of gradient-descent iterations
        :param lr: learning rate
        :param b: initial bias (stored only; the fitting methods do not read it)
        :param w: initial weight (stored only; the fitting methods do not read it)
        """
        self.epochs = epochs
        self.lr = lr
        self.b = b
        self.w = w

    def SST(self, y_data):
        """Total sum of squares: sum((y - mean(y)) ** 2)."""
        y_data = np.ravel(np.asarray(y_data, dtype=np.float64))
        # The deviations must be squared; summing them raw always gives ~0.
        return np.sum((y_data - np.mean(y_data)) ** 2)

    def MSE(self, y_data, y_predict):
        """Mean squared error between targets and predictions."""
        y_data = np.ravel(np.asarray(y_data, dtype=np.float64))
        return self.SSE(y_data, y_predict) / len(y_data)

    def SSE(self, y_data, y_predict):
        """Residual sum of squares: sum((y - y_hat) ** 2)."""
        # Flatten both sides so an (n,) target and an (n, 1) prediction are
        # compared element-wise instead of broadcasting to an n x n matrix.
        y_data = np.ravel(np.asarray(y_data, dtype=np.float64))
        y_predict = np.ravel(np.asarray(y_predict, dtype=np.float64))
        return np.sum((y_data - y_predict) ** 2)

    def R(self, y_data, y_predict):
        """Coefficient of determination, 1 - SSE / SST.

        Also prints the SSE / SST ratio.
        """
        sse = self.SSE(y_data=y_data, y_predict=y_predict)
        sst = self.SST(y_data=y_data)
        print(sse / sst)
        return 1 - sse / sst

    def normalized(self, x_data, x_test):
        """Min-max scale both sets with the training set's statistics.

        Applies ``(x - min) / (max - min)`` column by column, where ``min``
        and ``max`` come from *x_data* only.

        :param x_data: training features, 2-D array-like
        :param x_test: test features with the same number of columns
        :return: ``(scaled_x_data, scaled_x_test)`` as float64 arrays
        """
        # Convert before taking min/max so that string-typed columns are
        # compared numerically rather than lexicographically.
        x_data = np.asarray(x_data, dtype=np.float64)
        x_test = np.asarray(x_test, dtype=np.float64)
        lowest = np.min(x_data, axis=0)
        span = np.max(x_data, axis=0) - lowest
        # A constant column has zero range; divide by 1 so it maps to 0.
        span = np.where(span == 0, 1.0, span)
        return (x_data - lowest) / span, (x_test - lowest) / span

    def standard_equation(self, x_data, y_data):
        """Solve the normal equation ``w = (X^T X)^-1 X^T y`` directly.

        A column of ones is prepended to *x_data* for the bias, so the first
        returned weight is the intercept.

        :return: the weight vector, or ``None`` (after printing a message)
            when ``X^T X`` is singular and the method cannot be used
        """
        x_data = np.asarray(x_data, dtype=np.float64)
        y_data = np.asarray(y_data, dtype=np.float64)
        # Prepend the bias column: n x d becomes n x (d + 1).
        x_data = np.hstack((np.ones((len(x_data), 1)), x_data))
        x_Tx = x_data.T.dot(x_data)
        # Rank test instead of ``det == 0``: an exact float comparison of the
        # determinant misses numerically singular matrices.
        if np.linalg.matrix_rank(x_Tx) < x_Tx.shape[0]:
            print('矩阵不可逆，不能使用标准方程法')
            return None
        # Solving the linear system avoids forming the explicit inverse.
        return np.linalg.solve(x_Tx, x_data.T.dot(y_data))

    def predict(self, x_data, weight):
        """Return ``[1, x] . weight`` for every row of *x_data*."""
        x_data = np.asarray(x_data, dtype=np.float64)
        x_data = np.hstack((np.ones((len(x_data), 1)), x_data))
        return x_data.dot(weight)

    def gradient_descent(self, x_data, y_data):
        """Fit the weights with batch gradient descent.

        Runs ``self.epochs`` iterations with step size ``self.lr`` from a
        random start in [-1, 1).

        :return: weights of shape ``(d + 1, 1)``; the first entry is the bias
        """
        x_data = np.asarray(x_data, dtype=np.float64)
        # Prepend the bias column.
        x_data = np.hstack((np.ones((len(x_data), 1)), x_data))
        n_samples, n_weights = x_data.shape
        w = (np.random.random([n_weights, 1]) - 0.5) * 2
        y_data = np.asarray(y_data, dtype=np.float64).reshape((n_samples, 1))
        for _ in range(self.epochs):
            residual = y_data - x_data.dot(w)
            # The gradient of the squared error is -X^T (y - Xw) / n, so
            # descending means *adding* lr * X^T (y - Xw) / n.
            w = w + self.lr * x_data.T.dot(residual) / n_samples
        return w


def main(data_dir=dir_path):
    """Load the CSV files, fit both solvers and print the results."""
    train = pd.read_csv(os.path.join(data_dir, 'train.csv'),
                        engine='python', encoding='utf-8')
    test = pd.read_csv(os.path.join(data_dir, 'test.csv'),
                       engine='python', encoding='utf-8')
    train = train[train['observation'] == 'PM2.5']
    # NOTE(review): the measurement-name column of test.csv is addressed as
    # 'AMB_TEMP' -- presumably the file has no header row and its first data
    # row is consumed as the header; confirm against the file.
    test = test[test['AMB_TEMP'] == 'PM2.5']
    train = train.drop(['Date', 'stations', 'observation'], axis=1)
    x_test = test.iloc[:, 2:].astype(np.float64).to_numpy()

    # Stack the windows vertically into one feature matrix and label vector.
    x_train, y_train = build_windows(
        train.astype(np.float64).to_numpy(), WINDOW
    )
    print(x_train.shape, y_train.shape)

    line = LinearRegression()
    x_data, x_test = line.normalized(x_data=x_train, x_test=x_test)

    weight = line.standard_equation(x_data=x_data, y_data=y_train)
    # standard_equation returns None for a singular system; skip scoring
    # instead of crashing inside predict.
    if weight is not None:
        predict = line.predict(x_data=x_data, weight=weight)
        R = line.R(y_data=y_train, y_predict=predict)
        print(R)

    weight = line.gradient_descent(x_data=x_data, y_data=y_train)
    print(weight)


if __name__ == "__main__":
    main()

最新回复(0)