DataFrame

    xiaoxiao2023-10-31  141

    1、DataFrame 的创建

    # Demo script: creating DataFrames, and why pandas is preferable to a raw
    # NumPy array once the data carries labels. Text after "->" is the output
    # recorded with the original post.
    import numpy as np
    import pandas as pd

    # --- Creating a DataFrame -------------------------------------------------
    # From a dict: each key becomes a column label.
    df_1 = pd.DataFrame({'A': [0, 1, 2], 'B': [3, 4, 5]})
    # From a list of rows: `columns` labels the columns, `index` labels the rows.
    df_2 = pd.DataFrame(
        [[0, 1, 2], [3, 4, 5]],
        columns=['A', 'B', 'C'],
        index=[1, 2],
    )

    # --- Plain numeric NumPy array -------------------------------------------
    price = np.array([
        [3, 4, 5, 6, 7],
        [5, 6, 5, 4, 3],
        [4, 4, 5, 4, 3],
        [5, 5, 6, 7, 4],
        [5, 6, 7, 5, 4],
    ])
    print(price)           # whole 2-D array
    print(price[0, 2])     # row 1, element 3        -> 5
    print(price[1, :])     # all of row 2            -> [5 6 5 4 3]
    print(price[1, 1:3])   # row 2, elements 2 to 3  -> [6 5]
    print(price[:, 1])     # all of column 2         -> [4 6 4 5 6]
    # Mean of the column that holds the largest value of row 1 -> 4.2
    print(price[:, price[0, :].argmax()].mean())
    # Mean of every row. Iterating a 2-D array yields its rows, so no
    # hard-coded row count is needed.  -> 5.0 4.6 4.0 5.4 5.4
    for row in price:
        print(row.mean())
    print(price.mean(axis=0))  # per-column means -> [4.4 5.  5.6 5.2 4.2]
    print(price.mean(axis=1))  # per-row means    -> [5.  4.6 4.  5.4 5.4]
    # Standard deviation of every row (NumPy default: population std).
    # -> 1.4142135623730951 1.0198039027185568 0.6324555320336759
    #    1.019803902718557 1.019803902718557
    for row in price:
        print(row.std())

    # --- NumPy array with header labels mixed in -----------------------------
    price = np.array([
        ['date', 's1', 's2', 's3', 's4', 's5'],
        ['a', 3, 4, 5, 6, 7],
        ['b', 5, 6, 5, 4, 3],
        ['c', 4, 4, 5, 4, 3],
        ['d', 5, 5, 6, 7, 4],
        ['e', 5, 6, 7, 5, 4],
    ])
    # An array holds a single dtype, so every element was converted to a string
    # and numeric helpers such as mean() are no longer usable -- this is the
    # motivation for a DataFrame. The original run reported dtype('<U4');
    # NOTE(review): the reported width may differ between NumPy versions.
    # A bare `price.dtype` only displays in an interactive session, so print it.
    print(repr(price.dtype))

    # --- The same data as a DataFrame ----------------------------------------
    price = pd.DataFrame({
        's': ['s1', 's2', 's3', 's4', 's5'],
        'a': [3, 4, 5, 6, 7],
        'b': [5, 6, 5, 4, 3],
        'c': [4, 4, 5, 4, 3],
        'd': [5, 5, 6, 7, 4],
        'e': [5, 6, 7, 5, 4],
    })
    # With a DataFrame the numeric columns support arithmetic again.
    print(price)
    # ->     s  a  b  c  d  e
    #    0  s1  3  5  4  5  5
    #    1  s2  4  6  4  5  6
    #    2  s3  5  5  5  6  7
    #    3  s4  6  4  4  7  5
    #    4  s5  7  3  3  4  4

    # numeric_only=True skips the string column 's'; recent pandas versions no
    # longer drop non-numeric columns silently and raise instead.
    # pandas applies Bessel's correction (ddof=1) by default, so the result
    # differs from NumPy's.
    # -> a 1.581139  b 1.140175  c 0.707107  d 1.140175  e 1.140175
    print(price.std(numeric_only=True))
    # With Bessel's correction switched off the values match NumPy's.
    # -> a 1.414214  b 1.019804  c 0.632456  d 1.019804  e 1.019804
    print(price.std(ddof=0, numeric_only=True))

    2、读取 DataFrame 数据单元

    # Demo script: reading rows, columns and raw values out of a DataFrame.
    # Text after "->" is the output recorded with the original post.
    import pandas as pd

    price = pd.DataFrame(
        {
            'a': [3, 4, 5, 6, 7],
            'b': [5, 6, 5, 4, 3],
            'c': [4, 4, 5, 4, 3],
            'd': [5, 5, 6, 7, 4],
            'e': [5, 6, 7, 5, 4],
        },
        index=['s1', 's2', 's3', 's4', 's5'],
    )
    print(price)
    # ->     a  b  c  d  e
    #    s1  3  5  4  5  5
    #    s2  4  6  4  5  6
    #    s3  5  5  5  6  7
    #    s4  6  4  4  7  5
    #    s5  7  3  3  4  4

    # Select one row by its index label.
    first_row = price.loc['s1']
    print(first_row)
    # -> a 3  b 5  c 4  d 5  e 5   (Name: s1, dtype: int64)

    # The two lookups below were bare expressions in the original, which only
    # display in an interactive session; print them so a script shows them too.
    print(type(first_row))  # type of the object returned for a single row
    print(price['d'])       # select one column by its label

    # Underlying values as a NumPy ndarray.
    values = price.to_numpy()
    print(values)
    # -> [[3 5 4 5 5]
    #     [4 6 4 5 6]
    #     [5 5 5 6 7]
    #     [6 4 4 7 5]
    #     [7 3 3 4 4]]
    print(values.mean())    # mean over every cell -> 4.88
    最新回复(0)