Matplotlib基本图表绘制(5)(数十种常用,自己反复用时要留心选择)

    xiaoxiao2021-04-15  355

    图表类型:线型图,柱状图,密度图,以横纵两个坐标轴为主

    同时可以延展出很多其他的图表样式

    Series.plot(kind = 'line', ax = None, figsize = None, use_index = True, title = None, grid = None, legend = False, style = None, logy = False, loglog = False, xticks = None, yticks = None, ylim = None, rot = None, fontsize = None, colormap = None, table = False, yerr = None, label = None, secondary_y = False, **kwds)

    图主要还是帮助我们检测数据的

    import numpy as np import pandas as pd import matplotlib.pyplot as plt % matplotlib inline import warnings warnings.filterwarnings('ignore') #过滤警告 ts = pd.Series(np.random.randn(100),index = pd.date_range('1/1/2000', periods = 100)) #print(ts) ts = ts.cumsum() #累计和 #print(ts) ts.plot(kind = 'line', label = 'hehe', style = '--g.', color = 'r', alpha = 0.4, use_index = True, rot = 45, grid = True, ylim = [-50, 50], yticks = list(range(-50, 50, 10)), figsize = (8, 4), title = 'test', legend = True) #plt.grid(True, linestyle = '--', color = 'gray', linewidth = '0.4', axis = 'x') #网格 plt.legend() #Series.plot():Series的index为横坐标,value为纵坐标 #kind->line, bar, barh... #label->图例标签,dataframe以列名为label #use_index->将索引用作刻度标签,默认为True #rot->旋转刻度标签 #grid->网格显示,一般用plt.grid() #xlim,ylim->x,y轴的界限 #xticks,yticks->x,y轴刻度值 #title->图名 #legend->是否显示图例,一般用plt.legend() #grid,legend可以在外面显示 ts.head() 2000-01-01 0.147600 2000-01-02 -0.062122 2000-01-03 0.206698 2000-01-04 -0.796225 2000-01-05 0.323237 Freq: D, dtype: float64

    plt.xkcd() #柱状图与堆叠图 fig,axes = plt.subplots(4, 1, figsize = (10, 10)) s = pd.Series(np.random.randint(0, 10, 16), index = list('abcdefghijklmnop')) df = pd.DataFrame(np.random.rand(10, 3), columns = ['a', 'b', 'c']) #默认index为横坐标 s.plot(kind = 'bar', color = 'k', grid = True, alpha = 0.5, ax = axes[0]) #单系列柱状图方法1, plt.plot(kind = 'bar') df.plot(kind = 'bar', ax = axes[1], grid = True, colormap = 'Reds_r') #多系列柱状图 df.plot(kind = 'bar', ax = axes[2], grid = True, colormap = 'Blues_r', stacked = True) #多系列堆叠图 #stacked ->堆叠 df.plot.barh(ax = axes[3], grid = True, stacked = True, colormap = 'BuGn_r') #新版本,df.plot.<kind>,只需要掌握一种方法即可了 <matplotlib.axes._subplots.AxesSubplot at 0x16812078c88>

    #柱状图 plt.bar() plt.figure() x = np.arange(10) y1 = np.random.rand(10) y2 = -np.random.rand(10) plt.bar(x, y1, width = 1, facecolor = 'yellowgreen', edgecolor = 'white', yerr = y1*0.1) plt.bar(x, y2, width = 1, facecolor = 'lightskyblue', edgecolor = 'white', yerr = y2*0.1) #width:宽度比例 #facecolor:柱状图的填充颜色,edgecolor是边界颜色 #left-每个柱x轴左边界,bottom-每个柱y轴下边界 -> bottom扩展即可化为Gantt Chart #align:决定整个bar图分布,默认left表示从左边界开始绘制,center #xerr/yerr:x/y方向的error bar for i,j in zip(x, y1): plt.text(i+0.1, j-0.15, '%.2f' % j, color = 'k') for i,j in zip(x, y2): plt.text(i+0.1, j-0.15, '%.2f' % j, color = 'k')

    #面积图 fig,axes = plt.subplots(2, 1, figsize = (8, 6)) df1 = pd.DataFrame(np.random.rand(10, 4), columns = list('abcd')) df2 = pd.DataFrame(np.random.randn(10, 4), columns = list('abcd')) df1.plot.area(colormap = 'Greens_r', alpha = 0.5, ax = axes[0]) df2.plot.area(stacked = False, colormap = 'Set2', alpha = 0.5, ax = axes[1]) #使用Series.plot.area()和DataFrame.plot.area()创建面积图 #stacked:是否堆叠,默认情况下是True #为了产生堆积面积图,每列必须全部是正值或负值 #当数据有NaN时, 自动填充为0, 所以图标签需要清洗掉缺失值 <matplotlib.axes._subplots.AxesSubplot at 0x168122fdf60>

    #填图 fig, axes = plt.subplots(2,1, figsize = (8,6)) x = np.linspace(0, 1, 500) y1 = np.sin(4 * np.pi * x) * np.exp(-5 * x) y2 = -np.sin(4* np.pi * x) * np.exp(-5 * x) axes[0].fill(x, y1, 'r', alpha = 0.5, label = 'y1') axes[0].fill(x, y2, 'g', alpha = 0.5, label = 'y2') # 对函数和坐标轴间区域进行填充,使用fill函数 #也可以写成:plt.fill(x, y1, 'r', x, y2, 'g', alpha = 0.5) x = np.linspace(0, 5 * np.pi, 1000) y1 = np.sin(x) y2 = np.sin(2 * x) axes[1].fill_between(x, y1, y2, color = 'b', alpha = 0.5, label = 'area') #填充两个函数之间的区域,使用fill_between函数 for i in range(2): axes[i].legend() axes[i].grid()

    # 饼图 plt.pie() # plt.pie(x, explode = None, labels = None, color = None, autopct = None, pctdistance = 0.6, shadow = False, labeldistance = 1.1, # startangle = None, counterclock = True, wedgeprops = None, center = (0, 0), frame = False, hold = None, data = None ) s = pd.Series(3 * np.random.rand(4), index = list('abcd'), name = 'series') plt.axis('equal') plt.pie(s, explode = [0.1, 0, 0, 0], labels = s.index, colors = list('rgbc'), autopct = '%.2f%%', pctdistance = 0.6, labeldistance = 1.2, shadow = True, startangle = 0, radius = 1.5, frame = False) print(s) #explode:指定的每部分的偏移量 #label #colors #autopct:饼图上的数据标签显示方式12.13% #pctdistance:每个饼切片的中心和通过autopct生成的文本间的比例 #labeldistance:被画饼标记的直径,默认1.1 #startangle:开始的角度 #radius:半径 a 1.752496 b 2.219930 c 0.149030 d 2.038231 Name: series, dtype: float64

    #核心:查询数据分布 #直方图+密度图 s = pd.Series(np.random.randn(1000)) s.hist(bins = 20, histtype = 'bar', align = 'mid', orientation = 'vertical', alpha = 0.5, normed = True) # bin:箱子的宽度 # normed 标准化 # histtype 风格, bar, barstacked, step, stepfilled #orientation 水平还是垂直{‘horizontal’, ‘vertical’} #align:{‘left’, ‘mid’, ‘right’},optional(对齐方式) s.plot(kind = 'kde', style = 'k--') #密度图--已经标准化,所以两者放一起的话,需要直方图标准化 <matplotlib.axes._subplots.AxesSubplot at 0x16812662fd0>

    #堆叠直方图 plt.figure(num = 1) df = pd.DataFrame({'a':np.random.randn(1000) + 1, 'b':np.random.randn(1000), 'c':np.random.randn(1000) - 1, 'd':np.random.randn(1000) - 2}, columns = list('abcd')) df.plot.hist(stacked = True, bins = 20, colormap = 'Greens_r', alpha = 0.5, grid = True) # 使用DataFrame.plot.hist()和Series.plot.hist()方法绘制 # stacked:是否堆叠 df.hist(bins = 50) #生成多个直方图 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000168140959B0>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000168140C84E0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x00000168140EDB70>, <matplotlib.axes._subplots.AxesSubplot object at 0x000001681411C240>]], dtype=object) <Figure size 432x288 with 0 Axes>

    #plt.scatter()散点图 #plt.scatter(x, y, s = 20, c = None, marker = 'o', norm = None, vmin = None, vmax = None,alpha = None, # linewidths = None, verts = None, edgecolors = None, hold = None, data = None, **kwargs) plt.figure(figsize = (8, 6)) x = np.random.randn(1000) y = np.random.randn(1000) plt.scatter(x, y, marker = '.', s = np.random.randn(1000)*100, cmap = 'Reds', c = y, alpha = 0.8) plt.grid() # s:散点的大小 # c:散点的颜色 # vmin,vmax亮度设置,标量 #cmap:colormap

    # pd.scatter_matrix()散点矩阵 # pd.scatter_matrix(frame, alpha = 0.5, figsize = None, ax = None, grid = False, diagonal = 'hist' # marker = '.', density_kwds = None, hist_kwds = None, range_padding = 0.05, **kwds) df = pd.DataFrame(np.random.randn(100, 4), columns = list('abcd')) pd.scatter_matrix(df, figsize = (8,6), marker = 'o', diagonal = 'hist', alpha = 0.4, range_padding = 0.1) #diagonal:{'hist', 'kde'}, 必须且只能其中选一个, ->每个指标的频率图 #range_padding:(float,可选),图像在x轴,y轴原点附近的留白(padding),该值越大,留白距离越大,图像远离坐标原点 array([[<matplotlib.axes._subplots.AxesSubplot object at 0x00000168143E12E8>, <matplotlib.axes._subplots.AxesSubplot object at 0x000001681430B898>, <matplotlib.axes._subplots.AxesSubplot object at 0x000001681421E208>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000016813FD8D68>], [<matplotlib.axes._subplots.AxesSubplot object at 0x0000016814017A58>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000168140179E8>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000016814069B00>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000168122C4828>], [<matplotlib.axes._subplots.AxesSubplot object at 0x0000016813676908>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000016814328588>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000016813FBFC18>, <matplotlib.axes._subplots.AxesSubplot object at 0x000001681442D2E8>], [<matplotlib.axes._subplots.AxesSubplot object at 0x000001681444B978>, <matplotlib.axes._subplots.AxesSubplot object at 0x0000016814472048>, <matplotlib.axes._subplots.AxesSubplot object at 0x000001681449B6D8>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000168144C3D68>]], dtype=object)

    # 箱型图 # plt.plot.box()绘制 fig, axes = plt.subplots(2,1,figsize = (10, 6)) df = pd.DataFrame(np.random.randn(10, 5), columns = list('abcde')) color = dict(boxes = 'DarkGreen', whiskers = 'DarkOrange', medians = 'DarkBlue', caps = 'Gray') # 箱型图着色 # boxes -> 箱线 # whisker -> 分位数与error bar间竖线颜色 # medians -> 中位数线颜色 # caps -> error bar 横线颜色 df.plot.box(ylim = [0, 1.2], grid = True, ax = axes[0], color = color ) # color: 样式填充 df.plot.box(vert = False, positions = [1, 4, 5, 6, 8], ax = axes[1], grid = True, color = color) # vert:是否垂直,默认为True # position: 箱型图占位 <matplotlib.axes._subplots.AxesSubplot at 0x16816dc2400>

    #箱型图 #plt.boxplot() #plt.boxplot(x, notch = None, sym = None, vert = None, positions = None, widths = None, patch_artist = None, bootstrap = None, # usermedians = None, conf_intervals = None, meanline = None, showmeans = None, showcaps = None, showbox = None # showfliers = None, boxprops = None, labels = None, flierprops = None, medianprops = None, capprops = None, # whiskerprops = None, manage_xticks = True, autorange = False, zorder = None, hold = None, data = None) df = pd.DataFrame(np.random.randn(10, 5), columns = list('abcde')) plt.figure(figsize = (10,6)) #创建图表、数据 f = df.boxplot(sym = 'o', #异常点形状,参考marker vert = True, #是否垂直 whis = 1.5, #IQR,默认为1.5,也可以设置区间比如[5, 95],代表强制上下边缘为数据95%和5%的位置 patch_artist = True, #上下四分位框内是否填充,True为填充 meanline = False, #是否有均值线及其形状 showbox = True, #是否显示箱线 showcaps = True, #是否显示边缘线 showfliers = True, #是否显示异常值 notch = False, #中间箱体是否缺口 return_type = 'dict') #返回类型为字典 plt.title('boxplot') print(f) for box in f['boxes']: box.set(color = 'b', linewidth = 1) #箱体边框颜色 box.set(facecolor = 'b', alpha = 0.5) #箱体内部填充颜色 for whisker in f['whiskers']: whisker.set(color = 'k', linewidth = 0.5, linestyle = '-') for cap in f['caps']: cap.set(color = 'gray', linewidth = 2) for median in f['medians']: median.set(color = 'DarkBlue', linewidth = 2) for flier in f['fliers']: flier.set(marker = 'o', color = 'y', alpha = 0.5) # boxes 箱线 # medians,中位数的横线 # whiskers,从boxes到error bar间的竖线 {'whiskers': [<matplotlib.lines.Line2D object at 0x00000168170887F0>, <matplotlib.lines.Line2D object at 0x0000016817088940>, <matplotlib.lines.Line2D object at 0x00000168170B8470>, <matplotlib.lines.Line2D object at 0x00000168170B85C0>, <matplotlib.lines.Line2D object at 0x00000168170D7080>, <matplotlib.lines.Line2D object at 0x00000168170D71D0>, <matplotlib.lines.Line2D object at 0x00000168170E7C50>, <matplotlib.lines.Line2D object at 0x00000168170E7DA0>, <matplotlib.lines.Line2D object at 0x0000016817146860>, <matplotlib.lines.Line2D object at 0x00000168171469B0>], 'caps': 
[<matplotlib.lines.Line2D object at 0x0000016817088DD8>, <matplotlib.lines.Line2D object at 0x00000168170A0240>, <matplotlib.lines.Line2D object at 0x00000168170B89E8>, <matplotlib.lines.Line2D object at 0x00000168170B8E10>, <matplotlib.lines.Line2D object at 0x00000168170D75F8>, <matplotlib.lines.Line2D object at 0x00000168170D7A20>, <matplotlib.lines.Line2D object at 0x00000168170F9208>, <matplotlib.lines.Line2D object at 0x00000168170F9630>, <matplotlib.lines.Line2D object at 0x0000016817146DD8>, <matplotlib.lines.Line2D object at 0x000001681715A240>], 'boxes': [<matplotlib.patches.PathPatch object at 0x0000016817088588>, <matplotlib.patches.PathPatch object at 0x00000168170B8208>, <matplotlib.patches.PathPatch object at 0x00000168170C4DD8>, <matplotlib.patches.PathPatch object at 0x00000168170E79B0>, <matplotlib.patches.PathPatch object at 0x00000168171465F8>], 'medians': [<matplotlib.lines.Line2D object at 0x00000168170A0668>, <matplotlib.lines.Line2D object at 0x00000168170C4278>, <matplotlib.lines.Line2D object at 0x00000168170D7E48>, <matplotlib.lines.Line2D object at 0x00000168170F9A58>, <matplotlib.lines.Line2D object at 0x000001681715A668>], 'fliers': [<matplotlib.lines.Line2D object at 0x00000168170A0A90>, <matplotlib.lines.Line2D object at 0x00000168170C46A0>, <matplotlib.lines.Line2D object at 0x00000168170E72B0>, <matplotlib.lines.Line2D object at 0x00000168170F9E80>, <matplotlib.lines.Line2D object at 0x000001681715AA90>], 'means': []}

    #箱型图 #plt.boxplot() #分组汇总 df = pd.DataFrame(np.random.rand(10, 2), columns = ['Col1','Col2']) df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) df['Y'] = pd.Series(['A','B','A','B','A','B','A','B','A','B']) print(df) df.boxplot(column = ['Col1','Col2'], by = ['X','Y']) # 按照数据的列分子图 # 按照类分组做箱型图 Col1 Col2 X Y 0 0.447041 0.123629 A A 1 0.728267 0.320884 A B 2 0.167149 0.065361 A A 3 0.975806 0.982760 A B 4 0.730560 0.557661 A A 5 0.754968 0.493001 B B 6 0.544775 0.516427 B A 7 0.208985 0.387391 B B 8 0.367907 0.602283 B A 9 0.172040 0.022766 B B array([<matplotlib.axes._subplots.AxesSubplot object at 0x0000016814C2AAC8>, <matplotlib.axes._subplots.AxesSubplot object at 0x00000168153F2828>], dtype=object)


    最新回复(0)