datetime基础

    xiaoxiao2023-10-02  143

    ''' 【课程2.8】 时间模块:datetime datetime模块,主要掌握:datetime.date(), datetime.datetime(), datetime.timedelta() 日期解析方法:parser.parse ''' # datetime.date :date 对象 import datetime today=datetime.date.today() print(today) # datetime.date.today() 返回今天日期 格式为date 2019-05-25 # 直接转换日期 年,月,日 # datetime.date print(datetime.date(2016,6,2)) 2016-06-02 # datetime.datetime :datetime对象 # 返回现在的时间 datetime.datetime.now() now=datetime.datetime.now() print(now) # 可以通过str转成字符串 2019-05-25 08:38:20.732982 # 转换日期,最少输入年月日 print(datetime.datetime(2016,5,6)) print(datetime.datetime(2015,5,6,5,22,22)) 2016-05-06 00:00:00 2015-05-06 05:22:22 # 时间可以算时间差 t1 = datetime.datetime(2016,5,6) t2 = datetime.datetime(2015,5,6,5,22,22) print(t1-t2) 365 days, 18:37:38 # 时间差 datetime.timedelta:时间差 today=datetime.datetime.now() yes=today-datetime.timedelta(1) print(yes) print(today-datetime.timedelta(10)) 2019-05-24 08:38:21.237705 2019-05-15 08:38:21.237705 # 日期字符串转换 # parser.parse from dateutil.parser import parse date='21-12-2017' print(parse(date)) print(parse('12-11-2017')) print(parse('12/11/2017')) print(parse('2017-5-6')) print(parse('Jan 31, 1997 10:45 PM')) # 参数dayfirst=True 国际通用格式,日在月之前 print('===================') print(parse('5/1/2019')) print(parse('5/1/2019',dayfirst=True)) 2017-12-21 00:00:00 2017-12-11 00:00:00 2017-12-11 00:00:00 2017-05-06 00:00:00 1997-01-31 22:45:00 =================== 2019-05-01 00:00:00 2019-01-05 00:00:00 ''' 【课程2.9】 Pandas时刻数据:Timestamp 时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据 pandas.Timestamp() ''' '\n【课程2.9】 Pandas时刻数据:Timestamp\n\n时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据\n\npandas.Timestamp()\n\n' import pandas as pd import numpy as np # pd.Timestamp() print(pd.Timestamp('2017-12-21')) print(pd.Timestamp('2018-5-8 1:1:1')) print(pd.Timestamp(datetime.datetime(2015,5,6))) 2017-12-21 00:00:00 2018-05-08 01:01:01 2015-05-06 00:00:00 # pd.to_datetime() from datetime import datetime print(pd.to_datetime(datetime(2016,12,11,1,1,1))) 
print(pd.to_datetime('2017-2-3')) # 可以是时间串 print(pd.to_datetime(['2017-2-3','2018-3-4','2019-4-5'])) 2016-12-11 01:01:01 2017-02-03 00:00:00 DatetimeIndex(['2017-02-03', '2018-03-04', '2019-04-05'], dtype='datetime64[ns]', freq=None) # pd.to_datetime转成时间戳索引 print(pd.to_datetime(['2018-2-3','2019-2-6','2017-5-9'])) a = datetime(2016,2,2) print(type(a)) print(pd.to_datetime([datetime(2016,5,6),datetime(2017,5,6)])) DatetimeIndex(['2018-02-03', '2019-02-06', '2017-05-09'], dtype='datetime64[ns]', freq=None) <class 'datetime.datetime'> DatetimeIndex(['2016-05-06', '2017-05-06'], dtype='datetime64[ns]', freq=None) # 如果夹杂其他格式 可以采用errors参数 # ignore 产生一般数组,coerce 返回NaT date=['2018-2-3','2019-2-6','hello','2017-5-9'] print(pd.to_datetime(date,errors='ignore')) print(pd.to_datetime(date,errors='coerce')) ['2018-2-3' '2019-2-6' 'hello' '2017-5-9'] DatetimeIndex(['2018-02-03', '2019-02-06', 'NaT', '2017-05-09'], dtype='datetime64[ns]', freq=None) ''' 【课程2.10】 Pandas时间戳索引:DatetimeIndex 核心:pd.date_range() ''' '\n【课程2.10】 Pandas时间戳索引:DatetimeIndex\n\n核心:pd.date_range()\n\n' # pd.DatatimeIndex() 与 TimeSeries时间序列 # pd.DatetimeIndex() print(pd.DatetimeIndex(['2019/5/4','5/4/2018','3/5/2016'])) DatetimeIndex(['2019-05-04', '2018-05-04', '2016-03-05'], dtype='datetime64[ns]', freq=None) rng = pd.DatetimeIndex(['2019/5/4','5/4/2018','3/5/2016']) print(pd.Series(np.random.rand(len(rng)),index=rng)) # 以DatetimeIndex为index的Series 2019-05-04 0.747358 2018-05-04 0.484367 2016-03-05 0.666840 dtype: float64 # pd.date_range()-日期范围:生成日期范围 # 2种生成方式:①start + end; ②start/end + periods # 默认频率:day print(pd.date_range('1/1/2017','30/1/2017')) DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08', '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12', '2017-01-13', '2017-01-14', '2017-01-15', '2017-01-16', '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-21', '2017-01-22', '2017-01-23', '2017-01-24', '2017-01-25', 
'2017-01-26', '2017-01-27', '2017-01-28', '2017-01-29', '2017-01-30'], dtype='datetime64[ns]', freq='D') # periods周期 print(pd.date_range('1/1/2017',periods=10)) print(pd.date_range(end='2017/1/1',periods=10)) # pd.date_range(start,end,period,freq,tz,normalize,name,closed,) ''' start:开始时间 end:结束时间 periods:偏移量 freq:频率 默认天 pd.date_range()默认日历日 pd.bdate_range()默认频率为工作日 tz:时区 ''' DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08', '2017-01-09', '2017-01-10'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2016-12-23', '2016-12-24', '2016-12-25', '2016-12-26', '2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30', '2016-12-31', '2017-01-01'], dtype='datetime64[ns]', freq='D') '\nstart:开始时间\nend:结束时间\nperiods:偏移量\nfreq:频率 默认天 pd.date_range()默认日历日 pd.bdate_range()默认频率为工作日\ntz:时区\n' print(pd.date_range('2017/2/2 15:00:00',periods=10)) print(pd.date_range('2017/2/2 15:00:00',periods=10,normalize=True)) DatetimeIndex(['2017-02-02 15:00:00', '2017-02-03 15:00:00', '2017-02-04 15:00:00', '2017-02-05 15:00:00', '2017-02-06 15:00:00', '2017-02-07 15:00:00', '2017-02-08 15:00:00', '2017-02-09 15:00:00', '2017-02-10 15:00:00', '2017-02-11 15:00:00'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2017-02-02', '2017-02-03', '2017-02-04', '2017-02-05', '2017-02-06', '2017-02-07', '2017-02-08', '2017-02-09', '2017-02-10', '2017-02-11'], dtype='datetime64[ns]', freq='D') # closed 默认左闭右闭,left左闭右开,right左开右闭 print(pd.date_range('2017/1/1','2017/1/4')) print(pd.date_range('2017/1/1','2017/1/4',closed='left')) print(pd.date_range('2017/1/1','2017/1/4',closed='right')) DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') # 可以直接转换为list 
print(list(pd.date_range('2017/1/1','2017/1/4'))) [Timestamp('2017-01-01 00:00:00', offset='D'), Timestamp('2017-01-02 00:00:00', offset='D'), Timestamp('2017-01-03 00:00:00', offset='D'), Timestamp('2017-01-04 00:00:00', offset='D')] # pd.date_range()-日期范围:频率(1) print(pd.date_range('2017/1/1','2017/1/4')) # 默认为天 print(pd.date_range('2017/1/1','2017/1/30',freq='B')) # B,每工作日 DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12', '2017-01-13', '2017-01-16', '2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20', '2017-01-23', '2017-01-24', '2017-01-25', '2017-01-26', '2017-01-27', '2017-01-30'], dtype='datetime64[ns]', freq='B') print(pd.date_range('2017/1/1','2017/1/4',freq='H')) # 每小时 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00', '2017-01-01 02:00:00', '2017-01-01 03:00:00', '2017-01-01 04:00:00', '2017-01-01 05:00:00', '2017-01-01 06:00:00', '2017-01-01 07:00:00', '2017-01-01 08:00:00', '2017-01-01 09:00:00', '2017-01-01 10:00:00', '2017-01-01 11:00:00', '2017-01-01 12:00:00', '2017-01-01 13:00:00', '2017-01-01 14:00:00', '2017-01-01 15:00:00', '2017-01-01 16:00:00', '2017-01-01 17:00:00', '2017-01-01 18:00:00', '2017-01-01 19:00:00', '2017-01-01 20:00:00', '2017-01-01 21:00:00', '2017-01-01 22:00:00', '2017-01-01 23:00:00', '2017-01-02 00:00:00', '2017-01-02 01:00:00', '2017-01-02 02:00:00', '2017-01-02 03:00:00', '2017-01-02 04:00:00', '2017-01-02 05:00:00', '2017-01-02 06:00:00', '2017-01-02 07:00:00', '2017-01-02 08:00:00', '2017-01-02 09:00:00', '2017-01-02 10:00:00', '2017-01-02 11:00:00', '2017-01-02 12:00:00', '2017-01-02 13:00:00', '2017-01-02 14:00:00', '2017-01-02 15:00:00', '2017-01-02 16:00:00', '2017-01-02 17:00:00', '2017-01-02 18:00:00', '2017-01-02 19:00:00', '2017-01-02 20:00:00', '2017-01-02 21:00:00', '2017-01-02 22:00:00', '2017-01-02 
23:00:00', '2017-01-03 00:00:00', '2017-01-03 01:00:00', '2017-01-03 02:00:00', '2017-01-03 03:00:00', '2017-01-03 04:00:00', '2017-01-03 05:00:00', '2017-01-03 06:00:00', '2017-01-03 07:00:00', '2017-01-03 08:00:00', '2017-01-03 09:00:00', '2017-01-03 10:00:00', '2017-01-03 11:00:00', '2017-01-03 12:00:00', '2017-01-03 13:00:00', '2017-01-03 14:00:00', '2017-01-03 15:00:00', '2017-01-03 16:00:00', '2017-01-03 17:00:00', '2017-01-03 18:00:00', '2017-01-03 19:00:00', '2017-01-03 20:00:00', '2017-01-03 21:00:00', '2017-01-03 22:00:00', '2017-01-03 23:00:00', '2017-01-04 00:00:00'], dtype='datetime64[ns]', freq='H') print(pd.date_range('2017/1/1','2017/1/4',freq='T')) # 每分钟 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00', '2017-01-01 00:02:00', '2017-01-01 00:03:00', '2017-01-01 00:04:00', '2017-01-01 00:05:00', '2017-01-01 00:06:00', '2017-01-01 00:07:00', '2017-01-01 00:08:00', '2017-01-01 00:09:00', ... '2017-01-03 23:51:00', '2017-01-03 23:52:00', '2017-01-03 23:53:00', '2017-01-03 23:54:00', '2017-01-03 23:55:00', '2017-01-03 23:56:00', '2017-01-03 23:57:00', '2017-01-03 23:58:00', '2017-01-03 23:59:00', '2017-01-04 00:00:00'], dtype='datetime64[ns]', length=4321, freq='T') print(pd.date_range('2017/1/1','2017/1/4',freq='H')) # 每小时 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00', '2017-01-01 02:00:00', '2017-01-01 03:00:00', '2017-01-01 04:00:00', '2017-01-01 05:00:00', '2017-01-01 06:00:00', '2017-01-01 07:00:00', '2017-01-01 08:00:00', '2017-01-01 09:00:00', '2017-01-01 10:00:00', '2017-01-01 11:00:00', '2017-01-01 12:00:00', '2017-01-01 13:00:00', '2017-01-01 14:00:00', '2017-01-01 15:00:00', '2017-01-01 16:00:00', '2017-01-01 17:00:00', '2017-01-01 18:00:00', '2017-01-01 19:00:00', '2017-01-01 20:00:00', '2017-01-01 21:00:00', '2017-01-01 22:00:00', '2017-01-01 23:00:00', '2017-01-02 00:00:00', '2017-01-02 01:00:00', '2017-01-02 02:00:00', '2017-01-02 03:00:00', '2017-01-02 04:00:00', '2017-01-02 05:00:00', '2017-01-02 06:00:00', 
'2017-01-02 07:00:00', '2017-01-02 08:00:00', '2017-01-02 09:00:00', '2017-01-02 10:00:00', '2017-01-02 11:00:00', '2017-01-02 12:00:00', '2017-01-02 13:00:00', '2017-01-02 14:00:00', '2017-01-02 15:00:00', '2017-01-02 16:00:00', '2017-01-02 17:00:00', '2017-01-02 18:00:00', '2017-01-02 19:00:00', '2017-01-02 20:00:00', '2017-01-02 21:00:00', '2017-01-02 22:00:00', '2017-01-02 23:00:00', '2017-01-03 00:00:00', '2017-01-03 01:00:00', '2017-01-03 02:00:00', '2017-01-03 03:00:00', '2017-01-03 04:00:00', '2017-01-03 05:00:00', '2017-01-03 06:00:00', '2017-01-03 07:00:00', '2017-01-03 08:00:00', '2017-01-03 09:00:00', '2017-01-03 10:00:00', '2017-01-03 11:00:00', '2017-01-03 12:00:00', '2017-01-03 13:00:00', '2017-01-03 14:00:00', '2017-01-03 15:00:00', '2017-01-03 16:00:00', '2017-01-03 17:00:00', '2017-01-03 18:00:00', '2017-01-03 19:00:00', '2017-01-03 20:00:00', '2017-01-03 21:00:00', '2017-01-03 22:00:00', '2017-01-03 23:00:00', '2017-01-04 00:00:00'], dtype='datetime64[ns]', freq='H') print(pd.date_range('2017/1/1','2017/1/4',freq='T')) # 每分钟 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00', '2017-01-01 00:02:00', '2017-01-01 00:03:00', '2017-01-01 00:04:00', '2017-01-01 00:05:00', '2017-01-01 00:06:00', '2017-01-01 00:07:00', '2017-01-01 00:08:00', '2017-01-01 00:09:00', ... '2017-01-03 23:51:00', '2017-01-03 23:52:00', '2017-01-03 23:53:00', '2017-01-03 23:54:00', '2017-01-03 23:55:00', '2017-01-03 23:56:00', '2017-01-03 23:57:00', '2017-01-03 23:58:00', '2017-01-03 23:59:00', '2017-01-04 00:00:00'], dtype='datetime64[ns]', length=4321, freq='T') print(pd.date_range('2017/1/1','2017/1/4',freq='S')) # 每秒 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:00:01', '2017-01-01 00:00:02', '2017-01-01 00:00:03', '2017-01-01 00:00:04', '2017-01-01 00:00:05', '2017-01-01 00:00:06', '2017-01-01 00:00:07', '2017-01-01 00:00:08', '2017-01-01 00:00:09', ... 
'2017-01-03 23:59:51', '2017-01-03 23:59:52', '2017-01-03 23:59:53', '2017-01-03 23:59:54', '2017-01-03 23:59:55', '2017-01-03 23:59:56', '2017-01-03 23:59:57', '2017-01-03 23:59:58', '2017-01-03 23:59:59', '2017-01-04 00:00:00'], dtype='datetime64[ns]', length=259201, freq='S') # L 每毫秒,U每微秒 # 指定每周的星期几 print(pd.date_range('2017/1/1','2017/2/1',freq='W-MON')) DatetimeIndex(['2017-01-02', '2017-01-09', '2017-01-16', '2017-01-23', '2017-01-30'], dtype='datetime64[ns]', freq='W-MON') # 每月第2个周一 print(pd.date_range('2017/1/1','2017/5/1',freq='WOM-2MON')) DatetimeIndex(['2017-01-09', '2017-02-13', '2017-03-13', '2017-04-10'], dtype='datetime64[ns]', freq='WOM-2MON') # pd.date_range()-日期范围:频率(2) # 每月最后一个日历日 M print(pd.date_range('2017/1/1','2018/1/1',freq='M')) DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30', '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31', '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'], dtype='datetime64[ns]', freq='M') # 指定月为季度末,每个季度末的最后一个日历日 Q-月 print(pd.date_range('2017','2018',freq='Q-DEC')) DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-30', '2017-12-31'], dtype='datetime64[ns]', freq='Q-DEC') # 指定月份的最后一个日历日 A-月 print(pd.date_range('2017','2018',freq='A-DEC')) DatetimeIndex(['2017-12-31'], dtype='datetime64[ns]', freq='A-DEC') # 每月最后一个工作日 print(pd.date_range('2017','2018',freq='BM')) DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-28', '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31', '2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29'], dtype='datetime64[ns]', freq='BM') # 指定月为季度末,每个季度末最后一个工作日 BQ-月 print(pd.date_range('2017','2018',freq='BQ-DEC')) DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29'], dtype='datetime64[ns]', freq='BQ-DEC') # 指定月的最后一个工作日 BA-月 print(pd.date_range('2017','2018',freq='BA-DEC')) DatetimeIndex(['2017-12-29'], dtype='datetime64[ns]', freq='BA-DEC') # 每个月的第一个日历日 MS print(pd.date_range('2017','2018',freq='MS')) DatetimeIndex(['2017-01-01', 
'2017-02-01', '2017-03-01', '2017-04-01', '2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01', '2017-09-01', '2017-10-01', '2017-11-01', '2017-12-01', '2018-01-01'], dtype='datetime64[ns]', freq='MS') # 指定月为季度末,每个季度最后一个月的第一个日历日 print(pd.date_range('2017','2018',freq='QS-DEC')) DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='QS-DEC') # 每年指定月的第一个日历日 AS-月 print(pd.date_range('2017','2018',freq='AS-DEC')) DatetimeIndex(['2017-12-01'], dtype='datetime64[ns]', freq='AS-DEC') # 每个月的第一个工作日 print(pd.date_range('2017','2018',freq='BMS')) DatetimeIndex(['2017-01-02', '2017-02-01', '2017-03-01', '2017-04-03', '2017-05-01', '2017-06-01', '2017-07-03', '2017-08-01', '2017-09-01', '2017-10-02', '2017-11-01', '2017-12-01', '2018-01-01'], dtype='datetime64[ns]', freq='BMS') # 指定月为季度末,每个季度最后一个月的第一个工作日 BQS-月 print(pd.date_range('2017','2018',freq='BQS-DEC')) DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='BQS-DEC') # 每年指定月的第一个工作日 BAS-月 print(pd.date_range('2017','2020', freq = 'BAS-DEC')) DatetimeIndex(['2017-12-01', '2018-12-03', '2019-12-02'], dtype='datetime64[ns]', freq='BAS-DEC') # pd.date_range()-日期范围:复合频率 # 7天一个频率 print(pd.date_range('2017/1/1','2017/4/1',freq='7D')) DatetimeIndex(['2017-01-01', '2017-01-08', '2017-01-15', '2017-01-22', '2017-01-29', '2017-02-05', '2017-02-12', '2017-02-19', '2017-02-26', '2017-03-05', '2017-03-12', '2017-03-19', '2017-03-26'], dtype='datetime64[ns]', freq='7D') print(pd.date_range('2017/1/1','2017/1/2', freq = '2h30min')) # 2小时30分钟 DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 02:30:00', '2017-01-01 05:00:00', '2017-01-01 07:30:00', '2017-01-01 10:00:00', '2017-01-01 12:30:00', '2017-01-01 15:00:00', '2017-01-01 17:30:00', '2017-01-01 20:00:00', '2017-01-01 22:30:00'], dtype='datetime64[ns]', freq='150T') print(pd.date_range('2017','2018', freq = '2M')) # 每2个月,取月末最后一个日历日 DatetimeIndex(['2017-01-31', '2017-03-31', '2017-05-31', '2017-07-31', '2017-09-30', 
'2017-11-30'], dtype='datetime64[ns]', freq='2M') # asfreq:时期频率转换 ts=pd.Series(np.random.rand(4), index=pd.date_range('2017/1/1','2017/1/4')) print(ts) print(ts.asfreq('4H')) 2017-01-01 0.121601 2017-01-02 0.813354 2017-01-03 0.258622 2017-01-04 0.816194 Freq: D, dtype: float64 2017-01-01 00:00:00 0.121601 2017-01-01 04:00:00 NaN 2017-01-01 08:00:00 NaN 2017-01-01 12:00:00 NaN 2017-01-01 16:00:00 NaN 2017-01-01 20:00:00 NaN 2017-01-02 00:00:00 0.813354 2017-01-02 04:00:00 NaN 2017-01-02 08:00:00 NaN 2017-01-02 12:00:00 NaN 2017-01-02 16:00:00 NaN 2017-01-02 20:00:00 NaN 2017-01-03 00:00:00 0.258622 2017-01-03 04:00:00 NaN 2017-01-03 08:00:00 NaN 2017-01-03 12:00:00 NaN 2017-01-03 16:00:00 NaN 2017-01-03 20:00:00 NaN 2017-01-04 00:00:00 0.816194 Freq: 4H, dtype: float64 # 填充 method print(ts.asfreq('4H',method='ffill')) # ffill 用之前的填充 2017-01-01 00:00:00 0.121601 2017-01-01 04:00:00 0.121601 2017-01-01 08:00:00 0.121601 2017-01-01 12:00:00 0.121601 2017-01-01 16:00:00 0.121601 2017-01-01 20:00:00 0.121601 2017-01-02 00:00:00 0.813354 2017-01-02 04:00:00 0.813354 2017-01-02 08:00:00 0.813354 2017-01-02 12:00:00 0.813354 2017-01-02 16:00:00 0.813354 2017-01-02 20:00:00 0.813354 2017-01-03 00:00:00 0.258622 2017-01-03 04:00:00 0.258622 2017-01-03 08:00:00 0.258622 2017-01-03 12:00:00 0.258622 2017-01-03 16:00:00 0.258622 2017-01-03 20:00:00 0.258622 2017-01-04 00:00:00 0.816194 Freq: 4H, dtype: float64 print(ts.asfreq('4H',method='bfill')) # bfill 用之后的填充 2017-01-01 00:00:00 0.121601 2017-01-01 04:00:00 0.813354 2017-01-01 08:00:00 0.813354 2017-01-01 12:00:00 0.813354 2017-01-01 16:00:00 0.813354 2017-01-01 20:00:00 0.813354 2017-01-02 00:00:00 0.813354 2017-01-02 04:00:00 0.258622 2017-01-02 08:00:00 0.258622 2017-01-02 12:00:00 0.258622 2017-01-02 16:00:00 0.258622 2017-01-02 20:00:00 0.258622 2017-01-03 00:00:00 0.258622 2017-01-03 04:00:00 0.816194 2017-01-03 08:00:00 0.816194 2017-01-03 12:00:00 0.816194 2017-01-03 16:00:00 0.816194 2017-01-03 20:00:00 0.816194 
2017-01-04 00:00:00 0.816194 Freq: 4H, dtype: float64 # pd.date_range()-日期范围:超前/滞后数据 ts=pd.Series(np.random.rand(4), index=pd.date_range('20170101','20170104')) print(ts) 2017-01-01 0.764262 2017-01-02 0.894735 2017-01-03 0.302417 2017-01-04 0.765669 Freq: D, dtype: float64 # shift 数据滞后 print(ts.shift(2)) 2017-01-01 NaN 2017-01-02 NaN 2017-01-03 0.764262 2017-01-04 0.894735 Freq: D, dtype: float64 # shift 数据超前 print(ts.shift(-2)) 2017-01-01 0.302417 2017-01-02 0.765669 2017-01-03 NaN 2017-01-04 NaN Freq: D, dtype: float64 # 这个一般用来计算变化百分比 per=ts/ts.shift(1)-1 print(per) 2017-01-01 NaN 2017-01-02 0.170716 2017-01-03 -0.662003 2017-01-04 1.531832 Freq: D, dtype: float64 # 对时间戳进行位移 添加参数freq # freq=D 时间向后 print(ts.shift(2,freq='D')) 2017-01-03 0.764262 2017-01-04 0.894735 2017-01-05 0.302417 2017-01-06 0.765669 Freq: D, dtype: float64 #freq=T 时间向前 print(ts.shift(2,freq='T')) 2017-01-01 00:02:00 0.764262 2017-01-02 00:02:00 0.894735 2017-01-03 00:02:00 0.302417 2017-01-04 00:02:00 0.765669 Freq: D, dtype: float64 ''' 【课程2.11】 Pandas时期:Period 核心:pd.Period() ''' '\n【课程2.11】 Pandas时期:Period\n\n核心:pd.Period()\n\n' # 生成一个以2017-1开始,以月为频率的构造器 p=pd.Period('2017',freq='M') print(p) 2017-01 print(p+1) print(p-1) 2017-02 2016-12 print(pd.Period('2012',freq='A-DEC')-1) 2011 # 创建时期范围 pd.period_range() pr=pd.period_range('1/1/2017','1/1/2018',freq='M') print(pr) PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='int64', freq='M') # 时间序列 时期序列 s=pd.Series(np.random.rand(len(pr)),index=pr) print(s) 2017-01 0.395536 2017-02 0.867729 2017-03 0.727422 2017-04 0.194098 2017-05 0.897134 2017-06 0.119327 2017-07 0.481213 2017-08 0.343418 2017-09 0.474208 2017-10 0.271213 2017-11 0.621978 2017-12 0.723095 2018-01 0.234442 Freq: M, dtype: float64 # 频率转换 asfreq p=pd.Period('2017','A-DEC') print(p) 2017 print(p.asfreq('M',how='start')) print(p.asfreq('D',how='end')) # 
通过.asfreq(freq,method=None,how=None)的方法转换成别的频率 2017-01 2017-12-31 #asfreq也可以转换timeSeries的index pr=pd.period_range('2017','2018',freq='M') print(pr) PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='int64', freq='M') s1=pd.Series(np.random.rand(len(pr)),index=pr) print(s1) print(pd.Series(np.random.rand(len(pr)),index=pr.asfreq('D',how='start'))) 2017-01 0.027461 2017-02 0.351679 2017-03 0.026355 2017-04 0.130524 2017-05 0.323551 2017-06 0.418091 2017-07 0.252305 2017-08 0.159283 2017-09 0.863181 2017-10 0.113546 2017-11 0.834609 2017-12 0.404364 2018-01 0.248782 Freq: M, dtype: float64 2017-01-01 0.942848 2017-02-01 0.787942 2017-03-01 0.898672 2017-04-01 0.838851 2017-05-01 0.687743 2017-06-01 0.835100 2017-07-01 0.971016 2017-08-01 0.433730 2017-09-01 0.984731 2017-10-01 0.197288 2017-11-01 0.530533 2017-12-01 0.734869 2018-01-01 0.652892 Freq: D, dtype: float64 # 时间戳与时期之间的转换 : pd.to_period() pd.to_timestamp() dr=pd.date_range('2017/1/1',periods=10,freq='M') pr=pd.period_range('2017','2018',freq='M') print(dr) print(pr) DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30', '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31', '2017-09-30', '2017-10-31'], dtype='datetime64[ns]', freq='M') PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], dtype='int64', freq='M') # 每月最后一天转换为每月 ts1=pd.Series(np.random.rand(len(dr)),index=dr) print(ts1) print(ts1.to_period()) 2017-01-31 0.158806 2017-02-28 0.483492 2017-03-31 0.372044 2017-04-30 0.034802 2017-05-31 0.283967 2017-06-30 0.344219 2017-07-31 0.322696 2017-08-31 0.896927 2017-09-30 0.209306 2017-10-31 0.904640 Freq: M, dtype: float64 2017-01 0.158806 2017-02 0.483492 2017-03 0.372044 2017-04 0.034802 2017-05 0.283967 2017-06 0.344219 2017-07 0.322696 2017-08 0.896927 2017-09 0.209306 
2017-10 0.904640 Freq: M, dtype: float64 # 每月转换为每月第一天 ts2=pd.Series(np.random.rand(len(pr)),index=pr) print(ts2) print(ts2.to_timestamp()) 2017-01 0.045885 2017-02 0.254705 2017-03 0.010104 2017-04 0.184321 2017-05 0.080500 2017-06 0.306121 2017-07 0.186046 2017-08 0.915024 2017-09 0.612428 2017-10 0.348309 2017-11 0.969925 2017-12 0.769598 2018-01 0.482902 Freq: M, dtype: float64 2017-01-01 0.045885 2017-02-01 0.254705 2017-03-01 0.010104 2017-04-01 0.184321 2017-05-01 0.080500 2017-06-01 0.306121 2017-07-01 0.186046 2017-08-01 0.915024 2017-09-01 0.612428 2017-10-01 0.348309 2017-11-01 0.969925 2017-12-01 0.769598 2018-01-01 0.482902 Freq: MS, dtype: float64 ''' 【课程2.12】 时间序列 - 索引及切片 TimeSeries是Series的一个子类,所以Series索引及数据选取方面的方法基本一样 同时TimeSeries通过时间序列有更便捷的方法做索引和切片 ''' # 索引 from datetime import datetime dr=pd.date_range('2017/1','2017/3') s=pd.Series(np.random.rand(len(dr)),index=dr) print(s.head()) 2017-01-01 0.965767 2017-01-02 0.092477 2017-01-03 0.101372 2017-01-04 0.518738 2017-01-05 0.512767 Freq: D, dtype: float64 # 下标位置索引 print(s[0]) print(s[:5]) print(s[::2]) 0.965766677911 2017-01-01 0.965767 2017-01-02 0.092477 2017-01-03 0.101372 2017-01-04 0.518738 2017-01-05 0.512767 Freq: D, dtype: float64 2017-01-01 0.965767 2017-01-03 0.101372 2017-01-05 0.512767 2017-01-07 0.397813 2017-01-09 0.044512 2017-01-11 0.440636 2017-01-13 0.576041 2017-01-15 0.661706 2017-01-17 0.413142 2017-01-19 0.061130 2017-01-21 0.921432 2017-01-23 0.268689 2017-01-25 0.305809 2017-01-27 0.859973 2017-01-29 0.420669 2017-01-31 0.488450 2017-02-02 0.427845 2017-02-04 0.277361 2017-02-06 0.649949 2017-02-08 0.347356 2017-02-10 0.147578 2017-02-12 0.133765 2017-02-14 0.751505 2017-02-16 0.516944 2017-02-18 0.703469 2017-02-20 0.085836 2017-02-22 0.374159 2017-02-24 0.711476 2017-02-26 0.748231 2017-02-28 0.595617 Freq: 2D, dtype: float64 # 时间序列标签索引 print(s['2017-01-15']) print(s[datetime(2017,1,1)]) 0.66170600133 0.965766677911 # 切片 dr = pd.date_range('2017/1','2017/3',freq='12H') 
s=pd.Series(np.random.rand(len(dr)),index=dr) print(s.head()) 2017-01-01 00:00:00 0.318529 2017-01-01 12:00:00 0.648512 2017-01-02 00:00:00 0.325831 2017-01-02 12:00:00 0.717528 2017-01-03 00:00:00 0.802091 Freq: 12H, dtype: float64 print(s['2017-01-01':'2017-01-02']) 2017-01-01 00:00:00 0.318529 2017-01-01 12:00:00 0.648512 2017-01-02 00:00:00 0.325831 2017-01-02 12:00:00 0.717528 Freq: 12H, dtype: float64 # 重复索引的时间序列 dates=pd.DatetimeIndex(['1/1/2015','1/2/2015','1/3/2015','1/4/2015','1/1/2015','1/2/2015']) ts=pd.Series(np.random.rand(6),index=dates) print(ts) 2015-01-01 0.428691 2015-01-02 0.589544 2015-01-03 0.160050 2015-01-04 0.652073 2015-01-01 0.219171 2015-01-02 0.733021 dtype: float64 print(ts.is_unique) print(ts.index.is_unique) True False print(ts['2015-01-01']) 2015-01-01 0.428691 2015-01-01 0.219171 dtype: float64 # 通过group分组,重复的值用平均值处理 print(ts.groupby(level=0).mean()) 2015-01-01 0.323931 2015-01-02 0.661282 2015-01-03 0.160050 2015-01-04 0.652073 dtype: float64 ''' 【课程2.13】 时间序列 - 重采样 将时间序列从一个频率转换为另一个频率的过程,且会有数据的聚合 降采样:高频数据 → 低频数据,eg.以天为频率的数据转为以月为频率的数据 升采样:低频数据 → 高频数据,eg.以年为频率的数据转为以月为频率的数据 ''' # 重采样: .resample() # 创建一个以天为频率的TimeSeries 重采样按5天为频率 dr=pd.date_range('20170101',periods=12) ts=pd.Series(np.arange(12),index=dr) print(ts) 2017-01-01 0 2017-01-02 1 2017-01-03 2 2017-01-04 3 2017-01-05 4 2017-01-06 5 2017-01-07 6 2017-01-08 7 2017-01-09 8 2017-01-10 9 2017-01-11 10 2017-01-12 11 Freq: D, dtype: int32 ts_re=ts.resample('5D') print(ts_re) DatetimeIndexResampler [freq=<5 * Days>, axis=0, closed=left, label=left, convention=start, base=0] print(ts.resample('5D').sum()) 2017-01-01 10 2017-01-06 35 2017-01-11 21 Freq: 5D, dtype: int32 print(ts.resample('5D').mean()) # 平均值 2017-01-01 2.0 2017-01-06 7.0 2017-01-11 10.5 Freq: 5D, dtype: float64 print(ts.resample('5D').max()) # 最大值 2017-01-01 4 2017-01-06 9 2017-01-11 11 Freq: 5D, dtype: int32 print(ts.resample('5D').median()) # 求中值 2017-01-01 2.0 2017-01-06 7.0 2017-01-11 10.5 Freq: 5D, dtype: float64 
print(ts.resample('5D').first()) # 返回第一个值 2017-01-01 0 2017-01-06 5 2017-01-11 10 Freq: 5D, dtype: int32 print(ts.resample('5D').ohlc(),'→ OHLC重采样\n') open high low close 2017-01-01 0 4 0 4 2017-01-06 5 9 5 9 2017-01-11 10 11 10 11 → OHLC重采样 # 降采样 dr=pd.date_range('20170101',periods=12) ts=pd.Series(np.arange(1,13),index=dr) print(ts) 2017-01-01 1 2017-01-02 2 2017-01-03 3 2017-01-04 4 2017-01-05 5 2017-01-06 6 2017-01-07 7 2017-01-08 8 2017-01-09 9 2017-01-10 10 2017-01-11 11 2017-01-12 12 Freq: D, dtype: int32 print(ts.resample('5D').sum()) # 默认closed='left',区间左闭右开 [1,2,3,4,5][6,7,8,9,10][11,12] 2017-01-01 15 2017-01-06 40 2017-01-11 23 Freq: 5D, dtype: int32 print(ts.resample('5D',closed='left').sum()) # closed='left':区间左闭右开 [1,2,3,4,5][6,7,8,9,10][11,12] 2017-01-01 15 2017-01-06 40 2017-01-11 23 Freq: 5D, dtype: int32 print(ts.resample('5D',closed='right').sum()) # closed='right':区间左开右闭 [1][2,3,4,5,6][7,8,9,10,11][12] 2016-12-27 1 2017-01-01 20 2017-01-06 45 2017-01-11 12 Freq: 5D, dtype: int32 print(ts.resample('5D', label = 'left').sum(),'→ leftlabel\n') print(ts.resample('5D', label = 'right').sum(),'→ rightlabel\n') # label:聚合值的index,默认为取左 # 取值区间不变(这里closed取默认值),label只影响结果的索引标签 2017-01-01 15 2017-01-06 40 2017-01-11 23 Freq: 5D, dtype: int32 → leftlabel 2017-01-06 15 2017-01-11 40 2017-01-16 23 Freq: 5D, dtype: int32 → rightlabel # 升采样及插值 dr=pd.date_range('2017/1/1 0:0:0',periods=5,freq='H') ts=pd.DataFrame(np.arange(15).reshape(5,3),index=dr,columns=list('abc')) print(ts) a b c 2017-01-01 00:00:00 0 1 2 2017-01-01 01:00:00 3 4 5 2017-01-01 02:00:00 6 7 8 2017-01-01 03:00:00 9 10 11 2017-01-01 04:00:00 12 13 14 print(ts.resample('15T').asfreq()) print(ts.resample('15T').ffill()) print(ts.resample('15T').bfill()) # .asfreq():不做填充,返回NaN # .ffill():用前一个值填充(向后传播) # .bfill():用后一个值填充(向前传播) a b c 2017-01-01 00:00:00 0.0 1.0 2.0 2017-01-01 00:15:00 NaN NaN NaN 2017-01-01 00:30:00 NaN NaN NaN 2017-01-01 00:45:00 NaN NaN NaN 2017-01-01 01:00:00 3.0 4.0 5.0 2017-01-01 01:15:00 NaN NaN NaN 2017-01-01 01:30:00 NaN NaN NaN 2017-01-01 01:45:00 NaN 
NaN NaN 2017-01-01 02:00:00 6.0 7.0 8.0 2017-01-01 02:15:00 NaN NaN NaN 2017-01-01 02:30:00 NaN NaN NaN 2017-01-01 02:45:00 NaN NaN NaN 2017-01-01 03:00:00 9.0 10.0 11.0 2017-01-01 03:15:00 NaN NaN NaN 2017-01-01 03:30:00 NaN NaN NaN 2017-01-01 03:45:00 NaN NaN NaN 2017-01-01 04:00:00 12.0 13.0 14.0 a b c 2017-01-01 00:00:00 0 1 2 2017-01-01 00:15:00 0 1 2 2017-01-01 00:30:00 0 1 2 2017-01-01 00:45:00 0 1 2 2017-01-01 01:00:00 3 4 5 2017-01-01 01:15:00 3 4 5 2017-01-01 01:30:00 3 4 5 2017-01-01 01:45:00 3 4 5 2017-01-01 02:00:00 6 7 8 2017-01-01 02:15:00 6 7 8 2017-01-01 02:30:00 6 7 8 2017-01-01 02:45:00 6 7 8 2017-01-01 03:00:00 9 10 11 2017-01-01 03:15:00 9 10 11 2017-01-01 03:30:00 9 10 11 2017-01-01 03:45:00 9 10 11 2017-01-01 04:00:00 12 13 14 a b c 2017-01-01 00:00:00 0 1 2 2017-01-01 00:15:00 3 4 5 2017-01-01 00:30:00 3 4 5 2017-01-01 00:45:00 3 4 5 2017-01-01 01:00:00 3 4 5 2017-01-01 01:15:00 6 7 8 2017-01-01 01:30:00 6 7 8 2017-01-01 01:45:00 6 7 8 2017-01-01 02:00:00 6 7 8 2017-01-01 02:15:00 9 10 11 2017-01-01 02:30:00 9 10 11 2017-01-01 02:45:00 9 10 11 2017-01-01 03:00:00 9 10 11 2017-01-01 03:15:00 12 13 14 2017-01-01 03:30:00 12 13 14 2017-01-01 03:45:00 12 13 14 2017-01-01 04:00:00 12 13 14 # 时期重采样 - Period prng = pd.period_range('2016','2017',freq = 'M') ts = pd.Series(np.arange(len(prng)), index = prng) print(ts) print(ts.resample('3M').sum()) # 降采样 print(ts.resample('15D').ffill()) # 升采样 2016-01 0 2016-02 1 2016-03 2 2016-04 3 2016-05 4 2016-06 5 2016-07 6 2016-08 7 2016-09 8 2016-10 9 2016-11 10 2016-12 11 2017-01 12 Freq: M, dtype: int32 2016-01-31 0 2016-04-30 6 2016-07-31 15 2016-10-31 24 2017-01-31 33 Freq: 3M, dtype: int32 2016-01-01 0 2016-01-16 0 2016-01-31 0 2016-02-15 1 2016-03-01 2 2016-03-16 2 2016-03-31 2 2016-04-15 3 2016-04-30 3 2016-05-15 4 2016-05-30 4 2016-06-14 5 2016-06-29 5 2016-07-14 6 2016-07-29 6 2016-08-13 7 2016-08-28 7 2016-09-12 8 2016-09-27 8 2016-10-12 9 2016-10-27 9 2016-11-11 10 2016-11-26 10 2016-12-11 11 
2016-12-26 11 Freq: 15D, dtype: int32
    最新回复(0)