'''
【课程2.8】 时间模块:datetime
datetime模块,主要掌握:datetime.date(), datetime.datetime(), datetime.timedelta()
日期解析方法:parser.parse
'''
import datetime
# Current local calendar date (a datetime.date, no time-of-day component).
today = datetime.date.today()
print(today)
2019-05-25
# Build a date from explicit year, month, day; it prints as YYYY-MM-DD.
print(datetime.date(2016, 6, 2))
2016-06-02
# Current local date and time as a datetime.datetime.
now = datetime.datetime.now()
print(now)
2019-05-25 08:38:20.732982
# Time fields omitted from the constructor default to 0 (midnight).
print(datetime.datetime(2016, 5, 6))
# Full form: year, month, day, hour, minute, second.
print(datetime.datetime(2015, 5, 6, 5, 22, 22))
2016-05-06 00:00:00
2015-05-06 05:22:22
# Subtracting one datetime from another yields a datetime.timedelta.
t1 = datetime.datetime(2016, 5, 6)
t2 = datetime.datetime(2015, 5, 6, 5, 22, 22)
print(t1 - t2)
365 days, 18:37:38
# `today` now holds a full datetime (date and time of day).
today = datetime.datetime.now()
# timedelta's first positional argument is a number of days.
yes = today - datetime.timedelta(1)  # the same time yesterday
print(yes)
print(today - datetime.timedelta(10))  # ten days ago
2019-05-24 08:38:21.237705
2019-05-15 08:38:21.237705
# dateutil's parser reads many textual date formats without a format string.
from dateutil.parser import parse

date = '21-12-2017'
print(parse(date))  # 21 cannot be a month, so it is read as the day
print(parse('12-11-2017'))  # ambiguous order: read month-first (December 11)
print(parse('12/11/2017'))
print(parse('2017-5-6'))
print(parse('Jan 31, 1997 10:45 PM'))
print('===================')
# An ambiguous day/month order is read month-first by default ...
print(parse('5/1/2019'))
# ... and day-first when dayfirst=True is passed.
print(parse('5/1/2019', dayfirst=True))
2017-12-21 00:00:00
2017-12-11 00:00:00
2017-12-11 00:00:00
2017-05-06 00:00:00
1997-01-31 22:45:00
===================
2019-05-01 00:00:00
2019-01-05 00:00:00
'''
【课程2.9】 Pandas时刻数据:Timestamp
时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据
pandas.Timestamp()
'''
'\n【课程2.9】 Pandas时刻数据:Timestamp\n\n时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据\n\npandas.Timestamp()\n\n'
import pandas as pd
import numpy as np

# pd.Timestamp represents a single point in time; it accepts date strings ...
print(pd.Timestamp('2017-12-21'))
print(pd.Timestamp('2018-5-8 1:1:1'))
# ... as well as standard-library datetime objects.
print(pd.Timestamp(datetime.datetime(2015, 5, 6)))
2017-12-21 00:00:00
2018-05-08 01:01:01
2015-05-06 00:00:00
# NOTE: from here on the name `datetime` refers to the class, not the module
# imported at the top of the file.
from datetime import datetime

# pd.to_datetime converts a single value to a Timestamp ...
print(pd.to_datetime(datetime(2016, 12, 11, 1, 1, 1)))
print(pd.to_datetime('2017-2-3'))
# ... and a list of values to a DatetimeIndex.
print(pd.to_datetime(['2017-2-3', '2018-3-4', '2019-4-5']))
2016-12-11 01:01:01
2017-02-03 00:00:00
DatetimeIndex(['2017-02-03', '2018-03-04', '2019-04-05'], dtype='datetime64[ns]', freq=None)
# A list of date strings becomes a DatetimeIndex.
print(pd.to_datetime(['2018-2-3', '2019-2-6', '2017-5-9']))
# `datetime` here is the class imported via `from datetime import datetime`.
a = datetime(2016, 2, 2)
print(type(a))
# A list of datetime objects is converted the same way.
print(pd.to_datetime([datetime(2016, 5, 6), datetime(2017, 5, 6)]))
DatetimeIndex(['2018-02-03', '2019-02-06', '2017-05-09'], dtype='datetime64[ns]', freq=None)
<class 'datetime.datetime'>
DatetimeIndex(['2016-05-06', '2017-05-06'], dtype='datetime64[ns]', freq=None)
# A list with one entry ('hello') that cannot be parsed as a date.
date = ['2018-2-3', '2019-2-6', 'hello', '2017-5-9']
# errors='ignore' hands the input back unparsed when an entry fails.
# NOTE: this option is deprecated in pandas 2.2+; it is kept because the
# recorded output comes from an older pandas.
print(pd.to_datetime(date, errors='ignore'))
# errors='coerce' turns unparseable entries into NaT.
print(pd.to_datetime(date, errors='coerce'))
['2018-2-3' '2019-2-6' 'hello' '2017-5-9']
DatetimeIndex(['2018-02-03', '2019-02-06', 'NaT', '2017-05-09'], dtype='datetime64[ns]', freq=None)
'''
【课程2.10】 Pandas时间戳索引:DatetimeIndex
核心:pd.date_range()
'''
'\n【课程2.10】 Pandas时间戳索引:DatetimeIndex\n\n核心:pd.date_range()\n\n'
# Build a DatetimeIndex directly from date strings written in mixed formats
# (the recorded output reads '5/4/2018' and '3/5/2016' month-first).
print(pd.DatetimeIndex(['2019/5/4', '5/4/2018', '3/5/2016']))
DatetimeIndex(['2019-05-04', '2018-05-04', '2016-03-05'], dtype='datetime64[ns]', freq=None)
# Use a DatetimeIndex as the index of a Series of random values.
rng = pd.DatetimeIndex(['2019/5/4', '5/4/2018', '3/5/2016'])
print(pd.Series(np.random.rand(len(rng)), index=rng))
2019-05-04 0.747358
2018-05-04 0.484367
2016-03-05 0.666840
dtype: float64
# date_range(start, end) generates every calendar day between the endpoints.
# NOTE(review): the endpoints use different day/month orders ('1/1' vs '30/1');
# the recorded output runs from 2017-01-01 to 2017-01-30.
print(pd.date_range('1/1/2017', '30/1/2017'))
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
'2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
'2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
'2017-01-13', '2017-01-14', '2017-01-15', '2017-01-16',
'2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20',
'2017-01-21', '2017-01-22', '2017-01-23', '2017-01-24',
'2017-01-25', '2017-01-26', '2017-01-27', '2017-01-28',
'2017-01-29', '2017-01-30'],
dtype='datetime64[ns]', freq='D')
# `periods` fixes how many timestamps are generated: counting forward from a
# start ...
print(pd.date_range('1/1/2017', periods=10))
# ... or backward from an end.
print(pd.date_range(end='2017/1/1', periods=10))
'''
start:开始时间
end:结束时间
periods:生成的时间点个数(周期数)
freq:频率 默认天 pd.date_range()默认日历日 pd.bdate_range()默认频率为工作日
tz:时区
'''
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
'2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',
'2017-01-09', '2017-01-10'],
dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2016-12-23', '2016-12-24', '2016-12-25', '2016-12-26',
'2016-12-27', '2016-12-28', '2016-12-29', '2016-12-30',
'2016-12-31', '2017-01-01'],
dtype='datetime64[ns]', freq='D')
'\nstart:开始时间\nend:结束时间\nperiods:偏移量\nfreq:频率 默认天 pd.date_range()默认日历日 pd.bdate_range()默认频率为工作日\ntz:时区\n'
# The start's time of day is carried through every generated timestamp ...
print(pd.date_range('2017/2/2 15:00:00', periods=10))
# ... unless normalize=True resets it to midnight.
print(pd.date_range('2017/2/2 15:00:00', periods=10, normalize=True))
DatetimeIndex(['2017-02-02 15:00:00', '2017-02-03 15:00:00',
'2017-02-04 15:00:00', '2017-02-05 15:00:00',
'2017-02-06 15:00:00', '2017-02-07 15:00:00',
'2017-02-08 15:00:00', '2017-02-09 15:00:00',
'2017-02-10 15:00:00', '2017-02-11 15:00:00'],
dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-02-02', '2017-02-03', '2017-02-04', '2017-02-05',
'2017-02-06', '2017-02-07', '2017-02-08', '2017-02-09',
'2017-02-10', '2017-02-11'],
dtype='datetime64[ns]', freq='D')
# By default both endpoints are included.
print(pd.date_range('2017/1/1', '2017/1/4'))
# closed='left' drops the right endpoint; closed='right' drops the left one.
# NOTE: pandas 1.4 introduced `inclusive` as the replacement and pandas 2.0
# removed `closed`; it is kept because the recorded output comes from an older
# pandas.
print(pd.date_range('2017/1/1', '2017/1/4', closed='left'))
print(pd.date_range('2017/1/1', '2017/1/4', closed='right'))
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
# list() turns the DatetimeIndex into a plain list of Timestamp objects.
print(list(pd.date_range('2017/1/1', '2017/1/4')))
[Timestamp('2017-01-01 00:00:00', offset='D'), Timestamp('2017-01-02 00:00:00', offset='D'), Timestamp('2017-01-03 00:00:00', offset='D'), Timestamp('2017-01-04 00:00:00', offset='D')]
# Default frequency is one calendar day ('D').
print(pd.date_range('2017/1/1', '2017/1/4'))
# 'B': business days only (weekends are skipped in the recorded output).
print(pd.date_range('2017/1/1', '2017/1/30', freq='B'))
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',
'2017-01-06', '2017-01-09', '2017-01-10', '2017-01-11',
'2017-01-12', '2017-01-13', '2017-01-16', '2017-01-17',
'2017-01-18', '2017-01-19', '2017-01-20', '2017-01-23',
'2017-01-24', '2017-01-25', '2017-01-26', '2017-01-27',
'2017-01-30'],
dtype='datetime64[ns]', freq='B')
# Hourly frequency. Lowercase 'h' is used instead of 'H': the uppercase alias
# is deprecated in pandas 2.2+, and this file already relies on the lowercase
# form ('2h30min').
print(pd.date_range('2017/1/1', '2017/1/4', freq='h'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
'2017-01-01 02:00:00', '2017-01-01 03:00:00',
'2017-01-01 04:00:00', '2017-01-01 05:00:00',
'2017-01-01 06:00:00', '2017-01-01 07:00:00',
'2017-01-01 08:00:00', '2017-01-01 09:00:00',
'2017-01-01 10:00:00', '2017-01-01 11:00:00',
'2017-01-01 12:00:00', '2017-01-01 13:00:00',
'2017-01-01 14:00:00', '2017-01-01 15:00:00',
'2017-01-01 16:00:00', '2017-01-01 17:00:00',
'2017-01-01 18:00:00', '2017-01-01 19:00:00',
'2017-01-01 20:00:00', '2017-01-01 21:00:00',
'2017-01-01 22:00:00', '2017-01-01 23:00:00',
'2017-01-02 00:00:00', '2017-01-02 01:00:00',
'2017-01-02 02:00:00', '2017-01-02 03:00:00',
'2017-01-02 04:00:00', '2017-01-02 05:00:00',
'2017-01-02 06:00:00', '2017-01-02 07:00:00',
'2017-01-02 08:00:00', '2017-01-02 09:00:00',
'2017-01-02 10:00:00', '2017-01-02 11:00:00',
'2017-01-02 12:00:00', '2017-01-02 13:00:00',
'2017-01-02 14:00:00', '2017-01-02 15:00:00',
'2017-01-02 16:00:00', '2017-01-02 17:00:00',
'2017-01-02 18:00:00', '2017-01-02 19:00:00',
'2017-01-02 20:00:00', '2017-01-02 21:00:00',
'2017-01-02 22:00:00', '2017-01-02 23:00:00',
'2017-01-03 00:00:00', '2017-01-03 01:00:00',
'2017-01-03 02:00:00', '2017-01-03 03:00:00',
'2017-01-03 04:00:00', '2017-01-03 05:00:00',
'2017-01-03 06:00:00', '2017-01-03 07:00:00',
'2017-01-03 08:00:00', '2017-01-03 09:00:00',
'2017-01-03 10:00:00', '2017-01-03 11:00:00',
'2017-01-03 12:00:00', '2017-01-03 13:00:00',
'2017-01-03 14:00:00', '2017-01-03 15:00:00',
'2017-01-03 16:00:00', '2017-01-03 17:00:00',
'2017-01-03 18:00:00', '2017-01-03 19:00:00',
'2017-01-03 20:00:00', '2017-01-03 21:00:00',
'2017-01-03 22:00:00', '2017-01-03 23:00:00',
'2017-01-04 00:00:00'],
dtype='datetime64[ns]', freq='H')
# Minute frequency. 'min' replaces 'T', which is deprecated in pandas 2.2+;
# this file already relies on 'min' ('2h30min').
print(pd.date_range('2017/1/1', '2017/1/4', freq='min'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00',
'2017-01-01 00:02:00', '2017-01-01 00:03:00',
'2017-01-01 00:04:00', '2017-01-01 00:05:00',
'2017-01-01 00:06:00', '2017-01-01 00:07:00',
'2017-01-01 00:08:00', '2017-01-01 00:09:00',
...
'2017-01-03 23:51:00', '2017-01-03 23:52:00',
'2017-01-03 23:53:00', '2017-01-03 23:54:00',
'2017-01-03 23:55:00', '2017-01-03 23:56:00',
'2017-01-03 23:57:00', '2017-01-03 23:58:00',
'2017-01-03 23:59:00', '2017-01-04 00:00:00'],
dtype='datetime64[ns]', length=4321, freq='T')
# Hourly frequency again; lowercase 'h' replaces the deprecated 'H' alias.
print(pd.date_range('2017/1/1', '2017/1/4', freq='h'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',
'2017-01-01 02:00:00', '2017-01-01 03:00:00',
'2017-01-01 04:00:00', '2017-01-01 05:00:00',
'2017-01-01 06:00:00', '2017-01-01 07:00:00',
'2017-01-01 08:00:00', '2017-01-01 09:00:00',
'2017-01-01 10:00:00', '2017-01-01 11:00:00',
'2017-01-01 12:00:00', '2017-01-01 13:00:00',
'2017-01-01 14:00:00', '2017-01-01 15:00:00',
'2017-01-01 16:00:00', '2017-01-01 17:00:00',
'2017-01-01 18:00:00', '2017-01-01 19:00:00',
'2017-01-01 20:00:00', '2017-01-01 21:00:00',
'2017-01-01 22:00:00', '2017-01-01 23:00:00',
'2017-01-02 00:00:00', '2017-01-02 01:00:00',
'2017-01-02 02:00:00', '2017-01-02 03:00:00',
'2017-01-02 04:00:00', '2017-01-02 05:00:00',
'2017-01-02 06:00:00', '2017-01-02 07:00:00',
'2017-01-02 08:00:00', '2017-01-02 09:00:00',
'2017-01-02 10:00:00', '2017-01-02 11:00:00',
'2017-01-02 12:00:00', '2017-01-02 13:00:00',
'2017-01-02 14:00:00', '2017-01-02 15:00:00',
'2017-01-02 16:00:00', '2017-01-02 17:00:00',
'2017-01-02 18:00:00', '2017-01-02 19:00:00',
'2017-01-02 20:00:00', '2017-01-02 21:00:00',
'2017-01-02 22:00:00', '2017-01-02 23:00:00',
'2017-01-03 00:00:00', '2017-01-03 01:00:00',
'2017-01-03 02:00:00', '2017-01-03 03:00:00',
'2017-01-03 04:00:00', '2017-01-03 05:00:00',
'2017-01-03 06:00:00', '2017-01-03 07:00:00',
'2017-01-03 08:00:00', '2017-01-03 09:00:00',
'2017-01-03 10:00:00', '2017-01-03 11:00:00',
'2017-01-03 12:00:00', '2017-01-03 13:00:00',
'2017-01-03 14:00:00', '2017-01-03 15:00:00',
'2017-01-03 16:00:00', '2017-01-03 17:00:00',
'2017-01-03 18:00:00', '2017-01-03 19:00:00',
'2017-01-03 20:00:00', '2017-01-03 21:00:00',
'2017-01-03 22:00:00', '2017-01-03 23:00:00',
'2017-01-04 00:00:00'],
dtype='datetime64[ns]', freq='H')
# Minute frequency again; 'min' replaces the deprecated 'T' alias.
print(pd.date_range('2017/1/1', '2017/1/4', freq='min'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:01:00',
'2017-01-01 00:02:00', '2017-01-01 00:03:00',
'2017-01-01 00:04:00', '2017-01-01 00:05:00',
'2017-01-01 00:06:00', '2017-01-01 00:07:00',
'2017-01-01 00:08:00', '2017-01-01 00:09:00',
...
'2017-01-03 23:51:00', '2017-01-03 23:52:00',
'2017-01-03 23:53:00', '2017-01-03 23:54:00',
'2017-01-03 23:55:00', '2017-01-03 23:56:00',
'2017-01-03 23:57:00', '2017-01-03 23:58:00',
'2017-01-03 23:59:00', '2017-01-04 00:00:00'],
dtype='datetime64[ns]', length=4321, freq='T')
# Second frequency; lowercase 's' replaces 'S', which is deprecated in
# pandas 2.2+.
print(pd.date_range('2017/1/1', '2017/1/4', freq='s'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 00:00:01',
'2017-01-01 00:00:02', '2017-01-01 00:00:03',
'2017-01-01 00:00:04', '2017-01-01 00:00:05',
'2017-01-01 00:00:06', '2017-01-01 00:00:07',
'2017-01-01 00:00:08', '2017-01-01 00:00:09',
...
'2017-01-03 23:59:51', '2017-01-03 23:59:52',
'2017-01-03 23:59:53', '2017-01-03 23:59:54',
'2017-01-03 23:59:55', '2017-01-03 23:59:56',
'2017-01-03 23:59:57', '2017-01-03 23:59:58',
'2017-01-03 23:59:59', '2017-01-04 00:00:00'],
dtype='datetime64[ns]', length=259201, freq='S')
# 'W-MON': weekly, on Mondays.
print(pd.date_range('2017/1/1', '2017/2/1', freq='W-MON'))
DatetimeIndex(['2017-01-02', '2017-01-09', '2017-01-16', '2017-01-23',
'2017-01-30'],
dtype='datetime64[ns]', freq='W-MON')
# 'WOM-2MON': week-of-month, here the second Monday of every month.
print(pd.date_range('2017/1/1', '2017/5/1', freq='WOM-2MON'))
DatetimeIndex(['2017-01-09', '2017-02-13', '2017-03-13', '2017-04-10'], dtype='datetime64[ns]', freq='WOM-2MON')
# 'M': last calendar day of each month.
# NOTE: pandas 2.2+ spells this alias 'ME'; 'M' is kept for the older pandas
# that produced the recorded output.
print(pd.date_range('2017/1/1', '2018/1/1', freq='M'))
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
'2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
'2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],
dtype='datetime64[ns]', freq='M')
# 'Q-DEC': last calendar day of each quarter, with the year ending in December.
# NOTE: pandas 2.2+ spells this alias 'QE-DEC'.
print(pd.date_range('2017', '2018', freq='Q-DEC'))
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-30', '2017-12-31'], dtype='datetime64[ns]', freq='Q-DEC')
# 'A-DEC': last calendar day of December each year.
# NOTE: pandas 2.2+ spells this alias 'YE-DEC'.
print(pd.date_range('2017', '2018', freq='A-DEC'))
DatetimeIndex(['2017-12-31'], dtype='datetime64[ns]', freq='A-DEC')
# 'BM': last business day of each month.
# NOTE: pandas 2.2+ spells this alias 'BME'.
print(pd.date_range('2017', '2018', freq='BM'))
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-28',
'2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
'2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29'],
dtype='datetime64[ns]', freq='BM')
# 'BQ-DEC': last business day of each quarter, with the year ending in December.
# NOTE: pandas 2.2+ spells this alias 'BQE-DEC'.
print(pd.date_range('2017', '2018', freq='BQ-DEC'))
DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29'], dtype='datetime64[ns]', freq='BQ-DEC')
# 'BA-DEC': last business day of December each year.
# NOTE: pandas 2.2+ spells this alias 'BYE-DEC'.
print(pd.date_range('2017', '2018', freq='BA-DEC'))
DatetimeIndex(['2017-12-29'], dtype='datetime64[ns]', freq='BA-DEC')
# 'MS': first calendar day of each month.
print(pd.date_range('2017', '2018', freq='MS'))
DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',
'2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01',
'2017-09-01', '2017-10-01', '2017-11-01', '2017-12-01',
'2018-01-01'],
dtype='datetime64[ns]', freq='MS')
# 'QS-DEC': quarter-start dates anchored on December
# (the recorded output lists the 1st of Mar, Jun, Sep and Dec).
print(pd.date_range('2017', '2018', freq='QS-DEC'))
DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='QS-DEC')
# 'AS-DEC': first calendar day of December each year.
# NOTE: pandas 2.2+ spells this alias 'YS-DEC'.
print(pd.date_range('2017', '2018', freq='AS-DEC'))
DatetimeIndex(['2017-12-01'], dtype='datetime64[ns]', freq='AS-DEC')
# 'BMS': first business day of each month.
print(pd.date_range('2017', '2018', freq='BMS'))
DatetimeIndex(['2017-01-02', '2017-02-01', '2017-03-01', '2017-04-03',
'2017-05-01', '2017-06-01', '2017-07-03', '2017-08-01',
'2017-09-01', '2017-10-02', '2017-11-01', '2017-12-01',
'2018-01-01'],
dtype='datetime64[ns]', freq='BMS')
# 'BQS-DEC': first business day of each quarter, anchored on December.
print(pd.date_range('2017', '2018', freq='BQS-DEC'))
DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01'], dtype='datetime64[ns]', freq='BQS-DEC')
# 'BAS-DEC': first business day of December each year.
# NOTE: pandas 2.2+ spells this alias 'BYS-DEC'.
print(pd.date_range('2017', '2020', freq='BAS-DEC'))
DatetimeIndex(['2017-12-01', '2018-12-03', '2019-12-02'], dtype='datetime64[ns]', freq='BAS-DEC')
# A numeric prefix multiplies the base frequency: '7D' steps seven days.
print(pd.date_range('2017/1/1', '2017/4/1', freq='7D'))
DatetimeIndex(['2017-01-01', '2017-01-08', '2017-01-15', '2017-01-22',
'2017-01-29', '2017-02-05', '2017-02-12', '2017-02-19',
'2017-02-26', '2017-03-05', '2017-03-12', '2017-03-19',
'2017-03-26'],
dtype='datetime64[ns]', freq='7D')
# Aliases can be combined: '2h30min' steps 2 hours 30 minutes (150 minutes).
print(pd.date_range('2017/1/1', '2017/1/2', freq='2h30min'))
DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 02:30:00',
'2017-01-01 05:00:00', '2017-01-01 07:30:00',
'2017-01-01 10:00:00', '2017-01-01 12:30:00',
'2017-01-01 15:00:00', '2017-01-01 17:30:00',
'2017-01-01 20:00:00', '2017-01-01 22:30:00'],
dtype='datetime64[ns]', freq='150T')
# '2M': every second month end.
# NOTE: pandas 2.2+ spells this alias '2ME'.
print(pd.date_range('2017', '2018', freq='2M'))
DatetimeIndex(['2017-01-31', '2017-03-31', '2017-05-31', '2017-07-31',
'2017-09-30', '2017-11-30'],
dtype='datetime64[ns]', freq='2M')
# Daily series of four random values.
ts = pd.Series(np.random.rand(4), index=pd.date_range('2017/1/1', '2017/1/4'))
print(ts)
# asfreq re-indexes to a new frequency; newly created slots are NaN.
# Lowercase '4h' replaces '4H', whose uppercase alias is deprecated in
# pandas 2.2+.
print(ts.asfreq('4h'))
2017-01-01 0.121601
2017-01-02 0.813354
2017-01-03 0.258622
2017-01-04 0.816194
Freq: D, dtype: float64
2017-01-01 00:00:00 0.121601
2017-01-01 04:00:00 NaN
2017-01-01 08:00:00 NaN
2017-01-01 12:00:00 NaN
2017-01-01 16:00:00 NaN
2017-01-01 20:00:00 NaN
2017-01-02 00:00:00 0.813354
2017-01-02 04:00:00 NaN
2017-01-02 08:00:00 NaN
2017-01-02 12:00:00 NaN
2017-01-02 16:00:00 NaN
2017-01-02 20:00:00 NaN
2017-01-03 00:00:00 0.258622
2017-01-03 04:00:00 NaN
2017-01-03 08:00:00 NaN
2017-01-03 12:00:00 NaN
2017-01-03 16:00:00 NaN
2017-01-03 20:00:00 NaN
2017-01-04 00:00:00 0.816194
Freq: 4H, dtype: float64
# method='ffill' fills each new slot with the last preceding value.
# ('4h' replaces the deprecated uppercase '4H' alias.)
print(ts.asfreq('4h', method='ffill'))
2017-01-01 00:00:00 0.121601
2017-01-01 04:00:00 0.121601
2017-01-01 08:00:00 0.121601
2017-01-01 12:00:00 0.121601
2017-01-01 16:00:00 0.121601
2017-01-01 20:00:00 0.121601
2017-01-02 00:00:00 0.813354
2017-01-02 04:00:00 0.813354
2017-01-02 08:00:00 0.813354
2017-01-02 12:00:00 0.813354
2017-01-02 16:00:00 0.813354
2017-01-02 20:00:00 0.813354
2017-01-03 00:00:00 0.258622
2017-01-03 04:00:00 0.258622
2017-01-03 08:00:00 0.258622
2017-01-03 12:00:00 0.258622
2017-01-03 16:00:00 0.258622
2017-01-03 20:00:00 0.258622
2017-01-04 00:00:00 0.816194
Freq: 4H, dtype: float64
# method='bfill' fills each new slot with the next following value.
# ('4h' replaces the deprecated uppercase '4H' alias.)
print(ts.asfreq('4h', method='bfill'))
2017-01-01 00:00:00 0.121601
2017-01-01 04:00:00 0.813354
2017-01-01 08:00:00 0.813354
2017-01-01 12:00:00 0.813354
2017-01-01 16:00:00 0.813354
2017-01-01 20:00:00 0.813354
2017-01-02 00:00:00 0.813354
2017-01-02 04:00:00 0.258622
2017-01-02 08:00:00 0.258622
2017-01-02 12:00:00 0.258622
2017-01-02 16:00:00 0.258622
2017-01-02 20:00:00 0.258622
2017-01-03 00:00:00 0.258622
2017-01-03 04:00:00 0.816194
2017-01-03 08:00:00 0.816194
2017-01-03 12:00:00 0.816194
2017-01-03 16:00:00 0.816194
2017-01-03 20:00:00 0.816194
2017-01-04 00:00:00 0.816194
Freq: 4H, dtype: float64
# Fresh daily series of four random values for the shift examples.
ts = pd.Series(np.random.rand(4), index=pd.date_range('20170101', '20170104'))
print(ts)
2017-01-01 0.764262
2017-01-02 0.894735
2017-01-03 0.302417
2017-01-04 0.765669
Freq: D, dtype: float64
# A positive shift moves values later; the index stays and NaN fills the front.
print(ts.shift(2))
2017-01-01 NaN
2017-01-02 NaN
2017-01-03 0.764262
2017-01-04 0.894735
Freq: D, dtype: float64
# A negative shift moves values earlier; NaN fills the tail.
print(ts.shift(-2))
2017-01-01 0.302417
2017-01-02 0.765669
2017-01-03 NaN
2017-01-04 NaN
Freq: D, dtype: float64
# Relative change versus the previous row: value / previous value - 1.
per = ts / ts.shift(1) - 1
print(per)
2017-01-01 NaN
2017-01-02 0.170716
2017-01-03 -0.662003
2017-01-04 1.531832
Freq: D, dtype: float64
# With `freq`, the index is shifted (here by two days) and the values stay put.
print(ts.shift(2, freq='D'))
2017-01-03 0.764262
2017-01-04 0.894735
2017-01-05 0.302417
2017-01-06 0.765669
Freq: D, dtype: float64
# Shift the index by two minutes; 'min' replaces the deprecated 'T' alias.
print(ts.shift(2, freq='min'))
2017-01-01 00:02:00 0.764262
2017-01-02 00:02:00 0.894735
2017-01-03 00:02:00 0.302417
2017-01-04 00:02:00 0.765669
Freq: D, dtype: float64
'''
【课程2.11】 Pandas时期:Period
核心:pd.Period()
'''
'\n【课程2.11】 Pandas时期:Period\n\n核心:pd.Period()\n\n'
# A Period is a span of time; '2017' at monthly frequency is January 2017.
p = pd.Period('2017', freq='M')
print(p)
2017-01
# Integer arithmetic moves a Period by whole units of its frequency.
print(p + 1)
print(p - 1)
2017-02
2016-12
# Annual period ending in December, moved back by one year.
print(pd.Period('2012', freq='A-DEC') - 1)
2011
# period_range builds a PeriodIndex; both endpoints are included.
pr = pd.period_range('1/1/2017', '1/1/2018', freq='M')
print(pr)
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
'2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='int64', freq='M')
# A Series of random values indexed by the monthly periods.
s = pd.Series(np.random.rand(len(pr)), index=pr)
print(s)
2017-01 0.395536
2017-02 0.867729
2017-03 0.727422
2017-04 0.194098
2017-05 0.897134
2017-06 0.119327
2017-07 0.481213
2017-08 0.343418
2017-09 0.474208
2017-10 0.271213
2017-11 0.621978
2017-12 0.723095
2018-01 0.234442
Freq: M, dtype: float64
# Annual period (year ending in December); freq is passed positionally here.
p = pd.Period('2017', 'A-DEC')
print(p)
2017
# Period.asfreq converts to a finer frequency; `how` picks which sub-period to
# keep: the first month of the year ...
print(p.asfreq('M', how='start'))
# ... or the last day of the year.
print(p.asfreq('D', how='end'))
2017-01
2017-12-31
# Monthly periods from January 2017 through January 2018.
pr = pd.period_range('2017', '2018', freq='M')
print(pr)
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
'2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='int64', freq='M')
# Series indexed by the monthly periods.
s1 = pd.Series(np.random.rand(len(pr)), index=pr)
print(s1)
# PeriodIndex.asfreq converts every period; how='start' keeps each month's
# first day.
print(pd.Series(np.random.rand(len(pr)), index=pr.asfreq('D', how='start')))
2017-01 0.027461
2017-02 0.351679
2017-03 0.026355
2017-04 0.130524
2017-05 0.323551
2017-06 0.418091
2017-07 0.252305
2017-08 0.159283
2017-09 0.863181
2017-10 0.113546
2017-11 0.834609
2017-12 0.404364
2018-01 0.248782
Freq: M, dtype: float64
2017-01-01 0.942848
2017-02-01 0.787942
2017-03-01 0.898672
2017-04-01 0.838851
2017-05-01 0.687743
2017-06-01 0.835100
2017-07-01 0.971016
2017-08-01 0.433730
2017-09-01 0.984731
2017-10-01 0.197288
2017-11-01 0.530533
2017-12-01 0.734869
2018-01-01 0.652892
Freq: D, dtype: float64
# Month-end timestamps (DatetimeIndex) next to monthly periods (PeriodIndex).
dr = pd.date_range('2017/1/1', periods=10, freq='M')
pr = pd.period_range('2017', '2018', freq='M')
print(dr)
print(pr)
DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
'2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
'2017-09-30', '2017-10-31'],
dtype='datetime64[ns]', freq='M')
PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
'2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
'2018-01'],
dtype='int64', freq='M')
# Timestamp-indexed series ...
ts1 = pd.Series(np.random.rand(len(dr)), index=dr)
print(ts1)
# ... converted to a period-indexed series (timestamps -> periods).
print(ts1.to_period())
2017-01-31 0.158806
2017-02-28 0.483492
2017-03-31 0.372044
2017-04-30 0.034802
2017-05-31 0.283967
2017-06-30 0.344219
2017-07-31 0.322696
2017-08-31 0.896927
2017-09-30 0.209306
2017-10-31 0.904640
Freq: M, dtype: float64
2017-01 0.158806
2017-02 0.483492
2017-03 0.372044
2017-04 0.034802
2017-05 0.283967
2017-06 0.344219
2017-07 0.322696
2017-08 0.896927
2017-09 0.209306
2017-10 0.904640
Freq: M, dtype: float64
# Period-indexed series ...
ts2 = pd.Series(np.random.rand(len(pr)), index=pr)
print(ts2)
# ... converted to a timestamp-indexed series (each period -> its start).
print(ts2.to_timestamp())
2017-01 0.045885
2017-02 0.254705
2017-03 0.010104
2017-04 0.184321
2017-05 0.080500
2017-06 0.306121
2017-07 0.186046
2017-08 0.915024
2017-09 0.612428
2017-10 0.348309
2017-11 0.969925
2017-12 0.769598
2018-01 0.482902
Freq: M, dtype: float64
2017-01-01 0.045885
2017-02-01 0.254705
2017-03-01 0.010104
2017-04-01 0.184321
2017-05-01 0.080500
2017-06-01 0.306121
2017-07-01 0.186046
2017-08-01 0.915024
2017-09-01 0.612428
2017-10-01 0.348309
2017-11-01 0.969925
2017-12-01 0.769598
2018-01-01 0.482902
Freq: MS, dtype: float64
'''
【课程2.12】 时间序列 - 索引及切片
TimeSeries是Series的一个子类,所以Series索引及数据选取方面的方法基本一样
同时TimeSeries通过时间序列有更便捷的方法做索引和切片
'''
from datetime import datetime

# Daily series of random values from 2017-01-01 onward.
dr = pd.date_range('2017/1', '2017/3')
s = pd.Series(np.random.rand(len(dr)), index=dr)
print(s.head())
2017-01-01 0.965767
2017-01-02 0.092477
2017-01-03 0.101372
2017-01-04 0.518738
2017-01-05 0.512767
Freq: D, dtype: float64
# Positional access. `.iloc[0]` replaces `s[0]`: treating an integer key as a
# position on a datetime-indexed Series is deprecated in newer pandas, and
# `.iloc` returns the same first element on every version.
print(s.iloc[0])
print(s[:5])  # first five rows
print(s[::2])  # every second row
0.965766677911
2017-01-01 0.965767
2017-01-02 0.092477
2017-01-03 0.101372
2017-01-04 0.518738
2017-01-05 0.512767
Freq: D, dtype: float64
2017-01-01 0.965767
2017-01-03 0.101372
2017-01-05 0.512767
2017-01-07 0.397813
2017-01-09 0.044512
2017-01-11 0.440636
2017-01-13 0.576041
2017-01-15 0.661706
2017-01-17 0.413142
2017-01-19 0.061130
2017-01-21 0.921432
2017-01-23 0.268689
2017-01-25 0.305809
2017-01-27 0.859973
2017-01-29 0.420669
2017-01-31 0.488450
2017-02-02 0.427845
2017-02-04 0.277361
2017-02-06 0.649949
2017-02-08 0.347356
2017-02-10 0.147578
2017-02-12 0.133765
2017-02-14 0.751505
2017-02-16 0.516944
2017-02-18 0.703469
2017-02-20 0.085836
2017-02-22 0.374159
2017-02-24 0.711476
2017-02-26 0.748231
2017-02-28 0.595617
Freq: 2D, dtype: float64
# Label access works with a date string or with a datetime object.
print(s['2017-01-15'])
print(s[datetime(2017, 1, 1)])
0.66170600133
0.965766677911
# Same date range at 12-hour frequency. Lowercase '12h' replaces '12H', whose
# uppercase alias is deprecated in pandas 2.2+.
dr = pd.date_range('2017/1', '2017/3', freq='12h')
s = pd.Series(np.random.rand(len(dr)), index=dr)
print(s.head())
2017-01-01 00:00:00 0.318529
2017-01-01 12:00:00 0.648512
2017-01-02 00:00:00 0.325831
2017-01-02 12:00:00 0.717528
2017-01-03 00:00:00 0.802091
Freq: 12H, dtype: float64
# Slicing by date strings keeps every timestamp on the end date as well.
print(s['2017-01-01':'2017-01-02'])
2017-01-01 00:00:00 0.318529
2017-01-01 12:00:00 0.648512
2017-01-02 00:00:00 0.325831
2017-01-02 12:00:00 0.717528
Freq: 12H, dtype: float64
# An index that contains duplicate timestamps (Jan 1 and Jan 2 appear twice).
dates = pd.DatetimeIndex(
    ['1/1/2015', '1/2/2015', '1/3/2015', '1/4/2015', '1/1/2015', '1/2/2015']
)
ts = pd.Series(np.random.rand(6), index=dates)
print(ts)
2015-01-01 0.428691
2015-01-02 0.589544
2015-01-03 0.160050
2015-01-04 0.652073
2015-01-01 0.219171
2015-01-02 0.733021
dtype: float64
# Series.is_unique checks the values; index.is_unique checks the labels.
print(ts.is_unique)
print(ts.index.is_unique)
True
False
# Looking up a duplicated label returns every matching row.
print(ts['2015-01-01'])
2015-01-01 0.428691
2015-01-01 0.219171
dtype: float64
# Collapse duplicate timestamps by averaging the rows that share an index label.
print(ts.groupby(level=0).mean())
2015-01-01 0.323931
2015-01-02 0.661282
2015-01-03 0.160050
2015-01-04 0.652073
dtype: float64
'''
【课程2.13】 时间序列 - 重采样
将时间序列从一个频率转换为另一个频率的过程,且会有数据的结合
降采样:高频数据 → 低频数据,eg.以天为频率的数据转为以月为频率的数据
升采样:低频数据 → 高频数据,eg.以年为频率的数据转为以月为频率的数据
'''
# Twelve consecutive days holding the values 0..11.
dr = pd.date_range('20170101', periods=12)
ts = pd.Series(np.arange(12), index=dr)
print(ts)
2017-01-01 0
2017-01-02 1
2017-01-03 2
2017-01-04 3
2017-01-05 4
2017-01-06 5
2017-01-07 6
2017-01-08 7
2017-01-09 8
2017-01-10 9
2017-01-11 10
2017-01-12 11
Freq: D, dtype: int32
# resample() returns a Resampler object; an aggregation method must be called
# on it to get values.
ts_re = ts.resample('5D')
print(ts_re)
DatetimeIndexResampler [freq=<5 * Days>, axis=0, closed=left, label=left, convention=start, base=0]
# Sum of each 5-day bin.
print(ts.resample('5D').sum())
2017-01-01 10
2017-01-06 35
2017-01-11 21
Freq: 5D, dtype: int32
# Mean of each 5-day bin.
print(ts.resample('5D').mean())
2017-01-01 2.0
2017-01-06 7.0
2017-01-11 10.5
Freq: 5D, dtype: float64
# Maximum of each 5-day bin.
print(ts.resample('5D').max())
2017-01-01 4
2017-01-06 9
2017-01-11 11
Freq: 5D, dtype: int32
# Median of each 5-day bin.
print(ts.resample('5D').median())
2017-01-01 2.0
2017-01-06 7.0
2017-01-11 10.5
Freq: 5D, dtype: float64
# First value of each 5-day bin.
print(ts.resample('5D').first())
2017-01-01 0
2017-01-06 5
2017-01-11 10
Freq: 5D, dtype: int32
# Open / high / low / close of each 5-day bin, returned as a DataFrame.
print(ts.resample('5D').ohlc(), '→ OHLC重采样\n')
open high low close
2017-01-01 0 4 0 4
2017-01-06 5 9 5 9
2017-01-11 10 11 10 11 → OHLC重采样
# Twelve consecutive days holding the values 1..12.
dr = pd.date_range('20170101', periods=12)
ts = pd.Series(np.arange(1, 13), index=dr)
print(ts)
2017-01-01 1
2017-01-02 2
2017-01-03 3
2017-01-04 4
2017-01-05 5
2017-01-06 6
2017-01-07 7
2017-01-08 8
2017-01-09 9
2017-01-10 10
2017-01-11 11
2017-01-12 12
Freq: D, dtype: int32
# Default binning for '5D': each bin includes its left edge.
print(ts.resample('5D').sum())
2017-01-01 15
2017-01-06 40
2017-01-11 23
Freq: 5D, dtype: int32
# closed='left' spelled out; the recorded output matches the default.
print(ts.resample('5D', closed='left').sum())
2017-01-01 15
2017-01-06 40
2017-01-11 23
Freq: 5D, dtype: int32
# closed='right': each bin includes its right edge instead, so the first value
# lands in an extra bin that starts before the data.
print(ts.resample('5D', closed='right').sum())
2016-12-27 1
2017-01-01 20
2017-01-06 45
2017-01-11 12
Freq: 5D, dtype: int32
# `label` picks which bin edge names each bin; the sums are unchanged.
print(ts.resample('5D', label='left').sum(), '→ leftlabel\n')
print(ts.resample('5D', label='right').sum(), '→ rightlabel\n')
2017-01-01 15
2017-01-06 40
2017-01-11 23
Freq: 5D, dtype: int32 → leftlabel
2017-01-06 15
2017-01-11 40
2017-01-16 23
Freq: 5D, dtype: int32 → rightlabel
# Five hourly rows by three columns holding 0..14. Lowercase 'h' replaces 'H',
# whose uppercase alias is deprecated in pandas 2.2+.
dr = pd.date_range('2017/1/1 0:0:0', periods=5, freq='h')
ts = pd.DataFrame(np.arange(15).reshape(5, 3), index=dr, columns=list('abc'))
print(ts)
a b c
2017-01-01 00:00:00 0 1 2
2017-01-01 01:00:00 3 4 5
2017-01-01 02:00:00 6 7 8
2017-01-01 03:00:00 9 10 11
2017-01-01 04:00:00 12 13 14
# Upsample hourly rows to 15-minute rows ('15min' replaces the deprecated
# '15T' alias).
print(ts.resample('15min').asfreq())  # new rows are left as NaN
print(ts.resample('15min').ffill())  # new rows copy the previous row
print(ts.resample('15min').bfill())  # new rows copy the next row
a b c
2017-01-01 00:00:00 0.0 1.0 2.0
2017-01-01 00:15:00 NaN NaN NaN
2017-01-01 00:30:00 NaN NaN NaN
2017-01-01 00:45:00 NaN NaN NaN
2017-01-01 01:00:00 3.0 4.0 5.0
2017-01-01 01:15:00 NaN NaN NaN
2017-01-01 01:30:00 NaN NaN NaN
2017-01-01 01:45:00 NaN NaN NaN
2017-01-01 02:00:00 6.0 7.0 8.0
2017-01-01 02:15:00 NaN NaN NaN
2017-01-01 02:30:00 NaN NaN NaN
2017-01-01 02:45:00 NaN NaN NaN
2017-01-01 03:00:00 9.0 10.0 11.0
2017-01-01 03:15:00 NaN NaN NaN
2017-01-01 03:30:00 NaN NaN NaN
2017-01-01 03:45:00 NaN NaN NaN
2017-01-01 04:00:00 12.0 13.0 14.0
a b c
2017-01-01 00:00:00 0 1 2
2017-01-01 00:15:00 0 1 2
2017-01-01 00:30:00 0 1 2
2017-01-01 00:45:00 0 1 2
2017-01-01 01:00:00 3 4 5
2017-01-01 01:15:00 3 4 5
2017-01-01 01:30:00 3 4 5
2017-01-01 01:45:00 3 4 5
2017-01-01 02:00:00 6 7 8
2017-01-01 02:15:00 6 7 8
2017-01-01 02:30:00 6 7 8
2017-01-01 02:45:00 6 7 8
2017-01-01 03:00:00 9 10 11
2017-01-01 03:15:00 9 10 11
2017-01-01 03:30:00 9 10 11
2017-01-01 03:45:00 9 10 11
2017-01-01 04:00:00 12 13 14
a b c
2017-01-01 00:00:00 0 1 2
2017-01-01 00:15:00 3 4 5
2017-01-01 00:30:00 3 4 5
2017-01-01 00:45:00 3 4 5
2017-01-01 01:00:00 3 4 5
2017-01-01 01:15:00 6 7 8
2017-01-01 01:30:00 6 7 8
2017-01-01 01:45:00 6 7 8
2017-01-01 02:00:00 6 7 8
2017-01-01 02:15:00 9 10 11
2017-01-01 02:30:00 9 10 11
2017-01-01 02:45:00 9 10 11
2017-01-01 03:00:00 9 10 11
2017-01-01 03:15:00 12 13 14
2017-01-01 03:30:00 12 13 14
2017-01-01 03:45:00 12 13 14
2017-01-01 04:00:00 12 13 14
# Monthly periods from 2016-01 through 2017-01, holding the values 0..12.
prng = pd.period_range('2016', '2017', freq='M')
ts = pd.Series(np.arange(len(prng)), index=prng)
print(ts)
# Downsample the period-indexed series to 3-month bins and sum each bin.
print(ts.resample('3M').sum())
# Upsample to 15-day steps, carrying the last known value forward.
print(ts.resample('15D').ffill())
2016-01 0
2016-02 1
2016-03 2
2016-04 3
2016-05 4
2016-06 5
2016-07 6
2016-08 7
2016-09 8
2016-10 9
2016-11 10
2016-12 11
2017-01 12
Freq: M, dtype: int32
2016-01-31 0
2016-04-30 6
2016-07-31 15
2016-10-31 24
2017-01-31 33
Freq: 3M, dtype: int32
2016-01-01 0
2016-01-16 0
2016-01-31 0
2016-02-15 1
2016-03-01 2
2016-03-16 2
2016-03-31 2
2016-04-15 3
2016-04-30 3
2016-05-15 4
2016-05-30 4
2016-06-14 5
2016-06-29 5
2016-07-14 6
2016-07-29 6
2016-08-13 7
2016-08-28 7
2016-09-12 8
2016-09-27 8
2016-10-12 9
2016-10-27 9
2016-11-11 10
2016-11-26 10
2016-12-11 11
2016-12-26 11
Freq: 15D, dtype: int32