本文共 4322 字,大约阅读时间需要 14 分钟。
《Python for Data Analysis》
In [2]: from datetime import datetime
In [3]: now = datetime.now()
In [4]: now
Out[4]: datetime.datetime(2017, 5, 25, 13, 55, 30, 39000)
1: 利用datetime的strftime和strptime方法转换字符串和日期。
In [5]: stamp = datetime(2011, 1, 3)
In [6]: str(stamp)
Out[6]: '2011-01-03 00:00:00'
In [7]: stamp.strftime('%Y-%m-%d')
Out[7]: '2011-01-03'
In [10]: datetime.strptime('2011-01-03', '%Y-%m-%d')
Out[10]: datetime.datetime(2011, 1, 3, 0, 0)
In [11]: datestrs = ['7/6/2011', '8/6/2011']
In [12]: [datetime.strptime(x,'%m/%d/%Y') for x in datestrs]
Out[12]: [datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]
2: dateutil可以解析几乎所有人类能够理解的日期表示形式(中文不行)。
In [13]: from dateutil.parser import parse
In [14]: parse('2011-01-03')
Out[14]: datetime.datetime(2011, 1, 3, 0, 0)
# 在国际通用的日期格式中,日通常出现在月的前面,此时传入 dayfirst=True
In [15]: parse('6/12/2011', dayfirst=True)
Out[15]: datetime.datetime(2011, 12, 6, 0, 0)
In [16]: parse('6/12/2011')
Out[16]: datetime.datetime(2011, 6, 12, 0, 0)
In [17]: parse('Jan 31, 1998 10:45 PM')
Out[17]: datetime.datetime(1998, 1, 31, 22, 45)
3: pandas.to_datetime()方法,通常用于处理成组日期:
In [22]: datestrs = ['7/6/2011', '8/6/2011']
In [23]: pd.to_datetime(datestrs)
Out[23]: DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)
处理缺失值:
idx = pd.to_datetime(datestrs + [None])
print idx
print idx[2]
print pd.isnull(idx)
DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)
NaT
[False False  True]
In [11]: from datetime import datetime
    ...: dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
    ...:          datetime(2011, 1, 7), datetime(2011, 1, 8),
    ...:          datetime(2011, 1, 10), datetime(2011, 1, 12)]
    ...: ts = pd.Series(np.random.randn(6), index=dates)
    ...: ts
    ...:
Out[11]:
2011-01-02    0.092908
2011-01-05    0.281746
2011-01-07    0.769023
2011-01-08    1.246435
2011-01-10    1.007189
2011-01-12   -1.296221
dtype: float64
In [12]: type(ts)
Out[12]: pandas.core.series.Series
In [13]: ts.index
Out[13]:
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08', '2011-01-10', '2011-01-12'], dtype='datetime64[ns]', freq=None)
In [14]: ts.index[0]
Out[14]: Timestamp('2011-01-02 00:00:00')
In [15]: ts[::2]
Out[15]:
2011-01-02    0.092908
2011-01-07    0.769023
2011-01-10    1.007189
dtype: float64
In [16]: ts + ts[::2]
Out[16]:
2011-01-02    0.185816
2011-01-05         NaN
2011-01-07    1.538045
2011-01-08         NaN
2011-01-10    2.014379
2011-01-12         NaN
dtype: float64
In [17]: stamp = ts.index[2]
    ...: ts[stamp]
    ...:
Out[17]: 0.76902256761183874
In [18]: ts['1/10/2011']
Out[18]: 1.0071893575830049
In [19]: ts['20110110']
Out[19]: 1.0071893575830049
In [20]: ts['1/6/2011':'1/11/2011']
Out[20]:
2011-01-07    0.769023
2011-01-08    1.246435
2011-01-10    1.007189
dtype: float64
In [21]: ts.truncate(after='1/9/2011')
Out[21]:
2011-01-02    0.092908
2011-01-05    0.281746
2011-01-07    0.769023
2011-01-08    1.246435
dtype: float64
In [22]: ts[datetime(2011, 1, 7):]
Out[22]:
2011-01-07    0.769023
2011-01-08    1.246435
2011-01-10    1.007189
2011-01-12   -1.296221
dtype: float64
In [23]: longer_ts = pd.Series(np.random.randn(1000),
    ...:                       index=pd.date_range('1/1/2000', periods=1000))
In [24]: longer_ts
Out[24]:
2000-01-01    0.274992
                ...
2002-09-25    0.884111
2002-09-26   -0.608506
Freq: D, dtype: float64
In [25]: longer_ts['2001']
Out[25]:
2001-01-01   -1.308228
                ...
2001-12-31   -0.502678
Freq: D, dtype: float64
In [26]: longer_ts['2001-05']
Out[26]:
2001-05-01    1.489410
                ...
2001-05-31   -0.241235
Freq: D, dtype: float64
In [27]: dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
    ...: long_df = pd.DataFrame(np.random.randn(100, 4),
    ...:                        index=dates,
    ...:                        columns=['Colorado', 'Texas',
    ...:                                 'New York', 'Ohio'])
    ...: long_df.loc['5-2001']
    ...:
Out[27]:
            Colorado     Texas  New York      Ohio
2001-05-02  0.927335  1.513906  0.538600  1.273768
2001-05-09  0.667876 -0.969206  1.676091 -0.817649
2001-05-16  0.050188  1.951312  3.260383  0.963301
2001-05-23  1.201206 -1.852001  2.406778  0.841176
2001-05-30 -0.749181 -2.989741 -1.295289 -1.690195
转载地址:http://hjoji.baihongyu.com/