当我尝试使用window ='1M'时,我发现user2689410代码中断,因为营业月的增量引发此错误:
AttributeError: 'MonthEnd' object has no attribute 'delta'
我添加了直接传递相对时间增量的选项,因此您可以针对用户定义的时间段执行类似的操作。
感谢您的指点,这是我的尝试-希望可以使用。
def rolling_mean(data, window, min_periods=1, center=False):
""" Function that computes a rolling mean
Reference:
http://stackoverflow.com/questions/15771472/pandas-rolling-mean-by-time-interval
Parameters
----------
data : DataFrame or Series
If a DataFrame is passed, the rolling_mean is computed for all columns.
window : int, string, Timedelta or Relativedelta
int - number of observations used for calculating the statistic,
as defined by the function pd.rolling_mean()
string - must be a frequency string, e.g. '90S'. This is
internally converted into a DateOffset object, and then
Timedelta representing the window size.
Timedelta / Relativedelta - Can directly pass a timedeltas.
min_periods : int
Minimum number of observations in window required to have a value.
center : bool
Point around which to 'center' the slicing.
Returns
-------
Series or DataFrame, if more than one column
"""
def f(x, time_increment):
"""Function to apply that actually computes the rolling mean
:param x:
:return:
"""
if not center:
start_date = x - time_increment + timedelta(0, 0, 1)
end_date = x
else:
start_date = x - time_increment/2 + timedelta(0, 0, 1)
end_date = x + time_increment/2
dslice = col[start_date:end_date]
if dslice.size < min_periods:
return np.nan
else:
return dslice.mean()
data = DataFrame(data.copy())
dfout = DataFrame()
if isinstance(window, int):
dfout = pd.rolling_mean(data, window, min_periods=min_periods, center=center)
elif isinstance(window, basestring):
time_delta = pd.datetools.to_offset(window).delta
idx = Series(data.index.to_pydatetime(), index=data.index)
for colname, col in data.iteritems():
result = idx.apply(lambda x: f(x, time_delta))
result.name = colname
dfout = dfout.join(result, how='outer')
elif isinstance(window, (timedelta, relativedelta)):
time_delta = window
idx = Series(data.index.to_pydatetime(), index=data.index)
for colname, col in data.iteritems():
result = idx.apply(lambda x: f(x, time_delta))
result.name = colname
dfout = dfout.join(result, how='outer')
if dfout.columns.size == 1:
dfout = dfout.ix[:, 0]
return dfout
并以3天时间窗计算平均值的示例:
from pandas import Series, DataFrame
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from dateutil.relativedelta import relativedelta
idx = [datetime(2011, 2, 7, 0, 0),
datetime(2011, 2, 7, 0, 1),
datetime(2011, 2, 8, 0, 1, 30),
datetime(2011, 2, 9, 0, 2),
datetime(2011, 2, 10, 0, 4),
datetime(2011, 2, 11, 0, 5),
datetime(2011, 2, 12, 0, 5, 10),
datetime(2011, 2, 12, 0, 6),
datetime(2011, 2, 13, 0, 8),
datetime(2011, 2, 14, 0, 9)]
idx = pd.Index(idx)
vals = np.arange(len(idx)).astype(float)
s = Series(vals, index=idx)
rm = rolling_mean(s, window=relativedelta(days=3))
>>> rm
Out[2]:
2011-02-07 00:00:00 0.0
2011-02-07 00:01:00 0.5
2011-02-08 00:01:30 1.0
2011-02-09 00:02:00 1.5
2011-02-10 00:04:00 3.0
2011-02-11 00:05:00 4.0
2011-02-12 00:05:10 5.0
2011-02-12 00:06:00 5.5
2011-02-13 00:08:00 6.5
2011-02-14 00:09:00 7.5
Name: 0, dtype: float64
rolling_*
函数相比,它通常会很慢。