Created
March 13, 2018 04:50
-
-
Save joelgrus/9c5749369e5f048bc68c943f74197f29 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
a response to https://gist.github.com/TomAugspurger/f96d41ae7a40d41fe9053d0adbceb4f1 | |
""" | |
import datetime
import itertools
import random
import string
from collections import deque
from typing import Iterable, List, NamedTuple, Optional, Tuple
class Row(NamedTuple):
    """One record of the table: a timestamp, a letter, and a value.

    The value ``c`` is a float between 0 and 1.
    """

    a: datetime.datetime  # timestamp of the record
    b: str  # letter label
    c: float  # float between 0 and 1
# Build 100 consecutive hourly timestamps, starting at 2018-01-01 00:00.
t0 = datetime.datetime(2018, 1, 1)
hour = datetime.timedelta(hours=1)
hourly = [t0 + hour * i for i in range(100)]

# Column a: every timestamp repeated 10 times in a row (1000 values).
aa = list(
    itertools.chain.from_iterable(
        itertools.repeat(stamp, 10) for stamp in hourly
    )
)
# Column b: 1000 letters drawn at random from the first ten ASCII letters.
bb = [random.choice(string.ascii_letters[:10]) for _ in range(1000)]
# Column c: 1000 random floats.
cc = [random.random() for _ in range(1000)]

# One Row per (a, b, c) triple; this is the counterpart of the dataframe
# in the gist being responded to.
data = list(itertools.starmap(Row, zip(aa, bb, cc)))
def hour12(dt: datetime.datetime) -> datetime.datetime:
    """Round ``dt`` down to the start of its 12-hour block.

    Args:
        dt: The timestamp to floor.

    Returns:
        A datetime on the same day as ``dt`` at either 00:00 or 12:00,
        with minute, second and microsecond set to zero. All other
        fields of ``dt`` (including ``tzinfo``) are carried over.
    """
    # Subtracting only the excess hours would leave the sub-hour fields in
    # place, so a timestamp such as 13:30 would become 12:30 instead of
    # 12:00. Replacing the fields floors every input correctly.
    return dt.replace(
        hour=dt.hour - dt.hour % 12,
        minute=0,
        second=0,
        microsecond=0,
    )
def key(row: Row) -> Tuple[str, datetime.datetime]:
    """Return the grouping key of a row: its letter ``b``, then ``hour12`` of ``a``."""
    letter = row.b
    bucket = hour12(row.a)
    return letter, bucket
# itertools.groupby only merges adjacent items that share a key, so the rows
# are first put in order by that same key.
sorted_data = list(data)
sorted_data.sort(key=key)
grouped_data = itertools.groupby(sorted_data, key=key)
def rolling_mean(group: Iterable["Row"], n: int) -> List[Optional[float]]:
    """Compute the rolling mean of ``c`` over a window of ``n`` rows.

    Args:
        group: The rows to average, in order. Only each row's ``c``
            attribute is read.
        n: The window size; must be at least 1.

    Returns:
        A list with one entry per input row: ``None`` while fewer than
        ``n`` rows have been seen, then the mean of the ``n`` most recent
        ``c`` values.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # A zero-length window would otherwise end in a division by zero.
        raise ValueError(f"window size n must be at least 1, got {n}")

    means: List[Optional[float]] = []
    # A bounded deque discards its oldest value once it holds n of them.
    window = deque(maxlen=n)
    for row in group:
        window.append(row.c)
        means.append(sum(window) / n if len(window) == n else None)
    return means
# For every (letter, 12-hour bucket) group, pair the group key with the
# 4-row rolling mean of the group's values.
result = [
    (*group_key, rolling_mean(rows, 4))
    for group_key, rows in grouped_data
]
# Show the first group as a sample.
print(result[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment