Skip to content

Instantly share code, notes, and snippets.

View gautierdag's full-sized avatar
🇺🇦

Gautier Dagan gautierdag

🇺🇦
View GitHub Profile
@gautierdag
gautierdag / shift_compress.py
Created July 15, 2020 08:53
Shift Date Compression
# Every column except "date" takes part in the row-to-row comparison.
cols = [name for name in df.columns if name != "date"]
# Keep a row when at least one non-date value differs from the row directly
# above it (shift() moves each row down by one), then renumber the index.
compressed_df = df.loc[(df[cols].shift() != df[cols]).any(axis=1)].reset_index(drop=True)
@gautierdag
gautierdag / reduce_compress.py
Created July 15, 2020 08:51
Reduce Date Compression
from functools import reduce
# Underlying array of the frame's values.
values = df.values
# Integer positions (rather than labels) of every column except "date".
cols = [df.columns.get_loc(name) for name in df.columns if name != "date"]
# The output list starts out holding the first row.
new_values = [values[0]]
def compress_helper(x, y):
if (x[cols] == y[cols]).all():
return x
@gautierdag
gautierdag / naive_compression.py
Created July 15, 2020 08:49
Naive Date Compression
# set first (oldest) balance as index entry
new_indexes = [0]
# choose columns that are not the date column
cols = [d for d in df.columns if d != "date"]
# iterate over rows
for i, row in df.iterrows():
if i == 0: #skip 0 index since it has no previous balance
continue
# if any value differs from the previous row/date, then we have a new observation
if not (df.iloc[i-1][cols] ==row[cols]).all():
@gautierdag
gautierdag / generate_random_series.py
Last active July 15, 2020 08:26
Generating a random series
from random import randint
import pandas as pd
# Number of random dates drawn before duplicates are removed.
num_observations = 500

# Random "year-month-day" strings (years 2018-2020, months 1-12, days 1-27)
# parsed into timestamps; unique() drops repeated dates, so fewer than
# num_observations entries may remain.
dates = pd.to_datetime(
    [
        f"{randint(2018, 2020)}-{randint(1, 12)}-{randint(1, 27)}"
        for _ in range(num_observations)
    ]
).unique()

# One random integer balance between 0 and 10000 for each remaining date.
df = pd.DataFrame(
    {
        "date": dates,
        "balance": [randint(0, 10000) for _ in dates],
    }
)
@gautierdag
gautierdag / plaid_models.py
Created May 31, 2020 17:35
Plaid Python Pydantic Models
import datetime
from typing import Optional, List
from pydantic import BaseModel
# Model derived from BaseModel declaring four annotated fields: a float
# `current`, plus `available` and `limit` (Optional[float]) and
# `iso_currency_code` (Optional[str]).
# NOTE(review): presumably mirrors the balance object of a Plaid account —
# confirm against the Plaid API schema.
# NOTE(review): the class body carries no indentation in this copy, and the
# class may continue beyond the lines shown — confirm against the original file.
class PlaidBalance(BaseModel):
# Annotated as a plain float (no Optional wrapper).
current: float
# The remaining fields are annotated Optional, i.e. they may hold None.
available: Optional[float]
iso_currency_code: Optional[str]
limit: Optional[float]