This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Build a sample pandas.Series of timestamps: 1000 values spaced 18 hours
# apart, starting at 2021-01-01.
# The lowercase "h" alias is used because pandas 2.2 deprecated uppercase "H".
s = pd.Series(pd.date_range("2021", freq="18h", periods=1000))

# Count how many timestamps fall into each calendar month.
# If you want another period, check the following link:
# https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases
# sort=False keeps value_counts from reordering the result by count.
freq_count = s.dt.to_period("M").value_counts(sort=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd


# The annotations show `str` for the dates, but datetime.date objects work as well.
def n_date_intervals(start_date: str, end_date: str, intervals_count: int) -> pd.Series:
    """Return ``intervals_count`` evenly spaced timestamps between two dates.

    The values come from ``pandas.date_range(start, end, periods=...)``, so
    the result holds ``intervals_count`` points (start and end included),
    i.e. ``intervals_count - 1`` gaps between them.

    Args:
        start_date: First date of the range.
        end_date: Last date of the range.
        intervals_count: Number of timestamps to generate.

    Returns:
        A Series of ``intervals_count`` timestamps.
    """
    return pd.Series(pd.date_range(start_date, end_date, periods=intervals_count))


# Change these variables as needed
start_date = "2018-01-01"
end_date = "2021-03-24"
intervals_count = 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- List which of the given columns exist in a table.
-- If the number of returned rows equals the number of column names in the
-- IN list, every column you are checking exists.
SELECT *
FROM information_schema.columns
WHERE table_schema = '<table_schema>'
  AND table_name = '<table_name>'
  AND column_name IN ('<column1>', '<column2>');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import partial


def print_n_dict_elem(n, dictionary, elem):
    """Print ``dictionary[elem]`` multiplied by ``n``.

    For a string value this prints the value repeated ``n`` times.
    """
    print(n * dictionary[elem])


# Create a new function that prints an element 3 times.
# n and dictionary are bound by keyword, so elem must be passed by keyword too:
# a positional argument would collide with the already-bound n.
print_3_dict_elem = partial(print_n_dict_elem, n=3, dictionary={"a": "A", "b": "B"})
print_3_dict_elem(elem="a")
# Update the dictionary passed as an argument
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remember to install more_itertools first:
# pip install more-itertools
from more_itertools import grouper


def do_something_with_iterable(iterable):
    """Placeholder: replace the body with the real per-chunk processing."""
    pass


# Size of each chunk
n = 3
# Split the 10-character sample string into groups of n items
chunks = grouper('a'*10, n)
for c in chunks: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

df = pd.DataFrame({"A": [0, 1, 0], "B": ["a", "b", "c"], "C": [1, 2, 3]})

# Index by (A, C). drop=False keeps A and C as regular columns too, so they
# still appear inside the innermost dictionaries.
df = df.set_index(["A", "C"], drop=False)

# Build {A value: {C value: {column: value}}}.
# The keys come from the labels actually present in the first index level;
# `index.levels[0]` may also list unused labels (e.g. after filtering rows),
# for which `df.xs` would raise KeyError.
result = {
    key: df.xs(key).to_dict("index")
    for key in df.index.unique(level=0)
}
print(result[0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

# Current time as a timezone-aware UTC timestamp.
# Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), deprecated in pandas 2.2.
utc_now_pd = pd.Timestamp.now(tz="UTC")

# ceil(freq="D") is what rounds UP to the next midnight; round(freq="D") would
# round to the NEAREST midnight and so go backwards before noon.
# Use replace(tzinfo=None) to remove the timezone information.
utc_now_ceil = utc_now_pd.ceil(freq="D").to_pydatetime().replace(tzinfo=None)

# Convert to ISO format
utc_now_str = utc_now_ceil.strftime("%Y-%m-%dT%H:%M:%S")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import pandas as pd


def read_gzipped_csvs(directory: str = ".") -> pd.DataFrame:
    """Load every ``*.gz`` file in ``directory`` as CSV into one DataFrame.

    Args:
        directory: Folder to scan (not recursive). Defaults to the current
            working directory.

    Returns:
        The rows of all files stacked with a fresh 0..n-1 index, or an empty
        DataFrame when the folder holds no ``.gz`` file.
    """
    # os.listdir returns names in arbitrary order; sorting makes the row order
    # of the result reproducible.
    paths = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith(".gz")
    )
    # pd.concat raises ValueError on an empty list, so handle that case here.
    if not paths:
        return pd.DataFrame()
    frames = [pd.read_csv(path, compression="gzip") for path in paths]
    return pd.concat(frames, ignore_index=True)


df = read_gzipped_csvs()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# np.nan is the portable spelling: the np.NaN alias was removed in NumPy 2.0.
df = pd.DataFrame({"A": [1, 2, 3], "B": [1.2, np.nan, 3.4]})

# Swap NaN for None before exporting, so the records carry None instead of
# float("nan"). The list form of replace() is used to map NaN to a literal None.
result = (
    df
    .replace([np.nan], [None], regex=False)
    .to_dict(orient="records")
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Problem: you don't see all the schemas when querying PG_TABLE_DEF
-- Solution:
-- 1. First check whether the schema you're trying to query is on the search path
show search_path;
-- 2. Add the missing one(s) to the search path
--    (imagine the result was only public, and data_warehouse and matching were missing).
--    Keep '$user' literally as written, no matter what your user name is.
set search_path to '$user', public, data_warehouse, matching;
OlderNewer