Skip to content

Instantly share code, notes, and snippets.

View samukweku's full-sized avatar

Samuel Oranyeli samukweku

View GitHub Profile
@samukweku
samukweku / conditional_join.py
Last active November 30, 2022 20:10
Code for the PyData Global conditional_join talk
# data is from DuckDB's github repo
# https://github.com/duckdb/duckdb/tree/master/benchmark/micro/join
import pandas as pd; import numpy as np; import janitor as jn; import duckdb
query = """SELECT SETSEED(0.8675309);
CREATE TABLE events AS (
SELECT *,
"start" + INTERVAL (CASE WHEN random() < 0.1 THEN 120 ELSE (5 + round(random() * 50, 0)::BIGINT) END) MINUTE
AS "end"
FROM (
@samukweku
samukweku / wide_to_long_datatable.py
Last active April 16, 2022 14:23
Idea for a wide_to_long implementation in datatable
from datatable import dt, f
from typing import Pattern, NamedTuple, Union
from collections import Counter, defaultdict
from itertools import compress, chain
import re
import numpy as np
class measure(NamedTuple):
"""reshape either with a separator or a regular expression."""
column_names:Union[str, list]
@samukweku
samukweku / pandas_hype.md
Last active November 15, 2021 06:15
Solutions to some challenges on `Don't believe the python hype...`
@samukweku
samukweku / pandas_nth.md
Last active August 2, 2021 08:51
Pandas Chaining - Second most common value

link

import pandas as pd
df = pd.DataFrame({'x': [1, 1, 1, 2, 2, 3],