Skip to content

Instantly share code, notes, and snippets.


cs95 Coldsp33d

  • Mountain View, CA
View GitHub Profile
import pandas as pd
import numpy as np
import perfplot
from string import ascii_lowercase as LOWER, ascii_uppercase as UPPER
import random
# Note: the copy() calls are needed here because `pop()` mutates the DataFrame in place,
# so it is essential to work on a copy(): we don't want mutations to carry over across runs.
def apply_drop(df):
    """Expand the ``val`` column into separate columns and drop the original.

    Each element of ``df['val']`` is turned into a ``pd.Series`` via
    ``Series.apply``; the resulting frame is joined back onto ``df`` by index
    and the ``val`` column is then removed.

    Returns a new DataFrame; ``df`` itself is not modified, because neither
    ``join`` nor ``drop`` is called in-place.
    """
    expanded = df['val'].apply(pd.Series)
    return df.join(expanded).drop('val', axis=1)
import pandas as pd
import numpy as np
import perfplot
import numba
def with_df_loc(df):
    """Fill column ``b`` row by row through label-based ``df.loc`` access.

    Row 0 of ``b`` is seeded with 5; every later row ``i`` becomes the mean
    of the previous row's ``a`` and ``b`` values. ``df`` is modified in
    place.

    NOTE(review): the accessor in front of each ``[...]`` was missing from
    the source text; it has been restored as ``df.loc`` based on the function
    name -- confirm against the original gist. The integer labels assume a
    default 0..n-1 index -- TODO confirm. Any statements that followed the
    loop (e.g. a ``return``) are not visible in this excerpt.
    """
    df.loc[0, 'b'] = 5
    for i in range(1, len(df)):
        # Each row depends on the previous row's freshly written ``b``,
        # so the rows must be processed in order.
        df.loc[i, 'b'] = (df.loc[i - 1, 'a'] + df.loc[i - 1, 'b']) / 2
import pandas as pd
import numpy as np
import perfplot
def slice(df):
    """Return ``df`` with its rows in reverse order via plain ``[::-1]`` slicing.

    The original index labels travel with their rows (no re-indexing).
    """
    # NOTE: this name shadows the ``slice`` builtin inside this module; it is
    # kept unchanged because it is the function's public name.
    return df[::-1]
def slice_loc(df):
    """Return ``df`` with its rows in reverse order via ``df.loc[::-1]``.

    The original index labels travel with their rows (no re-indexing).
    """
    return df.loc[::-1]
View read_clipboard beginner's guide [DRAFT]
## Beginner's Guide to `pd.read_clipboard`
[`read_clipboard`](https://pandas.pydata.org/docs/reference/api/pandas.read_clipboard.html) is truly a saving grace for anyone starting out answering questions in the [tag:pandas] tag. Unfortunately, pandas veterans also know that the data provided in questions isn't always easy to load into a terminal due to various complications such as MultiIndexes, spaces in header names, datetimes, and Python objects.
Thankfully, `read_clipboard` has arguments that make handling most of these cases possible (and easy). The purpose of this answer is to document some of those cases in finer detail.
### Spaces in column headers
Coldsp33d / inplace_funcs.txt
Last active Dec 9, 2019
List of functions that support in-place modification
View inplace_funcs.txt
# Reference:
DataFrame.set_index (with drop=False wouldn't change the data, but that doesn't seem the main use case)
Coldsp33d /
Created Jul 31, 2019
Partially automated comment flagging on Stack Overflow.
import requests
import re
import pandas as pd
import html
import time
import sys
# NOTE(review): presumably the registered application id this script uses when
# talking to the Stack Exchange API -- its usage is not visible in this
# excerpt; confirm.
client_id = '15705'
import pandas as pd
import perfplot
def numpy_where(df):
    """Return a copy of ``df`` with an ``is_rich`` column built by ``np.where``.

    Rows whose ``salary`` is at least 50 are labelled ``'yes'``; all others
    ``'no'``. ``df`` itself is left untouched (``assign`` returns a new
    frame).
    """
    labels = np.where(df['salary'] >= 50, 'yes', 'no')
    return df.assign(is_rich=labels)
def list_comp(df):
    """Return a copy of ``df`` with an ``is_rich`` column built by a list comprehension.

    Rows whose ``salary`` is at least 50 are labelled ``'yes'``; all others
    ``'no'``. ``df`` itself is left untouched (``assign`` returns a new
    frame).
    """
    labels = ['yes' if salary >= 50 else 'no' for salary in df['salary']]
    return df.assign(is_rich=labels)
def loc(df):
def load_data(datafile, encoder=None):
data = pd.read_csv(datafile, header=0, low_memory=False)
data_y = data[['job_performance']]
data_x = data.drop(['job_performance'], axis=1)
data_x.replace([np.inf, -np.inf], np.nan, inplace=True)
data_x.fillna(data_x.mean(), inplace=True)
if not encoder:
Coldsp33d /
Created Jun 27, 2019
Interleave two or more lists
from itertools import chain
import perfplot
def cs1(l):
    """Interleave each item of ``l`` with a ``'<item>_<position>'`` string.

    Positions are 1-based, so ``['a', 'b']`` becomes
    ``['a', 'a_1', 'b', 'b_2']``. An empty input yields an empty list.
    """
    def _cs1(items):
        # Emit the item itself, then its suffixed companion.
        for position, item in enumerate(items, 1):
            yield item
            yield f'{item}_{position}'

    return list(_cs1(l))
Coldsp33d /
Last active Apr 11, 2021
Benchmarking different methods for creating empty dataframes from scratch
import pandas as pd
import perfplot
def append(n):
    """Build an ``n``-row DataFrame by growing it one row at a time.

    Starts from an empty frame with columns ``A``, ``B``, ``C`` and adds the
    same row ``n`` times, producing a fresh DataFrame on every iteration.
    With ``n == 0`` the empty frame is returned as is.

    ``DataFrame.append`` was removed in pandas 2.0, so each step uses
    ``pd.concat`` instead, which has the same copy-everything-per-row
    behaviour.
    """
    df = pd.DataFrame(columns=['A', 'B', 'C'])
    for _ in range(n):
        row = pd.DataFrame([{'A': 1, 'B': 12.3, 'C': 'xyz'}])
        # yuck: every iteration copies all rows accumulated so far
        df = pd.concat([df, row], ignore_index=True)
    return df
def list_append(n):