Skip to content

Instantly share code, notes, and snippets.

View Ailuropoda1864's full-sized avatar

Jingfei Cai-Pincus Ailuropoda1864

View GitHub Profile
@Ailuropoda1864
Ailuropoda1864 / auto_git_pull_to_gmail.py
Created October 8, 2017 00:37
automatically check for updates in GitHub repos; if there is an update, pull from upstream and send a notification to Gmail
import subprocess
import os
import time
import smtplib
from email.message import EmailMessage
# put your GA directory here, e.g. '/home/fay/code/GA/DSI'
CWD = ''
@Ailuropoda1864
Ailuropoda1864 / append_to_csv.py
Created September 8, 2017 06:44
append a row to a csv file
import csv
def append_to_csv(csvfile, entry):
"""
append a row to a csv file
:param csvfile: the path to a .csv file
:param entry: a list representing a row in the .csv file
:return: None
"""
@Ailuropoda1864
Ailuropoda1864 / corr_heatmap.py
Created September 8, 2017 06:15
A wrapper function for seaborn.heatmap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from functools import partial
def corr_heatmap(dataframe, cmap=sns.diverging_palette(220, 10, as_cmap=True),
**kwargs):
"""
@Ailuropoda1864
Ailuropoda1864 / scatter_plot_with_linear_fit.py
Last active September 8, 2017 06:08
A function to plot a scatter plot of two variables plus a linearly fitted line.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
def scatter_plot_with_linear_fit(x, y, slope=None, y_intercept=None):
"""
:param x: an array
:param y: an array
@Ailuropoda1864
Ailuropoda1864 / auto_git_pull.py
Last active October 8, 2017 00:26
automatically check for updates in GitHub repos, and pull from upstream if there is an update
import subprocess
import os
import time
# put your GA directory here, e.g. '/home/fay/code/GA/DSI'
CWD = ''
# put the path to a .gitignore template here
# e.g. '/home/fay/code/GA/DSI/projects/West-Nile-Virus-Prediction/.gitignore'
@Ailuropoda1864
Ailuropoda1864 / eda.py
Created September 8, 2017 00:21
exploratory data analysis performed on a pandas DataFrame
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import (
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype
)
from pandas.core.indexes.datetimes import DatetimeIndex
def eda(dataframe, head=True, info=True, describe=True, duplicated=True,
dup_kwd={}):
@Ailuropoda1864
Ailuropoda1864 / category_counts.py
Last active September 6, 2017 19:06
prints value counts for each (categorical) column
import pandas as pd
from pandas.core.dtypes.common import (
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype
)
def category_counts(dataframe, max_nunique=20, numeric=False, datetime=False):
"""
prints value counts for each (categorical) column
:param dataframe: a pandas DataFrame
@Ailuropoda1864
Ailuropoda1864 / find_duplicated.py
Created September 6, 2017 17:52
a function that prints out information on duplicate rows in a pandas DataFrame
import pandas as pd
def find_duplicated(dataframe, show=True, sort=False):
"""
prints out information on duplicate rows
:param dataframe: a pandas DataFrame
:param show: boolean; if True, the duplicated rows (if any) are shown
:param sort: boolean; if True, the duplicated rows are sorted by each column
of the dataframe
"""
@Ailuropoda1864
Ailuropoda1864 / show_null.py
Created September 6, 2017 16:12
This function prints the number and percentage of null values in each column in a pandas DataFrame.
import pandas as pd
def show_null(dataframe):
"""
prints the number and percentage of null values in each column
:param dataframe: a pandas DataFrame
:return: None
"""
if dataframe.isnull().sum().sum() == 0:
print('No null in the dataframe.')
@Ailuropoda1864
Ailuropoda1864 / describe_by_type.py
Last active September 6, 2017 04:47
Customization of the describe method for a pandas DataFrame; prints descriptions of all columns (grouped by numeric, datetime, boolean, and others) and of the DatetimeIndex (if any)
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import (
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype
)
from pandas.core.indexes.datetimes import DatetimeIndex
def describe_by_type(dataframe):
"""