This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from pandas.core.dtypes.common import ( | |
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype | |
) | |
from pandas.core.indexes.datetimes import DatetimeIndex | |
def describe_by_type(dataframe): | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def show_null(dataframe): | |
""" | |
prints the number and percentage of null values in each column | |
:param dataframe: a pandas DataFrame | |
:return: None | |
""" | |
if dataframe.isnull().sum().sum() == 0: | |
print('No null in the dataframe.') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def find_duplicated(dataframe, show=True, sort=False): | |
""" | |
prints out information on duplicate rows | |
:param dataframe: a pandas DataFrame | |
:param show: boolean; if True, the duplicated rows (if any) are shown | |
:param sort: boolean; if True, the duplicated rows are sorted by each column | |
of the dataframe | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pandas.core.dtypes.common import ( | |
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype | |
) | |
def category_counts(dataframe, max_nunique=20, numeric=False, datetime=False): | |
""" | |
prints value counts for each (categorical) column | |
:param dataframe: a pandas DataFrame |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from pandas.core.dtypes.common import ( | |
is_numeric_dtype, is_datetime64_dtype, is_bool_dtype | |
) | |
from pandas.core.indexes.datetimes import DatetimeIndex | |
def eda(dataframe, head=True, info=True, describe=True, duplicated=True, | |
dup_kwd={}): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import os | |
import time | |
# put your GA directory here, e.g. '/home/fay/code/GA/DSI' | |
CWD = '' | |
# put the path to a .gitignore template here | |
# e.g. '/home/fay/code/GA/DSI/projects/West-Nile-Virus-Prediction/.gitignore' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import scipy.stats as stats | |
def scatter_plot_with_linear_fit(x, y, slope=None, y_intercept=None): | |
""" | |
:param x: an array | |
:param y: an array |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from functools import partial | |
def corr_heatmap(dataframe, cmap=sns.diverging_palette(220, 10, as_cmap=True), | |
**kwargs): | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
def append_to_csv(csvfile, entry): | |
""" | |
append a row to a csv file | |
:param csvfile: the path to a .csv file | |
:param entry: a list representing a row in the .csv file | |
:return: None | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import os | |
import time | |
import smtplib | |
from email.message import EmailMessage | |
# put your GA directory here, e.g. '/home/fay/code/GA/DSI' | |
CWD = '' |