Skip to content

Instantly share code, notes, and snippets.

@tapanhp
Created June 10, 2019 11:36
Show Gist options
  • Save tapanhp/8026fe6c899bd295960e0430e84dc097 to your computer and use it in GitHub Desktop.
Save tapanhp/8026fe6c899bd295960e0430e84dc097 to your computer and use it in GitHub Desktop.
Pandas utilities
import pandas as pd
from pandas import DataFrame
def printline(times=1):
    """Print ``times`` newline characters, then print's own line ending.

    A rough equivalent of ``println()`` in other languages. Because
    ``print`` appends its own newline, the default call emits two newline
    characters in total.
    """
    blank = "\n" * times
    print(blank)
def __checkAllNaNs(dataframe):
    """Return the number of missing (NaN/None) values in each column."""
    missing_flags = dataframe.isnull()
    return missing_flags.sum()


# Expose the helper as a DataFrame method: df.checkAllNaNs()
DataFrame.checkAllNaNs = __checkAllNaNs
def __dropRowsWhereNone(dataframe, column):
    """Return a new DataFrame without the rows whose ``column`` is missing.

    The input frame is left untouched; rows are kept when ``column`` holds a
    non-null value.

    Args:
        dataframe: The DataFrame to filter.
        column: Label of the column to check for NaN/None.

    Returns:
        A DataFrame containing only the rows where ``column`` is not null.

    Raises:
        KeyError: If ``column`` is not a column of ``dataframe``.
    """
    # Filter positionally with a boolean mask instead of dropping by index
    # label: with a non-unique index, label-based drop would also remove
    # valid rows that merely share a label with a null row.
    has_value = dataframe[column].notnull()
    return dataframe[has_value]


# Expose the helper as a DataFrame method: df.dropRowsWhereNone("col")
DataFrame.dropRowsWhereNone = __dropRowsWhereNone
def __totalDuplicateRows(dataframe):
    """Return how many rows are repeats of an earlier row in ``dataframe``."""
    repeat_flags = dataframe.duplicated()
    return repeat_flags.sum()


# Expose the helper as a DataFrame method: df.totalDuplicateRows()
DataFrame.totalDuplicateRows = __totalDuplicateRows
"""
Check all the basic information about dataset
"""
def __get_dataset_overview(dataframe):
    """Print a quick overview of ``dataframe``.

    Reports, in order: the shape, the dtype of each column, the first rows
    (``head()``) and the summary statistics (``describe()``).

    The two tables are rendered with ``display`` when that name is available
    (as it is in IPython/Jupyter sessions). Otherwise they are printed as
    plain text, so the helper also works in a regular Python interpreter
    instead of raising ``NameError``.

    Args:
        dataframe: The DataFrame to summarise.

    Returns:
        None. Everything is written to standard output / the notebook cell.
    """
    try:
        # https://stackoverflow.com/a/29665452 => how display works?
        show = display
    except NameError:
        # Not running under IPython: fall back to plain-text output.
        show = print

    print("=> Shape of dataframe is\n")
    print(dataframe.shape)
    printline()
    print("=> Data types of each columns\n")
    print(dataframe.dtypes)
    printline()
    print("=> Head of the dataframe is ")
    show(dataframe.head())
    printline()
    print("=> Basic statistics about this dataframe")
    show(dataframe.describe())


# Expose the helper as a DataFrame method: df.get_dataset_overview()
DataFrame.get_dataset_overview = __get_dataset_overview
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment