tapanhp/pandas_utils

## pandas_utils
import pandas as pd
from pandas import DataFrame


def printline(times=1):
    """Print vertical spacing, similar to println() in other languages.

    Writes ``times`` newline characters followed by print's own trailing
    newline, so the default ``times=1`` leaves one empty line in the output.
    """
    print("\n" * times)


def __checkAllNaNs(dataframe):
    """Count the missing values (NaN/None) in every column of ``dataframe``.

    Returns the result of ``dataframe.isnull().sum()``: one count per column,
    indexed by column label.
    """
    return dataframe.isnull().sum()


# Attach as a DataFrame method so it can be called as ``df.checkAllNaNs()``.
DataFrame.checkAllNaNs = __checkAllNaNs


def __dropRowsWhereNone(dataframe, column):
    """Return a new frame without the rows whose ``column`` value is missing.

    Rows are filtered with ``dropna(subset=[column])`` rather than by looking
    up the index labels of the null rows and dropping those labels: dropping
    by label also removes non-null rows that share an index label with a null
    row when the index is not unique.

    The input frame is left untouched (no ``inplace``).

    Raises:
        KeyError: if ``column`` is not a column of ``dataframe``.
    """
    return dataframe.dropna(subset=[column])


# Attach as a DataFrame method so it can be called as ``df.dropRowsWhereNone(col)``.
DataFrame.dropRowsWhereNone = __dropRowsWhereNone


def __totalDuplicateRows(dataframe):
    """Return how many rows of ``dataframe`` are flagged by ``duplicated()``.

    The count is the sum of the boolean mask returned by
    ``dataframe.duplicated()`` called with its default arguments.
    """
    return dataframe.duplicated().sum()


# Attach as a DataFrame method so it can be called as ``df.totalDuplicateRows()``.
DataFrame.totalDuplicateRows = __totalDuplicateRows


def __get_dataset_overview(dataframe):
    """Print the basic information about a dataset.

    Shows, in order: the shape, the dtype of each column, ``head()`` and
    ``describe()``.

    ``display`` exists as a global only inside an IPython/Jupyter session
    (IPython installs it into ``builtins``). In a plain Python process the
    frames are printed instead, so the call no longer fails with NameError.
    """
    import builtins  # local import keeps this helper self-contained

    # Rich rendering under IPython/Jupyter, plain text everywhere else.
    # https://stackoverflow.com/a/29665452 => how display works
    show = getattr(builtins, "display", print)

    print("=> Shape of dataframe is\n")
    print(dataframe.shape)
    printline()
    print("=> Data types of each columns\n")
    print(dataframe.dtypes)
    printline()
    print("=> Head of the dataframe is ")
    show(dataframe.head())
    printline()
    print("=> Basic statistics about this dataframe")
    show(dataframe.describe())


# Attach as a DataFrame method so it can be called as ``df.get_dataset_overview()``.
DataFrame.get_dataset_overview = __get_dataset_overview
	import pandas as pd
	from pandas import DataFrame


	def printline(times=1):
		"""Print vertical spacing, similar to println() in other languages.

		Writes ``times`` newline characters followed by print's own trailing
		newline, so the default ``times=1`` leaves one empty line in the output.
		"""
		print("\n" * times)


	def __checkAllNaNs(dataframe):
		"""Count the missing values (NaN/None) in every column of ``dataframe``.

		Returns the result of ``dataframe.isnull().sum()``: one count per column,
		indexed by column label.
		"""
		return dataframe.isnull().sum()


	# Attach as a DataFrame method so it can be called as ``df.checkAllNaNs()``.
	DataFrame.checkAllNaNs = __checkAllNaNs


	def __dropRowsWhereNone(dataframe, column):
		"""Return a new frame without the rows whose ``column`` value is missing.

		Rows are filtered with ``dropna(subset=[column])`` rather than by looking
		up the index labels of the null rows and dropping those labels: dropping
		by label also removes non-null rows that share an index label with a null
		row when the index is not unique.

		The input frame is left untouched (no ``inplace``).

		Raises:
			KeyError: if ``column`` is not a column of ``dataframe``.
		"""
		return dataframe.dropna(subset=[column])


	# Attach as a DataFrame method so it can be called as ``df.dropRowsWhereNone(col)``.
	DataFrame.dropRowsWhereNone = __dropRowsWhereNone


	def __totalDuplicateRows(dataframe):
		"""Return how many rows of ``dataframe`` are flagged by ``duplicated()``.

		The count is the sum of the boolean mask returned by
		``dataframe.duplicated()`` called with its default arguments.
		"""
		return dataframe.duplicated().sum()


	# Attach as a DataFrame method so it can be called as ``df.totalDuplicateRows()``.
	DataFrame.totalDuplicateRows = __totalDuplicateRows


	def __get_dataset_overview(dataframe):
		"""Print the basic information about a dataset.

		Shows, in order: the shape, the dtype of each column, ``head()`` and
		``describe()``.

		``display`` exists as a global only inside an IPython/Jupyter session
		(IPython installs it into ``builtins``). In a plain Python process the
		frames are printed instead, so the call no longer fails with NameError.
		"""
		import builtins  # local import keeps this helper self-contained

		# Rich rendering under IPython/Jupyter, plain text everywhere else.
		# https://stackoverflow.com/a/29665452 => how display works
		show = getattr(builtins, "display", print)

		print("=> Shape of dataframe is\n")
		print(dataframe.shape)
		printline()
		print("=> Data types of each columns\n")
		print(dataframe.dtypes)
		printline()
		print("=> Head of the dataframe is ")
		show(dataframe.head())
		printline()
		print("=> Basic statistics about this dataframe")
		show(dataframe.describe())


	# Attach as a DataFrame method so it can be called as ``df.get_dataset_overview()``.
	DataFrame.get_dataset_overview = __get_dataset_overview