Created
June 10, 2019 11:36
-
-
Save tapanhp/8026fe6c899bd295960e0430e84dc097 to your computer and use it in GitHub Desktop.
Pandas utilities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pandas import DataFrame | |
# Blank-line spacer for console output, similar in spirit to println() in
# other languages.
def printline(times=1):
    """Print ``times`` newline characters, then print's own line ending.

    With the default ``times=1`` two line breaks are written to stdout;
    ``times=0`` writes only the line ending that ``print`` always appends.

    Args:
        times: Number of newline characters to emit before print's
            trailing line ending.
    """
    print("\n" * times)
# Count the missing (NaN/None) values in every column of a dataframe.
def __checkAllNaNs(dataframe):
    """Return the number of null entries in each column.

    Args:
        dataframe: The DataFrame to inspect.

    Returns:
        The result of ``dataframe.isnull().sum()``: one null count per column.
    """
    return dataframe.isnull().sum()


# Expose the helper as a DataFrame method: df.checkAllNaNs()
DataFrame.checkAllNaNs = __checkAllNaNs
# Remove the rows whose value in one given column is missing, e.g. the single
# row where Total_Stops was NaN, instead of imputing a value for it.
def __dropRowsWhereNone(dataframe, column):
    """Return a new dataframe without the rows where ``column`` is null.

    Rows are filtered by their own value rather than by index label, so a
    non-null row is kept even when it shares an index label with a null row
    (dropping by label would remove both).

    Args:
        dataframe: The DataFrame to filter; it is not modified.
        column: Label of the column to check for null values.

    Returns:
        A DataFrame containing only the rows where ``column`` is not null.

    Raises:
        KeyError: If ``column`` is not a column of ``dataframe``.
    """
    return dataframe.dropna(subset=[column])


# Expose the helper as a DataFrame method: df.dropRowsWhereNone("col")
DataFrame.dropRowsWhereNone = __dropRowsWhereNone
# Count how many rows are exact repeats of an earlier row.
def __totalDuplicateRows(dataframe):
    """Return the total number of duplicated rows.

    Uses ``DataFrame.duplicated()`` with its default arguments and sums the
    resulting boolean flags.

    Args:
        dataframe: The DataFrame to inspect.

    Returns:
        The count of rows flagged as duplicates.
    """
    return dataframe.duplicated().sum()


# Expose the helper as a DataFrame method: df.totalDuplicateRows()
DataFrame.totalDuplicateRows = __totalDuplicateRows
""" | |
Check all the basic information about dataset | |
""" | |
def __get_dataset_overview(dataframe):
    """Print the basic information about a dataset.

    Writes, in order: the shape, the dtype of every column, the first rows
    (``head()``) and the summary statistics (``describe()``), separated by
    blank-line spacers.

    The two tables are rendered with ``display`` when that name is defined
    (it is typically available in IPython/Jupyter sessions) and with
    ``print`` otherwise, so the function does not raise NameError in a plain
    Python interpreter.

    Args:
        dataframe: The DataFrame to summarise.
    """
    try:
        # https://stackoverflow.com/a/29665452 => how display works
        show = display  # noqa: F821 - only defined in some environments
    except NameError:
        show = print

    print("=> Shape of dataframe is\n")
    print(dataframe.shape)
    print("\n")  # spacer: one newline plus print's own line ending

    print("=> Data types of each columns\n")
    print(dataframe.dtypes)
    print("\n")

    print("=> Head of the dataframe is ")
    show(dataframe.head())
    print("\n")

    print("=> Basic statistics about this dataframe")
    show(dataframe.describe())


# Expose the helper as a DataFrame method: df.get_dataset_overview()
DataFrame.get_dataset_overview = __get_dataset_overview
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment