Skip to content

Instantly share code, notes, and snippets.

@lewiuberg
Last active December 28, 2020 09:48
Show Gist options
  • Save lewiuberg/f75c6ff2c764709a105e565800f602de to your computer and use it in GitHub Desktop.
Save lewiuberg/f75c6ff2c764709a105e565800f602de to your computer and use it in GitHub Desktop.
Active Airport code 01
def unpack_list(lst: list) -> str:
    """
    Render the items of a sequence as one comma-separated phrase.

    Every item is converted with ``str``. A plain string argument is not
    wrapped; it is treated as a sequence of its individual characters.

    Args:
        lst (list/str): Items to join.

    Returns:
        str: The single item on its own, or all items joined with ", "
            where the last item is introduced by ", and ". ``None`` is
            returned when the input is empty.
    """
    if isinstance(lst, str):
        parts = list(lst)
    else:
        parts = [str(entry) for entry in lst]

    # Empty input yields None rather than an empty string.
    if not parts:
        return None

    *leading, final = parts
    if not leading:
        return final
    return ", ".join(leading) + ", and " + final
def word_search(df, *words) -> None:
    """
    Count exact, whole-cell occurrences of one or more words in a DataFrame.

    Only columns whose dtype compares equal to ``object`` or ``str`` are
    searched. A cell counts as a hit when its entire content equals the
    word; the word is matched literally, so regex metacharacters in it
    have no special meaning. Missing cells never count as a hit.

    The results are printed, nothing is returned.

    Args:
        df (DataFrame): The dataframe to be searched.
        words (str/list): The word(s) to search for, given either as
            separate string arguments or as list(s) of strings.
    """
    import re

    # Nothing to do without words, or when the first argument is empty.
    if not words or len(words[0]) < 1:
        return

    # Flatten the arguments so that both word_search(df, "a", "b") and
    # word_search(df, ["a", "b"]) end up as one flat list of words.
    search_terms: list = []
    for entry in words:
        if isinstance(entry, str):
            search_terms.append(entry)
        else:
            search_terms.extend(entry)
    # NOTE(review): this echo of the search terms looks like a leftover
    # debug print; kept so the printed output stays unchanged.
    print(search_terms)

    # The set of searchable columns does not depend on the word.
    text_columns = [
        column
        for column in df
        if df[column].dtype == object or df[column].dtype == str
    ]
    col_count: int = len(text_columns)

    sum_words: int = 0
    found_words: list = []
    for word in search_terms:
        # Anchor the escaped word so only whole-cell matches count.
        pattern = f"^{re.escape(str(word))}$"
        for column in text_columns:
            # na=False keeps missing cells out of the boolean mask.
            hits = df[column].str.contains(pattern, na=False).sum()
            if hits:
                sum_words += hits
                if word not in found_words:
                    found_words.append(word)

    # When nothing was found, report against the full list of words.
    if not found_words:
        found_words = search_terms

    print("Columns of dtype str or object:", col_count)
    print(
        f"Instances of {unpack_list(found_words)} in the dataframe: {sum_words}"
    )
def find_missing_values(df) -> None:
    """
    Print a summary of the missing values in a Pandas DataFrame.

    Three lines are printed: the total number of missing cells, the
    number of columns containing at least one missing cell, and the
    names of those columns formatted by ``unpack_list``.

    Args:
        df (DataFrame): The dataframe being searched for missing values.
    """
    # Boolean Series indexed by column label: True where the column has
    # at least one missing cell. It must be used directly as a mask;
    # comparing it with `is True` is an identity test that is never true.
    has_missing = df.isna().any()
    column_names = list(df.columns[has_missing])
    miss_columns = has_missing.sum()
    miss_values = df.isna().sum().sum()
    print(f"Instances of missing data: {miss_values}")
    print(f"Columns with missing data: {miss_columns}")
    print(f"Column names with missing data: {unpack_list(column_names)}")
def path_checker(path: str) -> bool:
    """
    Report whether a path exists and, if so, what kind of entry it is.

    A message is printed for a missing path, a directory or a regular
    file. An existing path that is neither a directory nor a regular
    file produces no message.

    Args:
        path (str): The path to check.

    Returns:
        bool: True when the path exists, False otherwise.
    """
    from pathlib import Path

    path = Path(path)

    # Guard clause: handle the missing path first.
    if not path.exists():
        print(f"'{path}' does not exist.")
        return False

    if path.is_dir():
        print(f"'{path}' is directory")
    elif path.is_file():
        print(f"'{path}' is a file")
    return True
def df_location_data(df, search_col):
    """
    Add columns with geographical location data based on location name or address.

    Each value of ``df[search_col]`` is geocoded through geopy's Nominatim
    geocoder. The columns 'location', 'point', 'latitude', 'longitude'
    and 'altitude' are added to ``df`` itself (the input is modified in
    place). Rows whose lookup yields no result get ``None`` in 'point'
    and missing values in the three coordinate columns.

    Args:
        df (DataFrame): The dataframe holding the name/address column.
        search_col (str): Label of the column holding the name/address.

    Returns:
        DataFrame: The original DataFrame with location data columns included.
    """
    import pandas as pd
    from geopy.geocoders import Nominatim
    from geopy.extra.rate_limiter import RateLimiter

    geolocator = Nominatim(user_agent="my_geocoder")
    # NOTE(review): the public Nominatim service asks for at most one
    # request per second; confirm that a 0.1 s delay is acceptable for
    # the server this is run against.
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=.1)

    # Find the location.
    df['location'] = df[search_col].apply(geocode)

    # Extract point to its own column; None marks a failed lookup.
    df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)

    # Split point column into latitude, longitude and altitude columns.
    # Failed lookups are replaced by an all-None row so every row has the
    # same width, and the column names are passed explicitly so the split
    # also works for an empty frame or when every lookup failed.
    coord_columns = ['latitude', 'longitude', 'altitude']
    no_coords = (None,) * len(coord_columns)
    coord_rows = [
        point if isinstance(point, tuple) else no_coords
        for point in df['point']
    ]
    df[coord_columns] = pd.DataFrame(
        coord_rows, index=df.index, columns=coord_columns
    )
    return df
def missing_location(df):
    """
    Slice a DataFrame down to the parts that contain missing values.

    Args:
        df (DataFrame): The dataframe to inspect.

    Returns:
        tuple: Three DataFrames, in this order:
            the columns of ``df`` that contain a missing value (all rows),
            the rows of ``df`` that contain a missing value (all columns),
            and the first of these restricted to the rows where one of
            its own cells is missing.
    """
    # Row view: every row with at least one missing cell.
    rows_with_gaps = df.isnull().any(axis=1)
    miss_row = df[rows_with_gaps]

    # Column view: select by label every column with a missing cell.
    cols_with_gaps = df.isnull().any(axis=0)
    gap_labels = cols_with_gaps[cols_with_gaps].index
    miss_col = df[gap_labels]

    # Intersection: within those columns, keep only the affected rows.
    gap_rows_in_cols = miss_col.isnull().any(axis=1)
    miss_only = miss_col[gap_rows_in_cols]

    return miss_col, miss_row, miss_only
def replace_df_ax_name(df, find, replace_with="", axis=0):
    """
    Rename rows or columns. May also be used to reformat columns/rows to datetime.

    Every string label on the chosen axis that contains ``find`` is
    rewritten; labels that are not strings, or do not contain ``find``,
    are kept as they are. The input dataframe is not modified, and the
    column dtypes of the data are left untouched.

    Args:
        df (DataFrame): The base dataframe.
        find (str): The (sub)string one wants to change.
        replace_with (str, optional): The (sub)string one wants to change to.
            The special value "d_to_datetime" converts each matching
            label with ``pandas.to_datetime`` instead of substituting
            text. Defaults to "".
        axis (int, optional): Columns(1); any other value means rows.
            Defaults to 0.

    Returns:
        DataFrame: Renamed/reformatted copy of the dataframe.
    """
    import pandas as pd

    dff = df.copy()
    on_columns = axis == 1
    labels = dff.columns if on_columns else dff.index

    def _rename(label):
        # Only string labels can contain the substring; anything else
        # (ints, timestamps, tuples, ...) passes through unchanged.
        if not isinstance(label, str) or find not in label:
            return label
        if replace_with == "d_to_datetime":
            return pd.to_datetime(label)
        return label.replace(find, replace_with)

    new_labels = [_rename(label) for label in labels]

    # Assign the labels on the axis directly instead of transposing the
    # frame back and forth, which would upcast mixed-dtype data to object.
    if on_columns:
        dff.columns = new_labels
    else:
        dff.index = new_labels
    return dff
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment