Skip to content

Instantly share code, notes, and snippets.

@toby-p
Created September 6, 2019 18:19
Show Gist options
  • Save toby-p/6c0b4fb4897d7bd584de022763883b92 to your computer and use it in GitHub Desktop.
Save toby-p/6c0b4fb4897d7bd584de022763883b92 to your computer and use it in GitHub Desktop.
Search for strings in iterables and Pandas DataFrames
def str_search(*substrings: str, iterable, exact_match: bool = False) -> "list | str":
    """Case-insensitive search of an iterable for substrings.

    Non-string items in ``iterable`` and non-string ``substrings`` are
    silently ignored.

    Args:
        substrings (str): strings to search for in the iterable. In partial
            mode an item matches only if it contains *every* substring.
        iterable (list, tuple, etc): iterable containing string objects to be
            searched.
        exact_match (bool): if True return the single item that equals the
            substring when compared case-insensitively (therefore only
            meaningful if 1 substring arg is supplied). Otherwise return a
            list of all partial matches.

    Returns:
        Partial mode: list of the matching items, in first-seen order, with
        identical strings de-duplicated. Items differing only by case are
        all kept.
        Exact mode: the first matching item itself (a ``str``), or an empty
        list if nothing matches or no string substring was supplied.
    """
    # dict.fromkeys drops repeated identical strings while preserving order,
    # without merging values that differ only by case (keying on the
    # lowercased text would lose all but one of "Foo"/"foo"/"FOO").
    candidates = list(dict.fromkeys(i for i in iterable if isinstance(i, str)))
    needles = [s.lower() for s in substrings if isinstance(s, str)]

    if exact_match:
        # all() over zero needles is vacuously true; without this guard an
        # exact search for "nothing" would return an arbitrary first item.
        if not needles:
            return []
        for candidate in candidates:
            lowered = candidate.lower()
            if all(needle == lowered for needle in needles):
                return candidate
        return []

    return [
        candidate
        for candidate in candidates
        if all(needle in candidate.lower() for needle in needles)
    ]
def str_search_df(*keywords, df):
    """Search all columns of a Pandas DataFrame for strings matching keywords.

    The search is case-insensitive and works on the string form of every
    cell, so numeric and other non-string columns are searchable too.

    Args:
        keywords (str): substrings to look for. A cell matches only if it
            contains *every* keyword; non-string keywords are ignored.
        df (pandas.DataFrame): the frame to search.

    Returns:
        pandas.DataFrame: the rows of ``df`` in which at least one cell
        matches. Missing cells (NaN/None) never match, so searching for
        "nan" or "none" does not select them.
    """
    needles = [k.lower() for k in keywords if isinstance(k, str)]

    # All-False positional mask; a plain boolean array sidesteps index
    # alignment, so frames with duplicate index labels work as well.
    mask = df.index.isin([])

    for col in df.columns:
        column = df[col]
        # Compare against the same string form that is being searched;
        # testing the raw column would make non-string columns unmatchable.
        text = column.astype(str).str.lower()
        # Start from "cell is present" so that the text "nan"/"None" produced
        # by astype(str) for missing values cannot yield a match.
        hit = column.notna().to_numpy(dtype=bool)
        for needle in needles:
            contains = text.str.contains(needle, regex=False, na=False)
            hit = hit & contains.to_numpy(dtype=bool)
        mask = mask | hit

    return df[mask]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment