Skip to content

Instantly share code, notes, and snippets.

@brunomsantiago
Created April 28, 2021 22:49
Show Gist options
  • Save brunomsantiago/77bd56fbba4e962752f9a0862f622a28 to your computer and use it in GitHub Desktop.
Save brunomsantiago/77bd56fbba4e962752f9a0862f622a28 to your computer and use it in GitHub Desktop.
String search on a pandas DataFrame
import numpy as np
import pandas as pd
from random import choice, randint
# Sample value pools from which the random demo records are drawn.
names = [
    'David',
    'James',
    'John',
    'Michael',
    'Richard',
    'Robert',
    'William',
]
surnames = [
    'Davis',
    'Jones',
    'Lee',
    'Miller',
    'Moore',
    'Smith',
    'Taylor',
]
colors = [
    'Black',
    'Blue',
    'Green',
    'Purple',
    'Red',
    'White',
    'Yellow',
]
# NOTE(review): the empty string presumably stands for "no pet" -- confirm.
pets = [
    '',
    'Dog',
    'Cat',
    'Bird',
    'Fish',
    'Bunny',
    'Hamster',
    'Guinea Pig',
]
def random_person():
    """Build one random person record.

    Returns:
        A dict with the keys 'First Name', 'Last Name', 'Year of Birth',
        'Favorite Color' and 'Pet'. The year is an int between 1970 and
        2000 inclusive; every other value is picked from the matching
        module-level list.
    """
    # Dict literals evaluate their values top to bottom, so the random
    # draws happen in the same order as the keys are listed.
    return {
        'First Name': choice(names),
        'Last Name': choice(surnames),
        'Year of Birth': randint(1970, 2000),
        'Favorite Color': choice(colors),
        'Pet': choice(pets),
    }
def row_to_string(row):
    """Flatten one row into a single lowercase, space-separated string.

    Args:
        row: A pandas Series holding the values of one DataFrame row.

    Returns:
        The row's values converted to text, joined with single spaces and
        lowercased.
    """
    cells = row.values.astype(str)
    joined = ' '.join(cells)
    return joined.lower()
def search_df(df, query):
    """Return the rows of *df* whose text contains every word of *query*.

    Each row is flattened to one lowercase string with ``row_to_string``.
    A row is kept only when every whitespace-separated word of *query*
    occurs in that string as a literal, case-insensitive substring.

    Args:
        df: DataFrame to search; every column takes part in the match.
        query: Search words separated by whitespace.

    Returns:
        A new DataFrame with the matching rows, keeping their original
        order and index labels. A query without any word keeps every row.
    """
    # split() without an argument drops the empty tokens that leading,
    # trailing or repeated whitespace would otherwise produce.
    words = query.lower().split()

    # No words means no filtering. A frame without rows has nothing to
    # match, and apply() on it does not yield a string Series, so the
    # .str accessor below would fail.
    if not words or len(df.index) == 0:
        return df.copy()

    strings = df.apply(row_to_string, axis=1)
    # regex=False matches each word literally, so characters such as
    # '+', '(' or '.' in the query are not interpreted as a pattern.
    masks = [strings.str.contains(word, regex=False) for word in words]
    final_mask = np.column_stack(masks).all(axis=1)
    return df[final_mask]
# Demo: generate 20 random people and order them by year of birth.
data = [random_person() for _ in range(20)]
df = pd.DataFrame(data)
df = df.sort_values(by='Year of Birth')
df = df.reset_index(drop=True)

# Keep only the rows that contain both 'taylor' and '197',
# e.g. someone named Taylor born in the 1970s.
query = 'taylor 197'
df2 = search_df(df, query)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment