Skip to content

Instantly share code, notes, and snippets.

@cydal
Created March 17, 2021 22:33
Show Gist options
  • Save cydal/fdcd2769104a776b77790cd24e1fd3ab to your computer and use it in GitHub Desktop.
Save cydal/fdcd2769104a776b77790cd24e1fd3ab to your computer and use it in GitHub Desktop.
# Search vocabulary grouped by theme: each key names a category and maps to
# the list of terms that count as a hit for that category.
keywords = {
    "online": [
        'online', 'technology', 'internet', 'web', 'social media',
        # NOTE(review): 'Facebook' is the only capitalised term; it can only
        # hit if the text keeps its original casing or the matcher ignores
        # case -- confirm against how the text is cleaned.
        'Facebook', 'chat', 'chatroom', 'digital', 'webcam', 'cyber',
    ],
    "children": [
        'children', 'child', 'minor', 'minors', 'infant', 'infants',
        'underage', 'under-age', 'kid', 'teenager', 'teenagers', 'adolescent',
        'adolescents', 'girl', 'girls', 'boy', 'boys',
    ],
    "abuse": [
        'abuse', 'abusive', 'exploiting', 'exploitation',
        # Correct spelling added; the original misspelling is kept so that
        # text containing the same typo still matches.
        'harassment', 'harrassment',
        'prostitution', 'groom', 'grooming', 'predator', 'predators',
        'pedophile', 'paedophile', 'maltreatment', 'trafficking', 'violence',
    ],
    "sexual": ['sex', 'sexual', 'pornography', 'pornographic'],
}
# Check text for search terms
def search_terms(text, term_groups=None):
    """Return 1 if *text* mentions a term from every group, otherwise 0.

    Args:
        text: The text to scan. Anything that is not a ``str`` (for example
            a NaN or ``None`` cell) is treated as containing no terms.
        term_groups: Mapping of category name to a list of terms. Defaults
            to the module-level ``keywords`` mapping.

    Returns:
        ``1`` when every group has at least one term occurring in *text*,
        ``0`` otherwise (including when there are no groups at all).

    Matching is a case-insensitive substring test, so a short term also
    hits inside longer words (``'sex'`` matches ``'Essex'``).
    """
    if term_groups is None:
        term_groups = keywords
    if not isinstance(text, str) or not term_groups:
        return 0

    haystack = text.lower()
    # Require a hit in *every* group, however many groups there are, rather
    # than comparing against a fixed-length list of flags.
    return int(all(
        any(term.lower() in haystack for term in terms)
        for terms in term_groups.values()
    ))
# Flag each row with the search_terms result (1 or 0) for its cleaned text.
core_df['term_positive'] = core_df['cleaned_text'].map(search_terms)

# Remove rows that don't meet search criteria
meets_criteria = core_df['term_positive'] == 1
core_df = core_df[meets_criteria]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment