t-redactyl/Resolutions matching.py

## Resolutions matching.py
# Import the relevant packages
import numpy as np
import re

# Create 6 new dummy variables which indicate whether one of the words
# associated with a resolution is present in the tweet.
#
# Each entry is (column name, keyword regex fragments). A list of tuples is
# used so the columns are always created in this order.
# NOTE(review): every keyword must be followed by a non-word character or the
# end of the tweet, so stems such as 'depress', 'anxi', 'hobb' and 'wast' only
# match as complete words (e.g. "anxiety" is not matched), and r'\$' does not
# match amounts like "$100" - confirm whether prefix matching was intended.
resolution_keywords = [
    ('Physical Health', [
        'weight', 'fit', 'exercise', 'gym', 'muscle', 'health', 'water',
        'smoking', 'alcohol', 'drinking', 'walk', 'run', 'swim',
    ]),
    ('Learning and Career', [
        'business', 'job', 'career', 'professional', 'study', 'learn',
        'develop', 'advance', 'grades', 'school', 'university', 'read',
        'skill', 'education',
    ]),
    ('Mental Wellbeing', [
        'positive', 'enjoy', 'happy', 'happiness', 'stress', 'depress',
        'anxi', 'organised', 'organized', 'hobb', 'fun', 'psychologist',
        'psychiatrist', 'sleep', 'meditate',
    ]),
    ('Finances', [
        'save', 'saving', 'debt', 'credit', 'money', 'invest', 'wast',
        'finance', 'frugal', r'\$',
    ]),
    ('Relationships', [
        'relationship', 'friend', 'boyfriend', 'girlfriend', 'fiance',
        'husband', 'wife', 'engaged', 'wedding', 'married', 'pregnant',
        'child', 'kid', 'family', 'parent', 'father', 'dad', 'mother', 'mom',
        'mum', 'brother', 'sister', 'dog', 'cat',
    ]),
    ('Travel and Holidays', [
        'travel', 'trip', 'holiday', 'vacation', 'country', 'foreign',
        'overseas', 'abroad',
    ]),
]

for category, keywords in resolution_keywords:
    # Raw strings keep '\W' and '\$' as regex escapes rather than (invalid)
    # string escapes. The keyword group is non-capturing because only a
    # yes/no answer is needed from str.contains.
    pattern = r'(?:^|\W)(?:' + '|'.join(keywords) + r')(?:$|\W)'
    # na=False: a missing tweet counts as "no keyword found" instead of
    # passing NaN on to np.where.
    has_keyword = twitter_df['Tweet'].str.contains(
        pattern, flags=re.IGNORECASE, na=False)
    twitter_df[category] = np.where(has_keyword, 1, 0)

# Find cases where tweets fall into multiple categories and delete them.
# col_list is built from the category names above so the sum does not depend
# on a definition made elsewhere.
col_list = [category for category, _ in resolution_keywords]
twitter_df['Number of resolutions'] = twitter_df[col_list].sum(axis=1)
twitter_df = twitter_df.drop(twitter_df[twitter_df['Number of resolutions'] > 1].index)
	# Import the relevant packages
	import numpy as np
	import re

	# Create 6 new dummy variables which indicate whether one of the words
	# associated with a resolution is present in the tweet.
	#
	# Each entry is (column name, keyword regex fragments). A list of tuples is
	# used so the columns are always created in this order.
	# NOTE(review): every keyword must be followed by a non-word character or
	# the end of the tweet, so stems such as 'depress', 'anxi', 'hobb' and
	# 'wast' only match as complete words (e.g. "anxiety" is not matched), and
	# r'\$' does not match amounts like "$100" - confirm whether prefix
	# matching was intended.
	resolution_keywords = [
		('Physical Health', [
			'weight', 'fit', 'exercise', 'gym', 'muscle', 'health', 'water',
			'smoking', 'alcohol', 'drinking', 'walk', 'run', 'swim',
		]),
		('Learning and Career', [
			'business', 'job', 'career', 'professional', 'study', 'learn',
			'develop', 'advance', 'grades', 'school', 'university', 'read',
			'skill', 'education',
		]),
		('Mental Wellbeing', [
			'positive', 'enjoy', 'happy', 'happiness', 'stress', 'depress',
			'anxi', 'organised', 'organized', 'hobb', 'fun', 'psychologist',
			'psychiatrist', 'sleep', 'meditate',
		]),
		('Finances', [
			'save', 'saving', 'debt', 'credit', 'money', 'invest', 'wast',
			'finance', 'frugal', r'\$',
		]),
		('Relationships', [
			'relationship', 'friend', 'boyfriend', 'girlfriend', 'fiance',
			'husband', 'wife', 'engaged', 'wedding', 'married', 'pregnant',
			'child', 'kid', 'family', 'parent', 'father', 'dad', 'mother',
			'mom', 'mum', 'brother', 'sister', 'dog', 'cat',
		]),
		('Travel and Holidays', [
			'travel', 'trip', 'holiday', 'vacation', 'country', 'foreign',
			'overseas', 'abroad',
		]),
	]

	for category, keywords in resolution_keywords:
		# Alternatives are joined with a plain '|': an escaped '\|' would match
		# a literal pipe character and no keyword would ever be found. Raw
		# strings keep '\W' and '\$' as regex escapes, and the keyword group is
		# non-capturing because only a yes/no answer is needed.
		pattern = r'(?:^|\W)(?:' + '|'.join(keywords) + r')(?:$|\W)'
		# na=False: a missing tweet counts as "no keyword found" instead of
		# passing NaN on to np.where.
		has_keyword = twitter_df['Tweet'].str.contains(
			pattern, flags=re.IGNORECASE, na=False)
		twitter_df[category] = np.where(has_keyword, 1, 0)

	# Find cases where tweets fall into multiple categories and delete them.
	# col_list is built from the category names above so the sum does not
	# depend on a definition made elsewhere.
	col_list = [category for category, _ in resolution_keywords]
	twitter_df['Number of resolutions'] = twitter_df[col_list].sum(axis=1)
	twitter_df = twitter_df.drop(twitter_df[twitter_df['Number of resolutions'] > 1].index)