gracecarrillo/POS helper function

## POS helper function
#------------ FEATURE ENGINEERING ----------------#

#--- Part of Speech Tags (POS)--#

# Download the NLTK resource named 'averaged_perceptron_tagger'
# (presumably the model nltk.pos_tag relies on below -- confirm for the
# installed NLTK version).
nltk.download('averaged_perceptron_tagger')

# Coarse word-class name -> list of fine-grained POS tags in that class.
# NOTE: not referenced anywhere else in the visible part of this file.
pos_family = dict(
    NOUN=['NN', 'NNS', 'NNP'],  # 'NNPS' was deliberately removed
    PRON=['PRP', 'PRP$', 'WP', 'WP$'],
    VERB=['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'],
    ADJ=['JJ', 'JJR', 'JJS'],
    ADV=['RB', 'RBR', 'RBS', 'WRB'],
)

def count_pos_tag(tweets, *, tagger=None):
  '''
  Count how often each part-of-speech tag occurs in every tweet.

  Each tweet is split on whitespace, the resulting tokens are tagged, and
  the tags are tallied per tweet.

  Args:
    tweets: iterable of strings, one per tweet.
    tagger: optional callable taking a list of tokens and returning an
      iterable of (token, tag) pairs. Defaults to nltk.pos_tag.

  Returns:
    A list with one dict per tweet, in input order, mapping each POS tag
    to the number of tokens in that tweet carrying it. A tweet without
    tokens yields an empty dict.
  '''
  if tagger is None:
    # Resolved lazily so a caller-supplied tagger never touches NLTK.
    tagger = nltk.pos_tag

  total_count = []
  for s in tweets:
    tagged = tagger(s.split())
    # Tally tags straight from the (token, tag) pairs. Going through
    # dict(tagged) would collapse repeated words, and adding 1 per distinct
    # tag would discard the tally, so every count would come out as 1.
    tag_counts = Counter(tag for _, tag in tagged)
    total_count.append(dict(tag_counts))

  return total_count

# Apply to your data
# NOTE(review): `train` is not defined in this snippet; presumably a pandas
# DataFrame with a `tidy_tweet` text column -- confirm before running.

# Build one POS-tag dictionary per entry of train.tidy_tweet.values
total_count = count_pos_tag(train.tidy_tweet.values)
	#------------ FEATURE ENGINEERING ----------------#

	#--- Part of Speech Tags (POS)--#

	# Download the NLTK resource named 'averaged_perceptron_tagger'
	# (presumably the model nltk.pos_tag relies on below -- confirm for the
	# installed NLTK version).
	nltk.download('averaged_perceptron_tagger')

	# Coarse word-class name -> list of fine-grained POS tags in that class.
	# NOTE: not referenced anywhere else in the visible part of this file.
	pos_family = dict(
		NOUN=['NN', 'NNS', 'NNP'],  # 'NNPS' was deliberately removed
		PRON=['PRP', 'PRP$', 'WP', 'WP$'],
		VERB=['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'],
		ADJ=['JJ', 'JJR', 'JJS'],
		ADV=['RB', 'RBR', 'RBS', 'WRB'],
	)

	def count_pos_tag(tweets, *, tagger=None):
		'''
		Count how often each part-of-speech tag occurs in every tweet.

		Each tweet is split on whitespace, the resulting tokens are tagged,
		and the tags are tallied per tweet.

		Args:
			tweets: iterable of strings, one per tweet.
			tagger: optional callable taking a list of tokens and returning
				an iterable of (token, tag) pairs. Defaults to nltk.pos_tag.

		Returns:
			A list with one dict per tweet, in input order, mapping each POS
			tag to the number of tokens in that tweet carrying it. A tweet
			without tokens yields an empty dict.
		'''
		if tagger is None:
			# Resolved lazily so a caller-supplied tagger never touches NLTK.
			tagger = nltk.pos_tag

		total_count = []
		for s in tweets:
			tagged = tagger(s.split())
			# Tally tags straight from the (token, tag) pairs. Going through
			# dict(tagged) would collapse repeated words, and adding 1 per
			# distinct tag would discard the tally, so every count would
			# come out as 1.
			tag_counts = Counter(tag for _, tag in tagged)
			total_count.append(dict(tag_counts))

		return total_count

	# Apply to your data
	# NOTE(review): `train` is not defined in this snippet; presumably a pandas
	# DataFrame with a `tidy_tweet` text column -- confirm before running.

	# Build one POS-tag dictionary per entry of train.tidy_tweet.values
	total_count = count_pos_tag(train.tidy_tweet.values)