# JarrydWannenburg/get_org_counts.py

## get_org_counts.py
# Return the top n organizations mentioned in an article, together with their mention counts
def get_org_counts(text, n=3):
    """Return the most frequently mentioned organizations in ``text``.

    The text is passed to ``nlp`` (defined outside this function; presumably
    a loaded spaCy pipeline -- confirm against the caller). Entities whose
    label is ``ORG`` are lower-cased, stripped of ``'s`` and ``, inc.``, and
    counted. The bare word ``company`` is discarded.

    Args:
        text: Article text to analyse.
        n: Maximum number of organizations to return. Defaults to 3, the
            value that used to be hard-coded. Expected to be a non-negative
            int; ``None`` returns every organization found.

    Returns:
        A dict mapping each normalized organization name to its mention
        count, ordered from most to least mentioned. Organizations with
        equal counts keep the order in which they were first seen.
    """
    # Imported locally so the function does not rely on a file-level import.
    from collections import Counter

    # Each newline is swapped for the literal phrase " paragraph break "
    # before parsing (behaviour kept exactly as in the original code).
    doc = nlp(text.replace("\n", " paragraph break "))

    # Some light text standardization on every ORG entity.
    org_names = (
        ent.text.lower().replace("'s", "").replace(", inc.", "")
        for ent in doc.ents
        if ent.label_ == 'ORG'
    )

    # The word "company" is extracted a lot but is useless, so drop it.
    org_counts = Counter(name for name in org_names if name != 'company')

    # most_common() sorts by count, descending, and preserves first-seen
    # order among ties -- the same ordering the manual stable sort produced.
    return dict(org_counts.most_common(n))
	# Return the top n organizations mentioned in an article, together with their mention counts
	def get_org_counts(text, n=3):
		"""Return the most frequently mentioned organizations in ``text``.

		The text is passed to ``nlp`` (defined outside this function; presumably
		a loaded spaCy pipeline -- confirm against the caller). Entities whose
		label is ``ORG`` are lower-cased, stripped of ``'s`` and ``, inc.``, and
		counted. The bare word ``company`` is discarded.

		Args:
			text: Article text to analyse.
			n: Maximum number of organizations to return. Defaults to 3, the
				value that used to be hard-coded. Expected to be a non-negative
				int; ``None`` returns every organization found.

		Returns:
			A dict mapping each normalized organization name to its mention
			count, ordered from most to least mentioned. Organizations with
			equal counts keep the order in which they were first seen.
		"""
		# Imported locally so the function does not rely on a file-level import.
		from collections import Counter

		# Each newline is swapped for the literal phrase " paragraph break "
		# before parsing (behaviour kept exactly as in the original code).
		doc = nlp(text.replace("\n", " paragraph break "))

		# Some light text standardization on every ORG entity.
		org_names = (
			ent.text.lower().replace("'s", "").replace(", inc.", "")
			for ent in doc.ents
			if ent.label_ == 'ORG'
		)

		# The word "company" is extracted a lot but is useless, so drop it.
		org_counts = Counter(name for name in org_names if name != 'company')

		# most_common() sorts by count, descending, and preserves first-seen
		# order among ties -- the same ordering a manual stable sort produces.
		return dict(org_counts.most_common(n))