Skip to content

Instantly share code, notes, and snippets.

@manmohan24nov
Created September 5, 2020 13:52
Show Gist options
  • Save manmohan24nov/0ae15381db74956ccf5a1ab1752e5d67 to your computer and use it in GitHub Desktop.
Save manmohan24nov/0ae15381db74956ccf5a1ab1752e5d67 to your computer and use it in GitHub Desktop.
from collections import Counter

import matplotlib.pyplot as plt
import spacy
from matplotlib import cm
# spaCy pipeline shared by every ner_text() call; loaded once at import time.
nlp = spacy.load('en_core_web_sm')

# Module-level dict of empty per-category buckets. ner_text() assigns its own
# local dict with the same keys, so this one is not filled by that function.
ner_collection = {
    category: []
    for category in ("Location", "Person", "Date", "Quantity", "Organisation")
}

# Running accumulators: ner_text() appends entity text to these on every call,
# so they grow across all texts processed rather than being reset per text.
location, person, date, quantity, organisation = [], [], [], [], []
def ner_text(text):
    """Extract selected named entities from *text*.

    The text is run through the module-level spaCy pipeline ``nlp``. Entities
    labelled GPE, PERSON, DATE, QUANTITY or ORG are collected; every other
    label is ignored.

    Args:
        text: The string to analyse.

    Returns:
        A new dict with the keys ``"Location"``, ``"Person"``, ``"Date"``,
        ``"Quantity"`` and ``"Organisation"``, each mapping to the list of
        entity strings of that category found in *text* (in document order).

    Side effects:
        Each collected entity is also appended to the matching module-level
        accumulator list (``location``, ``person``, ``date``, ``quantity``,
        ``organisation``), so those lists grow across calls.
    """
    doc = nlp(text)
    ner_collection = {
        "Location": [],
        "Person": [],
        "Date": [],
        "Quantity": [],
        "Organisation": [],
    }

    # spaCy label -> (key in the returned dict, module-level accumulator).
    # Keeping both targets in one row guarantees they cannot drift apart;
    # previously DATE/PERSON and ORG/QUANTITY fed each other's global lists.
    targets = {
        "GPE": ("Location", location),
        "DATE": ("Date", date),
        "PERSON": ("Person", person),
        "ORG": ("Organisation", organisation),
        "QUANTITY": ("Quantity", quantity),
    }

    for ent in doc.ents:
        target = targets.get(str(ent.label_))
        if target is None:
            # Entity type we do not track.
            continue
        key, accumulator = target
        ner_collection[key].append(ent.text)
        accumulator.append(ent.text)

    return ner_collection
# Run NER over every article and store the per-article result dict.
# NOTE(review): `articles_word_limit` is not defined in this file; presumably a
# pandas DataFrame with a 'text' column created earlier -- confirm.
# Besides filling 'ner_data', each ner_text() call also appends to the
# module-level accumulator lists (location, person, ...).
articles_word_limit['ner_data'] = articles_word_limit['text'].map(ner_text)

# Frequency of every location mention gathered by the ner_text() calls above.
# This name was previously used without ever being defined in this file.
location_counts = Counter(location)

# Ten most frequent locations, upper-cased for the axis labels.
top_locations = location_counts.most_common(10)
location_name = [name.upper() for name, _ in top_locations]
location_count = [count for _, count in top_locations]

fig, ax = plt.subplots(figsize=(15, 8), dpi=100)
ax.barh(
    location_name,
    location_count,
    alpha=0.7,
    # One fixed shade of blue per bar position (not derived from the counts).
    color=cm.Blues([i / 0.00525 for i in [0.00208, 0.00235, 0.00281, 0.00317, 0.00362,
                                          0.00371, 0.00525, 0.00679, 0.00761, 0.00833]]),
)
plt.rcParams.update({'font.size': 10})

# Not used below; kept so the module-level name stays available.
rects = ax.patches

# Print each count next to its bar, 100 data units past the bar's end.
for i, label in enumerate(location_count):
    ax.text(label + 100, i, str(label), size=10, ha='center', va='center')

# Title, subtitle and source line, positioned in axes coordinates.
ax.text(0, 1.08, 'TOP 10 Location Mention in Reuters Articles',
        transform=ax.transAxes, size=22, weight=600, ha='left')
ax.text(0, 1.02, 'Count of Location name Extracted from Reuters Articles',
        transform=ax.transAxes, size=12, weight=600, color='#777777')
ax.text(0, -0.1, 'Source: http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html',
        transform=ax.transAxes, size=12, weight=600, color='#777777')

ax.xaxis.set_ticks_position('bottom')
ax.tick_params(axis='y', colors='black', labelsize=12)
ax.set_axisbelow(True)

# Strip the frame and the x ticks; the numbers printed beside the bars carry
# the values instead.
for side in ('right', 'top', 'left', 'bottom'):
    ax.spines[side].set_visible(False)
plt.tick_params(axis='y', which='both', left=False, top=False, labelbottom=False)
ax.set_xticks([])

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment