Created
September 5, 2020 13:52
-
-
Save manmohan24nov/0ae15381db74956ccf5a1ab1752e5d67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import spacy
from matplotlib import cm

# NOTE: pyplot must be imported as a module alias. The previous form,
# `from matplotlib.pyplot import plt`, raises ImportError because
# matplotlib.pyplot does not export a name called `plt`.

# spaCy pipeline used by ner_text() to find named entities in article text.
nlp = spacy.load('en_core_web_sm')

# Module-level dict with one empty list per entity category.
# ner_text() builds its own local dict of the same name on every call, so
# this object is not modified by that function.
ner_collection = {"Location": [], "Person": [], "Date": [], "Quantity": [], "Organisation": []}

# Accumulators shared across all ner_text() calls: each call appends the
# entity strings it finds to the list for the matching category.
location = []
person = []
date = []
quantity = []
organisation = []
def ner_text(text):
    """Extract named entities from *text*, grouped by category.

    Runs the module-level spaCy pipeline ``nlp`` over *text* and sorts the
    recognised entities into five categories based on their spaCy label:
    GPE -> Location, PERSON -> Person, DATE -> Date, QUANTITY -> Quantity,
    ORG -> Organisation. Entities with any other label are ignored.

    Side effect: every kept entity string is also appended to the
    module-level accumulator list of the same category (``location``,
    ``person``, ``date``, ``quantity``, ``organisation``).

    Args:
        text: The text to analyse; passed straight to ``nlp``.

    Returns:
        A new dict with the keys "Location", "Person", "Date", "Quantity"
        and "Organisation", each mapping to the list of entity strings
        found in *text* (in document order, duplicates kept).
    """
    # spaCy label -> (key in the returned dict, module-level accumulator).
    # Pairing both targets in one table keeps the per-call result and the
    # global accumulators in sync; previously the accumulators were
    # cross-wired (e.g. DATE entities were appended to `person`).
    targets = {
        "GPE": ("Location", location),
        "DATE": ("Date", date),
        "PERSON": ("Person", person),
        "ORG": ("Organisation", organisation),
        "QUANTITY": ("Quantity", quantity),
    }

    doc = nlp(text)
    ner_collection = {"Location": [], "Person": [], "Date": [], "Quantity": [], "Organisation": []}
    for ent in doc.ents:
        target = targets.get(str(ent.label_))
        if target is None:
            # Entity type we do not track.
            continue
        key, accumulator = target
        ner_collection[key].append(ent.text)
        accumulator.append(ent.text)
    return ner_collection
# Run entity extraction on every article's text and store the per-article
# result dict in a new 'ner_data' column. As a side effect, ner_text() also
# fills the module-level accumulator lists.
# NOTE(review): `articles_word_limit` is not defined in this snippet; the
# `.map` call suggests a pandas DataFrame with a 'text' column — confirm.
articles_word_limit['ner_data'] = articles_word_limit['text'].map(ner_text)
# --- Horizontal bar chart of the ten most frequently mentioned locations ---

# NOTE(review): `location_counts` is not defined in this snippet. It must
# provide `.most_common()` returning (name, count) pairs, e.g. a
# collections.Counter built from `location` — confirm where it is created.
top_locations = location_counts.most_common()[:10]
location_name = [entry[0].upper() for entry in top_locations]
location_count = [entry[1] for entry in top_locations]

# Bar shades come from a fixed, hard-coded list of levels (scaled by
# 0.00525) fed through the Blues colormap; they are not derived from
# `location_count`.
shade_levels = [0.00208, 0.00235, 0.00281, 0.00317, 0.00362,
                0.00371, 0.00525, 0.00679, 0.00761, 0.00833]
bar_colors = cm.Blues([level / 0.00525 for level in shade_levels])

fig, ax = plt.subplots(figsize=(15, 8), dpi=100)
ax.barh(location_name, location_count, alpha=0.7, color=bar_colors)
plt.rcParams.update({'font.size': 10})
rects = ax.patches  # not used below; kept so the module-level name still exists

# Print each bar's count next to the bar, at a fixed offset of 100 data
# units past the bar's end.
for row, count in enumerate(location_count):
    ax.text(count + 100, row, str(count), size=10, ha='center', va='center')

# Subtitle, placed just above the axes (axes-relative coordinates).
ax.text(0, 1.02, 'Count of Location name Extracted from Reuters Articles',
        transform=ax.transAxes, size=12, weight=600, color='#777777')
ax.xaxis.set_ticks_position('bottom')
ax.tick_params(axis='y', colors='black', labelsize=12)
ax.set_axisbelow(True)

# Main title above the subtitle, and the data-source credit below the axes.
ax.text(0, 1.08, 'TOP 10 Location Mention in Reuters Articles',
        transform=ax.transAxes, size=22, weight=600, ha='left')
ax.text(0, -0.1, 'Source: http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html',
        transform=ax.transAxes, size=12, weight=600, color='#777777')

# Strip the frame and the x-axis ticks so only bars and labels remain.
for side in ('right', 'top', 'left', 'bottom'):
    ax.spines[side].set_visible(False)
plt.tick_params(axis='y', which='both', left=False, top=False, labelbottom=False)
ax.set_xticks([])

plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment