Skip to content

Instantly share code, notes, and snippets.

View tomasonjo's full-sized avatar
🏠
Working from home

Tomaz Bratanic tomasonjo

🏠
Working from home
View GitHub Profile
# https://www.gutenberg.org/ebooks/95 The Prisoner of Zenda
# Fetch the data
# Download the plain-text file and decode it as UTF-8; surrounding
# whitespace is stripped from the decoded text.
target_url = 'https://www.gutenberg.org/files/95/95-0.txt'
import urllib.request
data = urllib.request.urlopen(target_url)
raw_data = data.read().decode('utf8').strip()
# Preprocess text into chapters
# NOTE(review): the statements that build `chapters` from `raw_data` are
# not present in this excerpt; only the `re` import is visible.
import re
# Analyze the first chapter
c = chapters[0]
# Get a list of persons
# NOTE(review): `nlp` is not defined in the visible code -- presumably a
# spaCy pipeline, given the `doc.ents` / `ent.label_` usage; confirm.
doc=nlp(c)
# Passing through a set de-duplicates the names; the order of the resulting
# list is therefore not guaranteed.
involved = list(set([ent.text for ent in doc.ents if ent.label_=='PERSON']))
# replace names of involved in the text
# with an id and save the mapping
decode = dict()
# NOTE(review): the body of this loop is missing from the excerpt (and the
# original indentation has been lost), so the code below is not runnable as
# shown. Presumably the body fills `decode` and rewrites `c`; confirm
# against the full source.
for i,x in enumerate(involved):
# Get mapping
# Get an array of words
# Split the chapter text on whitespace.
ws = c.split()
l = len(ws)
# Iterate through words
for wi,w in enumerate(ws):
# Skip if the word is not a person
# Words are treated as persons when they start with '$$' -- presumably the
# placeholder written by the replacement step above; confirm.
if not w[:2] == '$$':
continue
# Check next x words for any involved person
title score
scream 1.8314404488
a nightmare on elm street: the dream child 1.3354252577
a nightmare on elm street 4: the dream master 1.1446502209
bull durham 1.1446502209
a nightmare on elm street 3: dream warriors 1.1446502209
title score
a nightmare on elm street 4: the dream master 1
a nightmare on elm street: the dream child 1
my mother dreams the satan's disciples in new york 1
a nightmare on elm street 3: dream warriors 1
title score
thx 1138 1
title score
american psycho 0.4959547818
bamboozled 0.4959547818
cast away 0.4959547818
george washington 0.4959547818
gladiator 0.4959547818
title score
a nightmare on elm street: the dream child 2.641166687
a nightmare on elm street 4: the dream master 2.2638571262
a nightmare on elm street 3: dream warriors 2.2638571262
title score
1492: conquest of paradise 1.0
15 minutes 1.0
2001: a space odyssey 1.0
48 hrs. 1.0
the fifth element 1.0
title score
a nightmare on elm street 4: the dream master 2.463479518890381
a nightmare on elm street 3: dream warriors 2.463479518890381
a nightmare on elm street: the dream child 1.2535805702209473
1492: conquest of paradise 0.1572420448064804
15 minutes 0.1572420448064804