import hmni

# Initialize a Matcher object with the pretrained Latin-character model
matcher = hmni.Matcher(model='latin')

# Single-pair similarity: by default the matcher returns a match probability
matcher.similarity('Alan', 'Al')
# 0.6838303319889133

# With prob=False it returns a binary match decision instead
matcher.similarity('Alan', 'Al', prob=False)
# 1
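
In the knowledge-graph pipeline below, the matcher is useful for collapsing near-duplicate entity names before graph construction. A minimal sketch, assuming a hypothetical name list and a 0.7 match threshold (both illustrative, not from the source):

names = ['Richard Nixon', 'Nixon', 'Richard M. Nixon', 'Archibald Cox']
canonical = []
for name in names:
    # Reuse an existing canonical form when the matcher scores the pair as a match
    match = next((c for c in canonical if matcher.similarity(name, c) >= 0.7), None)
    if match is None:
        canonical.append(name)
print(canonical)  # near-duplicates of the same person collapse to one entry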
import wikipediaapi
import pandas as pd

def wiki_page(page_name):
    wiki_api = wikipediaapi.Wikipedia(language='en',
                                      extract_format=wikipediaapi.ExtractFormat.WIKI)
    page = wiki_api.page(page_name)  # avoid shadowing the page_name argument
    if not page.exists():
        print('Page {} does not exist.'.format(page_name))
        return
    # Columns after 'page' are cut off in the source; these are assumed
    page_data = pd.DataFrame({
        'page': [page_name],
        'text': [page.text],
        'link': [page.fullurl],
        'categories': [list(page.categories.keys())],
    })
    return page_data
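
A quick usage example (the page title is illustrative, and the columns follow the assumed completion above):

df = wiki_page('United States v. Nixon')
if df is not None:
    print(df.loc[0, 'link'])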
import itertools
import networkx as nx

def prepro(pairs, filterout=None, hard_lmt=100000):
    # Every extracted pair becomes one directed edge; 'relation' marks it
    pairs['relation'] = 1
    G = nx.from_pandas_edgelist(pairs, 'subject', 'object',
                                create_using=nx.DiGraph())
    if filterout:
        # Keep only nodes whose entity type is not in the filter list
        nodes = list(set(
            pairs[~pairs.subject_type.isin(filterout)]['subject'].tolist()
            + pairs[~pairs.object_type.isin(filterout)]['object'].tolist()))
        # The rest of the function (and the use of itertools and hard_lmt)
        # is cut off in the source; restricting G to the kept nodes is assumed
        G = G.subgraph(nodes)
    return G
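
A usage sketch, assuming `pairs` is the entity-pair DataFrame produced by get_entity_pairs below, with the filtered entity types chosen only for illustration:

G = prepro(pairs, filterout=['CARDINAL', 'DATE'])
print(G.number_of_nodes(), 'nodes /', G.number_of_edges(), 'edges')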
def filter_graph(pairs, node):
    # Build the full multigraph, then keep only nodes reachable from `node`
    k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object',
                                      create_using=nx.MultiDiGraph())
    edges = nx.dfs_successors(k_graph, node)
    nodes = []
    for k, v in edges.items():
        nodes.append(k)
        nodes.extend(v)
    subgraph = k_graph.subgraph(nodes)
    # Layout over the full graph still yields positions for the subgraph nodes
    layout = nx.random_layout(k_graph)
    # The drawing calls are cut off in the source; a minimal completion:
    nx.draw_networkx(subgraph, pos=layout)
    plt.axis('off')
    plt.show()
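
For example, to view just the neighborhood of one entity (the node name here is hypothetical):

filter_graph(pairs, 'Richard Nixon')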
import pandas as pd
import re
import spacy
import neuralcoref

nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)

def get_entity_pairs(text, coref=True):
    # The body is cut off in the source: it resolves coreferences (when
    # coref=True) and extracts (subject, relation, object) rows, with the
    # subject_type/object_type columns consumed by prepro and draw_kg
    ...
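
Since the body is missing here, the following is only a rough sketch of the general idea, not the author's implementation: resolve coreferences via neuralcoref's doc._.coref_resolved attribute, then take each sentence's syntactic subject, root verb, and object as a triple.

def get_entity_pairs_sketch(text, coref=True):
    doc = nlp(text)
    if coref and doc._.has_coref:
        doc = nlp(doc._.coref_resolved)  # re-parse the coref-resolved text
    rows = []
    for sent in doc.sents:
        subjects = [t for t in sent if t.dep_ in ('nsubj', 'nsubjpass')]
        objects = [t for t in sent if t.dep_ in ('dobj', 'pobj', 'attr')]
        if subjects and objects:
            rows.append({'subject': subjects[0].text,
                         'relation': sent.root.lemma_,
                         'object': objects[0].text,
                         'subject_type': subjects[0].ent_type_,
                         'object_type': objects[0].ent_type_})
    return pd.DataFrame(rows)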
import networkx as nx
import matplotlib.pyplot as plt

def draw_kg(pairs):
    k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object',
                                      create_using=nx.MultiDiGraph())
    # Scale node size by degree so hub entities stand out
    node_deg = nx.degree(k_graph)
    layout = nx.spring_layout(k_graph, k=0.15, iterations=20)
    plt.figure(num=None, figsize=(120, 90), dpi=80)
    # The drawing calls are cut off in the source; a minimal completion:
    nx.draw_networkx(k_graph, pos=layout,
                     node_size=[deg * 500 for _, deg in node_deg])
    plt.axis('off')
    plt.show()
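
Note that figsize=(120, 90) produces a very large canvas, sized for graphs with hundreds of nodes. Usage is a single call on the pair DataFrame:

draw_kg(pairs)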
import wikipediaapi  # pip install wikipedia-api
import pandas as pd
import concurrent.futures
from tqdm import tqdm

def wiki_scrape(topic_name, verbose=True):
    wiki_api = wikipediaapi.Wikipedia(language='en',
                                      extract_format=wikipediaapi.ExtractFormat.WIKI)  # assumed, mirrors wiki_page
    def wiki_link(link):
        try:
            page = wiki_api.page(link)
            if page.exists():
                return {'page': link, 'text': page.text, 'link': page.fullurl,
                        'categories': list(page.categories.keys())}
        except Exception:
            return None
    # The rest of the function is cut off in the source; it presumably maps
    # wiki_link over the seed page's links with concurrent.futures and tqdm
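
Assuming the completed function returns one row per scraped page (the column name and seed topic below are hypothetical), feeding the whole corpus into the pair extractor might look like:

wiki_data = wiki_scrape('United States v. Nixon')
all_pairs = pd.concat(get_entity_pairs(t) for t in wiki_data['text'])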
import urllib.request
from bs4 import BeautifulSoup
import spacy
import neuralcoref

nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)

# Pull the opinion text of United States v. Nixon (418 U.S. 683)
html = urllib.request.urlopen('https://www.law.cornell.edu/supremecourt/text/418/683').read()
soup = BeautifulSoup(html, 'html.parser')
# Keep only substantial paragraph text, dropping navigation and short fragments
text = ''.join([t for t in soup.find_all(text=True)
                if t.parent.name == 'p' and len(t) >= 25])
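
With the opinion text in hand, the pieces above chain together end to end (assuming the functions from the earlier snippets are in scope):

pairs = get_entity_pairs(text)
draw_kg(pairs)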
installation.sh: Install Neuralcoref from source
# Create and activate a virtual environment, then build neuralcoref from source
python3 -m venv .env
source .env/bin/activate
git clone https://github.com/huggingface/neuralcoref.git
cd neuralcoref
pip install -r requirements.txt
pip install -e .
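
A quick smoke test for the build (the example sentence comes from the neuralcoref README):

import spacy
import neuralcoref
nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)
doc = nlp('My sister has a dog. She loves him.')
print(doc._.has_coref)       # expected: True
print(doc._.coref_resolved)  # expected: 'My sister has a dog. My sister loves a dog.'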