This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: fuzzy person-name matching with the `hmni` package.
# Fixed: removed the trailing " | |" scrape artifacts that made the
# snippet syntactically invalid Python.
import hmni

# Initialize a Matcher object with the pre-trained Latin-script model.
matcher = hmni.Matcher(model='latin')

# Single-pair similarity: returns a similarity score
# (the recorded output below suggests a value in [0, 1]).
matcher.similarity('Alan', 'Al')
# 0.6838303319889133

# With prob=False the result is binary — presumably the score
# thresholded to a 0/1 match decision (TODO confirm against hmni docs).
matcher.similarity('Alan', 'Al', prob=False)
# 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch a Wikipedia page through the wikipedia-api client and start
# assembling its data into a pandas DataFrame.
# NOTE(review): this snippet is truncated by the gist preview — the
# pd.DataFrame literal is cut off mid-expression — and the trailing
# " | |" tokens on each line are scrape artifacts, not code.
# Original indentation was also lost in scraping.
def wiki_page(page_name): | |
# Plain-text extraction (no wiki markup) for the English Wikipedia.
wiki_api = wikipediaapi.Wikipedia(language='en', | |
extract_format=wikipediaapi.ExtractFormat.WIKI) | |
# Rebinds the parameter to the page object — shadows the input name.
page_name = wiki_api.page(page_name) | |
# Bail out early (returns None) when the page does not exist.
if not page_name.exists(): | |
print('Page {} does not exist.'.format(page_name)) | |
return | |
# DataFrame construction continues beyond the visible lines.
page_data = pd.DataFrame({ | |
'page': page_name, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
# Preprocess subject/object relation pairs into a directed graph.
# NOTE(review): truncated by the gist preview — the function body
# continues past the visible lines; `hard_lmt` is never used in what
# is shown here. Trailing " | |" tokens are scrape artifacts and the
# original indentation was lost.
def prepro(pairs, filterout=None, hard_lmt=100000): | |
# Mark every pair with a constant relation weight of 1.
pairs['relation'] = 1 | |
# Build a DiGraph with 'subject' as source and 'object' as target.
G = nx.from_pandas_edgelist(pairs, 'subject', 'object', | |
create_using=nx.DiGraph()) | |
# Optionally drop nodes whose subject/object entity type is excluded.
if filterout: | |
# Keep the union of subjects and objects whose types are NOT in
# `filterout`; de-duplicated via set().
nodes = \ | |
list(set(pairs[~pairs.subject_type.isin(filterout)]['subject'].tolist() | |
+ pairs[~pairs.object_type.isin(filterout)]['object'].tolist())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Restrict the knowledge graph to the part reachable from `node`
# (DFS successors) — presumably for focused plotting; the snippet is
# truncated by the gist preview, so the drawing code that likely
# follows is not visible. Trailing " | |" tokens are scrape artifacts
# and the original indentation was lost.
def filter_graph(pairs, node): | |
# Multigraph: parallel edges allowed between the same two entities.
k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object', | |
create_using=nx.MultiDiGraph()) | |
# Mapping of each visited node to its DFS successor list.
edges = nx.dfs_successors(k_graph, node) | |
nodes = [] | |
# Collect every node touched by the DFS (keys and their successors).
for k, v in edges.items(): | |
nodes.extend([k]) | |
nodes.extend(v) | |
subgraph = k_graph.subgraph(nodes) | |
# NOTE(review): layout is computed over the FULL graph, not the
# subgraph — possibly intentional to keep positions stable; confirm.
layout = (nx.random_layout(k_graph)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
import spacy | |
import neuralcoref | |
# Load spaCy's large English model and attach the neuralcoref
# coreference-resolution component to its pipeline.
nlp = spacy.load('en_core_web_lg') | |
neuralcoref.add_to_pipe(nlp) | |
# Extract (subject, relation, object) entity pairs from `text`,
# with optional coreference resolution. NOTE(review): only the
# signature is visible — the body is cut off by the gist preview.
# Trailing " | |" tokens are scrape artifacts.
def get_entity_pairs(text, coref=True): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
import matplotlib.pyplot as plt | |
# Plot the knowledge graph built from subject/object pairs.
# NOTE(review): truncated by the gist preview — the actual nx drawing
# calls presumably follow the visible lines. Trailing " | |" tokens
# are scrape artifacts and the original indentation was lost.
def draw_kg(pairs): | |
k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object', | |
create_using=nx.MultiDiGraph()) | |
# Degree view — likely used later to scale node sizes (not shown).
node_deg = nx.degree(k_graph) | |
# Force-directed layout; small k spreads nodes only slightly.
layout = nx.spring_layout(k_graph, k=0.15, iterations=20) | |
# Very large canvas (120x90 in at 80 dpi) for a dense graph.
plt.figure(num=None, figsize=(120, 90), dpi=80) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipediaapi # pip install wikipedia-api | |
import pandas as pd | |
import concurrent.futures | |
from tqdm import tqdm | |
# Scrape a Wikipedia topic page and (given the concurrent.futures /
# tqdm imports) presumably fan out over its links in parallel — the
# body is cut off by the gist preview, so this is unconfirmed.
# Trailing " | |" tokens are scrape artifacts; indentation was lost.
def wiki_scrape(topic_name, verbose=True): | |
# Inner worker: fetch one linked page, skipping failures.
# NOTE(review): `wiki_api` is defined outside the visible lines.
def wiki_link(link): | |
try: | |
page = wiki_api.page(link) | |
if page.exists(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the text of a U.S. Supreme Court opinion and prepare a
# spaCy pipeline with neural coreference resolution.
# Fixed: removed the trailing " | |" scrape artifacts that made the
# snippet syntactically invalid Python.
import urllib.request

from bs4 import BeautifulSoup
import spacy
import neuralcoref

# Large English model + neuralcoref component in the pipeline.
nlp = spacy.load('en_core_web_lg')
neuralcoref.add_to_pipe(nlp)

# Fetch the raw HTML of the opinion page (418 U.S. 683).
html = urllib.request.urlopen('https://www.law.cornell.edu/supremecourt/text/418/683').read()
soup = BeautifulSoup(html, 'html.parser')

# Keep only substantial text nodes that sit directly inside a <p> tag
# (>= 25 chars filters out short navigation/citation fragments) and
# concatenate them into one string.
# NOTE(review): bs4's `text=` argument is deprecated in favor of
# `string=`; kept as-is to avoid changing behavior on old bs4 versions.
text = ''.join([t for t in soup.find_all(text=True) if t.parent.name == 'p' and len(t) >= 25])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create an isolated virtual environment and install neuralcoref
# from source in editable mode.
# Fixed: `venv .env` is not a command — the venv module must be run
# via the Python interpreter. Also removed trailing " | |" scrape
# artifacts from every line.
python3 -m venv .env
source .env/bin/activate
git clone https://github.com/huggingface/neuralcoref.git
cd neuralcoref
pip install -r requirements.txt
# Editable install so local changes to the clone take effect directly.
pip install -e .