Skip to content

Instantly share code, notes, and snippets.

import wikipediaapi # pip install wikipedia-api
import pandas as pd
import concurrent.futures
from tqdm import tqdm
def wiki_scrape(topic_name, verbose=True):
def wiki_link(link):
try:
page = wiki_api.page(link)
if page.exists():
def wiki_page(page_name):
wiki_api = wikipediaapi.Wikipedia(language='en',
extract_format=wikipediaapi.ExtractFormat.WIKI)
page_name = wiki_api.page(page_name)
if not page_name.exists():
print('Page {} does not exist.'.format(page_name))
return
page_data = pd.DataFrame({
'page': page_name,
import pandas as pd
import re
import spacy
import neuralcoref
# Load the spaCy pipeline named 'en_core_web_lg' and bind it to `nlp`.
nlp = spacy.load('en_core_web_lg')
# Pass the loaded pipeline to neuralcoref so it can attach itself to `nlp`;
# this has to run after `nlp` exists.
# NOTE(review): presumably this adds coreference resolution used by the
# `coref` option of get_entity_pairs -- confirm, its body is not visible here.
neuralcoref.add_to_pipe(nlp)
def get_entity_pairs(text, coref=True):