This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the leaflet library and render a basic map using the default
# OpenStreetMap tile layer.
library(leaflet)
leaflet() %>% addTiles()

# Show a map backed by Esri World Imagery satellite tiles.
# The tile URL uses https so the layer is not blocked as mixed content when
# the widget is embedded in an https page; the attribution credits the
# tile provider on the rendered map.
leaflet() %>%
  addTiles(
    urlTemplate = "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
    attribution = "Tiles &copy; Esri"
  )
#Make a demo fake data set |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def filter_graph(pairs, node): | |
k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object', | |
create_using=nx.MultiDiGraph()) | |
edges = nx.dfs_successors(k_graph, node) | |
nodes = [] | |
for k, v in edges.items(): | |
nodes.extend([k]) | |
nodes.extend(v) | |
subgraph = k_graph.subgraph(nodes) | |
layout = (nx.random_layout(k_graph)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
import matplotlib.pyplot as plt | |
def draw_kg(pairs): | |
k_graph = nx.from_pandas_edgelist(pairs, 'subject', 'object', | |
create_using=nx.MultiDiGraph()) | |
node_deg = nx.degree(k_graph) | |
layout = nx.spring_layout(k_graph, k=0.15, iterations=20) | |
plt.figure(num=None, figsize=(120, 90), dpi=80) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipediaapi # pip install wikipedia-api | |
import pandas as pd | |
import concurrent.futures | |
from tqdm import tqdm | |
def wiki_scrape(topic_name, verbose=True): | |
def wiki_link(link): | |
try: | |
page = wiki_api.page(link) | |
if page.exists(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
import spacy | |
import neuralcoref | |
nlp = spacy.load('en_core_web_lg') | |
neuralcoref.add_to_pipe(nlp) | |
def entity_pairs(text, coref=True): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wikipediaapi | |
import pandas as pd | |
def wiki_page(page_name): | |
wiki_api = wikipediaapi.Wikipedia(language='en', | |
extract_format=wikipediaapi.ExtractFormat.WIKI) | |
page_name = wiki_api.page(page_name) | |
if not page_name.exists(): | |
print('page does not exist') | |
return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Put all selection results together: one row per feature, one column per
# selection method holding that method's support flag for the feature.
feature_selection_df = pd.DataFrame({
    'Feature': feature_name,
    'Pearson': cor_support,
    'Chi-2': chi_support,
    'RFE': rfe_support,
    'Logistics': embeded_lr_support,
    'Random Forest': embeded_rf_support,
    'LightGBM': embeded_lgb_support,
})
# Count how many methods selected each feature. Only the per-method support
# columns are summed: including the string 'Feature' column in a row-wise sum
# raises TypeError on pandas >= 2.0 (older versions silently dropped it).
method_columns = feature_selection_df.columns.drop('Feature')
feature_selection_df['Total'] = feature_selection_df[method_columns].sum(axis=1)
# Rank features by number of votes (ties broken by feature name, descending)
# and renumber the rows from 1 so the index reads as a rank.
feature_selection_df = feature_selection_df.sort_values(['Total', 'Feature'], ascending=False)
feature_selection_df.index = range(1, len(feature_selection_df) + 1)
# Display the top num_feats features.
feature_selection_df.head(num_feats)