Skip to content

Instantly share code, notes, and snippets.

@Christopher-Thornton
Last active August 1, 2021 14:15
Show Gist options
  • Save Christopher-Thornton/77d157158f643c75e2033779c1370de6 to your computer and use it in GitHub Desktop.
Save Christopher-Thornton/77d157158f643c75e2033779c1370de6 to your computer and use it in GitHub Desktop.
import itertools
def prepro(pairs, filterout=None, hard_lmt=100000):
    """Build a labelled feature table for link prediction on a pair graph.

    The observed ``subject -> object`` rows are labelled ``relation = 1``.
    Ordered node pairs that do not occur in ``pairs`` are added as
    ``relation = 0`` rows, and every row is then decorated with text and
    graph features.

    Relies on module-level names that are not defined in this function:
    ``nx`` (networkx), ``pd`` (pandas) and ``nlp``. ``nlp`` is presumably a
    loaded spaCy pipeline -- the code only requires that ``nlp(text)``
    returns an object exposing ``vector_norm`` and ``similarity(other)``.

    Args:
        pairs: DataFrame with at least ``subject`` and ``object`` columns.
            ``subject_type`` / ``object_type`` are required when
            ``filterout`` is given; they are dropped from the result if
            present. The caller's frame is left untouched.
        filterout: Optional collection of type labels. Subjects/objects
            whose type is in this collection are not used as candidates
            for the generated ``relation = 0`` pairs.
        hard_lmt: Maximum number of candidate ordered pairs taken from
            ``itertools.permutations`` before already-observed pairs are
            removed (the number of generated rows can therefore be lower).

    Returns:
        A new DataFrame holding the observed and generated rows with the
        feature columns ``subject_vector_norm``, ``object_vector_norm``,
        ``cosine_similarity``, ``subject_out_centrality``,
        ``object_in_centrality``, ``object_pagerank``, ``subject_hub`` and
        ``object_authority``, and with ``relation`` moved to the last
        column. Row labels of the two parts are concatenated unchanged,
        so the index may contain duplicates.
    """
    # Work on a copy: the label column and the column drop below would
    # otherwise leak into the caller's frame and break a second call.
    pairs = pairs.copy()
    pairs['relation'] = 1

    G = nx.from_pandas_edgelist(pairs, 'subject', 'object',
                                create_using=nx.DiGraph())

    if filterout:
        kept_subjects = pairs.loc[~pairs.subject_type.isin(filterout),
                                  'subject'].tolist()
        kept_objects = pairs.loc[~pairs.object_type.isin(filterout),
                                 'object'].tolist()
        # dict.fromkeys de-duplicates while keeping first-seen order, so
        # the generated pairs are reproducible from run to run.
        nodes = list(dict.fromkeys(kept_subjects + kept_objects))
    else:
        nodes = list(G.nodes())

    # Candidate negatives: the first ``hard_lmt`` ordered node pairs,
    # minus every pair that is an observed edge.
    observed = set(zip(pairs.subject, pairs.object))
    candidates = itertools.islice(itertools.permutations(nodes, 2),
                                  hard_lmt)
    negatives = [pair for pair in candidates if pair not in observed]
    permutation = pd.DataFrame(negatives, columns=['subject', 'object'])
    permutation.insert(1, 'relation', 0)

    # The type columns only exist to drive ``filterout``; tolerate their
    # absence so the ``filterout=None`` path works on a bare edge list.
    pairs = pairs.drop(columns=['subject_type', 'object_type'],
                       errors='ignore')
    # DataFrame.append was removed in pandas 2.0; concat is the
    # replacement and keeps the same column alignment and row labels.
    pairs = pd.concat([pairs, permutation], sort=False)

    subjects = pairs['subject'].tolist()
    objects = pairs['object'].tolist()

    # Run ``nlp`` once per distinct string instead of four times per row.
    docs = {}
    for text in itertools.chain(subjects, objects):
        if text not in docs:
            docs[text] = nlp(text)

    pairs['subject_vector_norm'] = [docs[s].vector_norm for s in subjects]
    pairs['object_vector_norm'] = [docs[o].vector_norm for o in objects]
    pairs['cosine_similarity'] = [docs[s].similarity(docs[o])
                                  for s, o in zip(subjects, objects)]

    # Graph features are computed on the observed edges only (G was built
    # before the negative rows were added).
    out_deg = nx.out_degree_centrality(G)
    in_deg = nx.in_degree_centrality(G)
    pairs['subject_out_centrality'] = [out_deg[s] for s in subjects]
    pairs['object_in_centrality'] = [in_deg[o] for o in objects]

    # nx.pagerank_scipy was removed in networkx 3.0; nx.pagerank is the
    # supported entry point.
    pagerank = nx.pagerank(G)
    pairs['object_pagerank'] = [pagerank[o] for o in objects]

    hubs, authorities = nx.hits(G)
    pairs['subject_hub'] = [hubs[s] for s in subjects]
    pairs['object_authority'] = [authorities[o] for o in objects]

    # Move the label to the last column.
    cols = [col for col in pairs.columns if col != 'relation']
    cols.append('relation')
    return pairs.reindex(columns=cols)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment