Last active
August 3, 2017 14:38
-
-
Save joelbecker/ccac03fdc5715b302a04510fdb816ebb to your computer and use it in GitHub Desktop.
Some example code for upcoming data fusion tools in recordlinkage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: queue data-fusion jobs on a FuseLinks object, then run them all.
#
# NOTE(review): `rl` and `comp` are not defined in this snippet. Presumably
# `rl` is the `recordlinkage` package and `comp` is a comparison object built
# earlier that exposes `vectors`, `df_a` and `df_b` -- confirm before running.

# Initialize
fuse = rl.FuseLinks(unique_a=False, unique_b=False)

# Queue inclusion of non-conflicting columns
# (first list: column names from data frame A; second list: from data frame B)
fuse.keep(
    ['dfa_col_1', 'dfa_col_2', 'dfa_col_3'],
    ['dfb_col_1', 'dfb_col_2', 'dfb_col_3'],
)

# Queue conflict resolution jobs
# Each call names a pair of columns to reconcile and, via `name`, a label for
# the job. NOTE(review): the method names look like the conflict-handling
# strategies from the data-fusion literature (Bleiholder & Naumann) -- confirm
# exact semantics against the FuseLinks documentation.
fuse.no_gossiping('col1', 'col2', name='no_gossip')
fuse.roll_the_dice('col1', 'col2', name='random')
fuse.trust_your_friends('col1', 'col2', trusted='b', name='trust_b')
fuse.trust_your_friends('col1', 'col2', trusted='a', name='trust_a')
fuse.pass_it_on('col1', 'col2', name='group')
fuse.cry_with_the_wolves('col1', 'col2', name='wolves')
# The third positional argument selects the aggregation to apply.
fuse.meet_in_the_middle('col1', 'col2', 'sum', name='sum')
fuse.meet_in_the_middle('col1', 'col2', 'mean', name='mean')
fuse.meet_in_the_middle('col1', 'col2', 'stdev', name='stdev')
fuse.meet_in_the_middle('col1', 'col2', 'variance', name='var')

# Perform conflict resolution and data fusion
fused = fuse.fuse(comp.vectors, comp.df_a, comp.df_b)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment