Skip to content

Instantly share code, notes, and snippets.

@linuskohl
Created June 26, 2020 20:02
Show Gist options
  • Save linuskohl/6b06b30a98017580460d63c3c7743c1f to your computer and use it in GitHub Desktop.
Save linuskohl/6b06b30a98017580460d63c3c7743c1f to your computer and use it in GitHub Desktop.
# Load similarities. The CSV has no header row, so column names are supplied
# here: a pair of CUIs followed by three similarity measures.
cui_similarities = pd.read_csv(
    "cui_pairings_out.csv",
    header=None,
    names=["cui_0", "cui_1", "lch", "path", "wup"],
)
# Build index for faster access. Every pair is added in both directions so
# that both (a, b) and (b, a) are present in the (cui_0, cui_1) index.
cui_similarities_reverse = cui_similarities.rename(
    columns={"cui_0": "cui_1", "cui_1": "cui_0"}
)
cui_table = pd.concat([cui_similarities, cui_similarities_reverse], sort=False)
cui_table.set_index(["cui_0", "cui_1"], inplace=True)
# A single full sort of the MultiIndex is sufficient; sorting by one level
# first and then re-sorting everything only repeats the work.
cui_table = cui_table.sort_index()
# Keep the first occurrence of each (cui_0, cui_1) pair. The explicit copy
# makes the result an independent frame, so the column assignments below do
# not operate on a selection of another frame (SettingWithCopyWarning).
cui_table = cui_table[~cui_table.index.duplicated(keep='first')].copy()
# Clean values: anything that cannot be parsed as a number becomes NaN and
# the affected rows are dropped.
measure_columns = ["lch", "path", "wup"]
for measure in measure_columns:
    cui_table[measure] = pd.to_numeric(cui_table[measure], errors='coerce')
cui_table.dropna(inplace=True)
# Drop all rows without valid distance, i.e. rows where every measure is -1.
# NOTE(review): a row with -1 in only some of the measures is kept, and that
# -1 then takes part in the min-max scaling below -- confirm this is intended.
has_valid_distance = (cui_table[measure_columns] != -1).any(axis=1)
cui_table = cui_table[has_valid_distance].copy()
# Scale similarities to range [0,1], each measure independently.
for measure in measure_columns:
    cui_table[measure] = minmax_scale(cui_table[measure])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment