Created
January 31, 2015 19:44
-
-
Save he7d3r/398f8000d445ccd1558f to your computer and use it in GitHub Desktop.
Prints a graph in Graphviz (DOT) syntax showing the dependencies between the features and data sources of revscoring.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from revscoring.features import * | |
from revscoring.datasources import * | |
# Feature objects to draw. The order matters: an entry's position in the
# combined `nodes` list is used as its numeric node id in the DOT output.
features = [
    added_badwords_ratio,
    added_misspellings_ratio,
    badwords_added,
    bytes_changed,
    chars_added,
    day_of_week_in_utc,
    hour_of_day_in_utc,
    is_content_namespace,
    is_custom_comment,
    is_mainspace,
    is_previous_user_same,
    is_section_comment,
    longest_repeated_char_added,
    longest_token_added,
    markup_chars_added,
    misspellings_added,
    numeric_chars_added,
    page_age_in_seconds,
    prev_badwords,
    prev_misspellings,
    prev_words,
    proportion_of_badwords_added,
    proportion_of_markup_added,
    proportion_of_misspellings_added,
    proportion_of_numeric_added,
    proportion_of_prev_badwords,
    proportion_of_prev_misspellings,
    proportion_of_symbolic_added,
    proportion_of_uppercase_added,
    seconds_since_last_page_edit,
    seconds_since_last_user_edit,
    segments_added,
    segments_removed,
    symbolic_chars_added,
    uppercase_chars_added,
    user_age_in_seconds,
    user_is_anon,
    user_is_bot,
    words_added,
    words_removed,
]

# Data source objects to draw, listed after the features.
datasources = [
    contiguous_segments_added,
    contiguous_segments_removed,
    first_rev_doc,
    first_revision_metadata,
    namespaces,
    previous_rev_doc,
    previous_revision_metadata,
    previous_revision_text,
    previous_user_rev_doc,
    previous_user_revision_metadata,
    rev_doc,
    revision_diff,
    revision_metadata,
    revision_text,
    site_info_doc,
    tokens_added,
    tokens_removed,
    user_doc,
    user_info,
]

# Remaining graph nodes, given as plain strings rather than imported objects.
other = ['language', 'session', 'rev_id']

# Every node of the graph: features first, then data sources, then the rest.
nodes = []
nodes.extend(features)
nodes.extend(datasources)
nodes.extend(other)
# Write the dependency graph to stdout as Graphviz DOT text.
print('digraph G {')
print('\tlabel = "Dependencies between features and data sources of revscoring";')
print('\trankdir = "TB";')
# Default node appearance; non-Feature nodes override the fill colour below.
print('\tnode [style="filled",fillcolor="palegreen"];')

# Node declarations: each entry of `nodes` is identified by its list position
# and labelled with its string form.
for node_id, node in enumerate(nodes):
    if not isinstance(node, Feature):
        # Anything that is not a Feature is filled orange instead.
        node_template = '\t{0} [label="{1}"fillcolor="orange"];'
    else:
        node_template = '\t{0} [label="{1}"];'
    print(node_template.format(node_id, node))

# Edge declarations: one arrow from each node to every entry of its
# `dependencies` attribute. Nodes without that attribute get no outgoing edge.
for node_id, node in enumerate(nodes):
    if not hasattr(node, 'dependencies'):
        continue
    for required in node.dependencies:
        # list.index raises ValueError if the dependency is not in `nodes`.
        print('\t{0} -> {1};'.format(node_id, nodes.index(required)))

print('}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment