Skip to content

Instantly share code, notes, and snippets.

@he7d3r
Created January 31, 2015 19:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save he7d3r/398f8000d445ccd1558f to your computer and use it in GitHub Desktop.
Save he7d3r/398f8000d445ccd1558f to your computer and use it in GitHub Desktop.
Prints a graph in Graphviz (DOT) syntax showing the dependencies between the features and data sources of revscoring.
import sys

from revscoring.features import *
from revscoring.datasources import *
# Names below are expected to be provided by the star imports at the top of
# the file. Order matters: an entry's position in `nodes` is used further down
# as its numeric node ID in the DOT output.
features = [
    added_badwords_ratio,
    added_misspellings_ratio,
    badwords_added,
    bytes_changed,
    chars_added,
    day_of_week_in_utc,
    hour_of_day_in_utc,
    is_content_namespace,
    is_custom_comment,
    is_mainspace,
    is_previous_user_same,
    is_section_comment,
    longest_repeated_char_added,
    longest_token_added,
    markup_chars_added,
    misspellings_added,
    numeric_chars_added,
    page_age_in_seconds,
    prev_badwords,
    prev_misspellings,
    prev_words,
    proportion_of_badwords_added,
    proportion_of_markup_added,
    proportion_of_misspellings_added,
    proportion_of_numeric_added,
    proportion_of_prev_badwords,
    proportion_of_prev_misspellings,
    proportion_of_symbolic_added,
    proportion_of_uppercase_added,
    seconds_since_last_page_edit,
    seconds_since_last_user_edit,
    segments_added,
    segments_removed,
    symbolic_chars_added,
    uppercase_chars_added,
    user_age_in_seconds,
    user_is_anon,
    user_is_bot,
    words_added,
    words_removed,
]

datasources = [
    contiguous_segments_added,
    contiguous_segments_removed,
    first_rev_doc,
    first_revision_metadata,
    namespaces,
    previous_rev_doc,
    previous_revision_metadata,
    previous_revision_text,
    previous_user_rev_doc,
    previous_user_revision_metadata,
    rev_doc,
    revision_diff,
    revision_metadata,
    revision_text,
    site_info_doc,
    tokens_added,
    tokens_removed,
    user_doc,
    user_info,
]

# Remaining nodes are listed as plain strings rather than imported objects.
other = [
    'language',
    'session',
    'rev_id',
]

# Single flat list of every graph node: features, then data sources, then the
# string-named extras.
nodes = features + datasources + other
# Emit the dependency graph on stdout in Graphviz DOT syntax.
print('digraph G {')
print('\tlabel = "Dependencies between features and data sources of revscoring";')
print('\trankdir = "TB";')
# Default node style; nodes that are not Feature instances override the colour.
print('\tnode [style="filled",fillcolor="palegreen"];')

# Declare one DOT node per entry, using its position in `nodes` as the node ID.
for i, node in enumerate(nodes):
    # Escape double quotes so a label can never terminate the DOT string early.
    label = str(node).replace('"', '\\"')
    if isinstance(node, Feature):
        print('\t{0} [label="{1}"];'.format(i, label))
    else:
        print('\t{0} [label="{1}",fillcolor="orange"];'.format(i, label))

# Emit one edge per declared dependency. Entries without a `dependencies`
# attribute (for example the plain strings in `nodes`) contribute no edges.
for i, node in enumerate(nodes):
    for dependency in getattr(node, 'dependencies', ()):
        try:
            target = nodes.index(dependency)
        except ValueError:
            # The dependency is not one of the listed nodes. Warn on stderr and
            # keep going so stdout still receives a complete, closed graph
            # instead of being cut off by a traceback.
            sys.stderr.write(
                'warning: {0} depends on {1}, which is not in the node list; '
                'edge skipped\n'.format(node, dependency))
            continue
        print('\t{0} -> {1};'.format(i, target))

print('}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment