Skip to content

Instantly share code, notes, and snippets.

@dominiek
Created March 25, 2010 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dominiek/343781 to your computer and use it in GitHub Desktop.
Save dominiek/343781 to your computer and use it in GitHub Desktop.
/*
* First (promising) results from my 'Semantic Categorizer' which I use to classify interests.
* This example run takes the word 'Ruby', which has 2 main meanings (programming language
* and gemstone), and tries to rank the categories of both meanings. The more recursive
* the searches, the more abstract the categories.
*
* This run uses Wikipedia category hierarchies that are still very crude and incomplete.
* It runs on a high-performance combination of MongoDB and NodeJS, but it's still
* not fast enough. Soon this system must be able to perform hundreds of classifications per second.
*
*/
// INPUT = 'Ruby'
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 1 2>/dev/null
// Categorization of 'Ruby', 1 lookup degrees (took 1704 ms)
[ [ 2, 'Dynamically-typed_programming_languages' ]
, [ 2, 'Programming_languages' ]
, [ 2, 'Class-based_programming_languages' ]
, [ 1, 'Dynamic_programming_languages' ]
, [ 1, 'Object-oriented_programming' ]
]
[ [ 3, 'Minerals' ]
, [ 2, 'Superhard_materials' ]
, [ 1, 'Gemstones' ]
, [ 1, 'Wealth' ]
, [ 1, 'Materials' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 2 2>/dev/null
// Categorization of 'Ruby', 2 lookup degrees (took 3731 ms)
[ [ 6, 'Programming_languages' ]
, [ 2, 'Dynamically-typed_programming_languages' ]
, [ 2, 'Programming_paradigms' ]
, [ 2, 'Object-oriented_programming' ]
, [ 2, 'Class-based_programming_languages' ]
]
[ [ 3, 'Minerals' ]
, [ 2, 'Chemical_compounds_by_element' ]
, [ 2, 'Wealth' ]
, [ 2, 'Superhard_materials' ]
, [ 1, 'Aluminium_compounds' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 3 2>/dev/null
// Categorization of 'Ruby', 3 lookup degrees (took 7425 ms)
[ [ 6, 'Programming_languages' ]
, [ 4, 'Computer_programming' ]
, [ 3, 'Computing' ]
, [ 3, 'Computer_science' ]
, [ 2, 'Programming_language_topics' ]
]
[ [ 4, 'Chemical_elements' ]
, [ 4, 'Minerals' ]
, [ 3, 'Socioeconomics' ]
, [ 3, 'Materials' ]
, [ 2, 'Chemical_compounds_by_element' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 4 2>/dev/null
// Categorization of 'Ruby', 4 lookup degrees (took 16881 ms)
[ [ 6, 'Programming_languages' ]
, [ 5, 'Computing' ]
, [ 4, 'Computer_science' ]
, [ 4, 'Computer_programming' ]
, [ 3, 'Programming_language_topics' ]
]
[ [ 9, 'Chemistry' ]
, [ 5, 'Economics' ]
, [ 4, 'Materials' ]
, [ 4, 'Socioeconomics' ]
, [ 4, 'Business' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 5 2>/dev/null
// Categorization of 'Ruby', 5 lookup degrees (took 31622 ms)
[ [ 8, 'Computing' ]
, [ 6, 'Programming_languages' ]
, [ 5, 'Software_engineering' ]
, [ 5, 'Information_technology' ]
, [ 4, 'Computer_science' ]
]
[ [ 12, 'Chemistry' ]
, [ 9, 'Society' ]
, [ 7, 'Social_sciences' ]
, [ 7, 'Business' ]
, [ 7, 'Economics' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 6 2>/dev/null
// Categorization of 'Ruby', 6 lookup degrees (took 54567 ms)
[ [ 10, 'Computing' ]
, [ 6, 'Programming_languages' ]
, [ 5, 'Software_engineering' ]
, [ 5, 'Information_technology' ]
, [ 5, 'Project_management' ]
]
[ [ 18, 'Chemistry' ]
, [ 12, 'Society' ]
, [ 11, 'Social_sciences' ]
, [ 11, 'Physics' ]
, [ 10, 'Interdisciplinary_fields' ]
]
// thegibson:db dodo$ /usr/local/node-0.1.33/bin/node categorize.js Ruby 7 2>/dev/null
// Categorization of 'Ruby', 7 lookup degrees (took 82148 ms)
[ [ 11, 'Computing' ]
, [ 8, 'Science' ]
, [ 8, 'Interdisciplinary_fields' ]
, [ 6, 'Problem_solving' ]
, [ 6, 'Science_studies' ]
]
[ [ 20, 'Chemistry' ]
, [ 19, 'Interdisciplinary_fields' ]
, [ 17, 'Physics' ]
, [ 16, 'Society' ]
, [ 14, 'Old_requests_for_peer_review' ]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment