Skip to content

Instantly share code, notes, and snippets.

@zackster
Last active October 19, 2017 03:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zackster/24f04d1e18754694cd13457f81db7f2b to your computer and use it in GitHub Desktop.
Save zackster/24f04d1e18754694cd13457f81db7f2b to your computer and use it in GitHub Desktop.
For the benefit of https://codefor.cash subscribers – automatic matching of skills to online job ads
# Expands a list of skill keywords with synonyms, spelling variants and the
# broader skills they imply, so the list can be matched against job-ad text.
#
# Three passes run in order; each pass only looks at the output of the
# previous ones and is not re-run afterwards (so skills added by pass 3 are
# not themselves expanded into synonyms or variants):
#
# 1. Synonyms   - if any member of a synonym group is present, the whole
#                 group is added ("ror" => "rails", "ruby on rails", ...).
# 2. Variants   - a trailing "js"/".js" or "db" is dropped ("node.js" =>
#                 "node", "mongodb" => "mongo"); multi-word keywords are also
#                 added squashed together and dash-joined.
# 3. Implied    - knowing a framework adds its language/stack
#                 ("rails" => "ruby", "html", "css", "javascript").
#
# Comparisons are exact and case-sensitive; every built-in entry is lowercase.
#
# @param keyword_list [Array<String>] starting keywords; never mutated
#   (a frozen array is accepted).
# @return [Array<String>] a new array: the original keywords first, followed
#   by every added keyword, without duplicates and without empty strings
#   produced by the variant rules.
def expand_keyword_list(keyword_list)
  # Work on a copy: every step below appends in place, and the caller's array
  # must not be left half-expanded (or blow up when it is frozen).
  keywords = keyword_list.dup

  # NOTE(review): ' unity ' is space-padded, presumably so that substring
  # matching downstream does not hit words like "community" - confirm against
  # the matcher before normalising it.
  synonym_list = {
    'rails' => ['ror', 'ruby on rails', 'rails'],
    'html' => ['html5', 'html', 'html4'],
    'css' => ['css3', 'css', 'cascading style sheets', 'cascading stylesheets'],
    'node' => ['node', 'nodejs', 'node.js', 'node js'],
    'javascript' => ['javascript', 'java script', 'js'],
    ' unity ' => [' unity ', 'unity3d', 'unity 3d']
  }
  synonym_list.each do |canonical, synonyms|
    # The canonical name always belongs to its own group, even if it is
    # missing from the literal list above.
    group = synonyms | [canonical]
    keywords.concat(group) if group.any? { |syn| keywords.include?(syn) }
  end
  keywords.uniq!

  js_suffix = /\.?js\z/
  db_suffix = /db\z/
  multi_word = /\s+\S+\z/

  # Iterate over a snapshot so variants added here are not processed again.
  keywords.dup.each do |kw|
    # Anything ending in "js" / ".js" => the name without that suffix. Only
    # the trailing suffix is removed (an earlier "js" inside the word is
    # kept), and a bare "js" contributes nothing.
    if js_suffix.match?(kw)
      base = kw.sub(js_suffix, '').rstrip
      keywords << base unless base.empty?
    end

    # Anything with a space => the words squashed together and dash-joined.
    # String#split without arguments ignores leading whitespace, so no empty
    # first word (and no leading dash) is produced.
    if multi_word.match?(kw)
      words = kw.split
      keywords << words.join << words.join('-')
    end

    # Anything ending in "db" => the word without the "db". A bare "db"
    # contributes nothing.
    if db_suffix.match?(kw)
      base = kw.sub(db_suffix, '').rstrip
      keywords << base unless base.empty?
    end
  end
  keywords.uniq!

  php_frameworks = ['laravel', 'phalcon', 'symfony', 'codeigniter', 'cakephp',
                    'zend', 'fuelphp', 'slim', 'phpixie', 'fat-free', 'aura', 'silverstripe', 'wordpress']
  ruby_frameworks = ['sinatra', 'rails']
  python_frameworks = ['growler', 'cherrypy', 'morepath', 'turbogears2', 'circuits', 'watson-framework', 'pycnic', 'webcore', 'reahl', 'django', 'flask', 'tornado', 'falcon', 'hug', 'sanic', 'aiohttp', 'pyramid', 'tensorflow', 'requests', 'simplejson', 'sql alchemy', 'scrapy', 'matplotlib', 'scikit-learn', 'scikit', 'pygame', 'arrow', 'wxpython', 'beautiful soup', 'natural language toolkit', 'nltk', 'twisted', 'pyglet', 'nose', 'peewee', 'scapy', 'numpy', 'scipy', 'ipython', 'sympy']
  javascript_frameworks = ['es6', 'coffeescript', 'typescript', 'elm', 'react', 'angular', 'ember', 'vue', 'backbone', 'redux', 'mobx', 'relay', 'firebase', 'graphql', 'apollo', 'falcor', 'horizon', 'meteor', 'feathersjs', 'donejs', 'mern', 'mean', 'mocha', 'jasmine', 'enzyme', 'jest', 'ava', 'aphrodite', 'webpack', 'grunt', 'gulp', 'browserify', 'bower', 'react native', 'cordova', 'phonegap', 'nativescript']
  java_frameworks = ['spring', 'struts', 'hibernate', 'jsf', 'java ee', 'vaadin', 'google web toolkit', 'gwt', 'grails', 'flexive', 'jspx', 'openxava', 'crawler4j', 'apache nutch', 'hk2', 'dagger', 'appfuse', 'jlisa', 'drools', 'easy rules', 'jbpm', 'jeddict', 'actframework', 'activiti', 'akka', 'android plot', 'apache accumulo', 'apache activemq', 'apache avalon', 'apache avro', 'apache axis', 'apache blur', 'apache bookkeeper', 'apache camel', 'apache cayenne', 'apache click', 'apache cocoon', 'apache commons', 'apache crunch', 'apache cxf', 'apache datafu', 'apache empire db', 'apache felix', 'apache flume', 'apache geronimo', 'apache giraph', 'apache hadoop', 'apache hbase', 'apache hive', 'apache jackrabbit', 'apache javanlp', 'apache jena', 'apache kafka', 'apache log4j', 'apache lucene', 'apache mahout', 'apache mesos', 'apache mina', 'apache oodt', 'apache oozie', 'apache opennlp', 'apache pdfbox', 'apache pig', 'apache pivot', 'apache poi', 'apache qpid', 'apache river (jini)', 'apache samza', 'apache shiro', 'apache sling', 'apache solr', 'apache spark', 'apache storm', 'apache struts', 'apache tapestry', 'apache tika', 'apache tomcat', 'apache turbine', 'apache uima', 'apache usergrid', 'apache velocity', 'apache vxquery', 'apache wink', 'apache xerces', 'apache zookeeper', 'axon', 'barracuda', 'beads', 'birt', 'bigfaceless', 'biojava', 'bluecove', 'bouncy castle cryptographic', 'cascading', 'checker', 'cogcompnlp', 'codename one', 'controlsfx', 'deeplearning4j', 'directwebremoting', 'dropwizard jersey', 'eclipselink', 'ehcache', 'ejml', 'facebook4j', 'fmj', 'frame4j', 'freemarker template', 'gcviewer', 'geoapi', 'geotools', 'glassfish', 'google gson', 'google guava', 'google guice', 'google web toolkit (gwt)', 'gstreamer', 'gxt', 'hibernate', 'hsqldb', 'ibatis', 'infinispan', 'itext', 'jackcess', 'jackson', 'java collections framework', 'java media framework', 'javassist', 'javers', 'jaxp', 'jboss seam', 'jcabi', 'jdom', 'jello', 'jersey', 'jetty',
                     'jfreechart', 'jidesoft', 'jmock', 'jmonkeyengine', 'joda time', 'jogamp', 'jooby', 'jppf', 'jprofiler', 'jrockit', 'jsf', 'jsonlib', 'jsoup', 'jsyn', 'jts topology', 'junit', 'liquibase', 'logback', 'lombok', 'loopj', 'lwjgl', 'mapdb', 'mockito', 'mybatis', 'nd4j', 'netty', 'neuroph', 'ninja', 'opencsv', 'oracle weblogic', 'orientdb', 'ormlite', 'pi4j', 'play', 'primefaces', 'quartz', 'quasar', 'rabbitmq', 'ratpack', 'reactor', 'resteasy', 'restfb', 'restlet', 'sax', 'scribejava', 'selenide', 'selenium', 'slf4j', 'slick2d', 'smack', 'sonarlint', 'sonarqube', 'jasper reports', 'spock', 'spring amqp', 'spring android', 'spring aop(aspect oriented programming)', 'spring batch', 'spring boot', 'spring core', 'spring data', 'spring data jdbc extensions', 'spring data mongodb', 'spring data neo4j', 'spring hateoas', 'spring integration', 'spring ldap', 'spring mvc', 'spring roo', 'spring security', 'spring session', 'spring social', 'spring xd', 'stormpath', 'stringtemplate', 'suanshu', 'testng', 'thymeleaf', 'twitter4j', 'vaadin', 'vertx', 'vraptor', 'webfirmframework', 'wildfly', 'wordcram', 'wso2', 'wso2 api manager', 'wso2 app manager', 'wso2 esb', 'wso2 governance registry', 'wso2 identity server', 'wso2 machine learner', 'wso2 message broker', 'xuggler', 'zkoss', 'prova', 'openrules', 'jruleengine', 'jess', 'accumulo', 'activemq', 'avalon', 'avro', 'axis', 'blur', 'bookkeeper', 'camel', 'cayenne', 'click', 'cocoon', 'commons', 'crunch', 'cxf', 'datafu', 'empire db', 'felix', 'flume', 'geronimo', 'giraph', 'hadoop', 'hbase', 'hive', 'jackrabbit', 'javanlp', 'jena', 'kafka', 'log4j', 'lucene', 'mahout', 'mesos', 'mina', 'oodt', 'oozie', 'opennlp', 'pdfbox', 'pig', 'pivot', 'poi', 'qpid', 'river (jini)', 'samza', 'shiro', 'sling', 'solr', 'spark', 'storm', 'struts', 'tapestry', 'tika', 'tomcat', 'turbine', 'uima', 'usergrid', 'velocity', 'vxquery', 'wink', 'xerces', 'zookeeper']
  css_frameworks = ['sass', 'scss', 'less', 'css modules']

  # Maps "any of these keywords is present" => "these skills are implied".
  # Entries are evaluated top to bottom against the growing list.
  # NOTE(review): 'java ' carries a trailing space, presumably so substring
  # matching does not also hit "javascript" - confirm against the matcher.
  implied_skills_tree = {
    ['rails'] => ['html', 'css', 'javascript'],
    ['node'] => ['javascript'],
    php_frameworks => ['php'],
    ruby_frameworks => ['ruby'],
    python_frameworks => ['python'],
    javascript_frameworks => ['javascript', 'js'],
    java_frameworks => ['java '],
    css_frameworks => ['css'],
    ['lamp'] => ['linux', 'apache', 'mysql', 'php'],
    ['mean'] => ['mongo', 'express', 'angular', 'node']
  }
  implied_skills_tree.each do |frameworks, implied_skills|
    keywords.concat(implied_skills) if frameworks.any? { |fw| keywords.include?(fw) }
  end

  keywords.uniq
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment