Create a gist now

Instantly share code, notes, and snippets.

Rate affinity score between 2 web sites
package com.gfrison
import groovy.json.*
import static org.apache.commons.lang3.StringUtils.*
* @author Giancarlo Frison <>
* rate the affinity between 2 different web sites
* groovy Affinity.groovy
* remember to set calaisKey before querying the Calais web service
class Affinity {
// Stop words (prepositions, common verbs, articles, pronouns, conjunctions,
// adverbs, ...) that analyze filters out via notallow.contains(it) when building
// entity names. The list contains duplicate entries (e.g. 'about', 'across'),
// which do not affect contains().
def notallow = ['aboard', 'about', 'above', 'absent', 'across', 'after', 'against', 'along', 'alongside', 'amid', 'amidst', 'among', 'amongst', 'around', 'aside', 'astride', 'athwart', 'atop', 'barring', 'before', 'behind', 'below', 'beneath', 'beside', 'besides', 'between', 'betwixt', 'beyond', 'but', 'circa', 'concerning', 'despite', 'down', 'during', 'except', 'excluding', 'failing', 'following', 'for', 'from', 'given', 'including', 'inside', 'into', 'like', 'mid', 'minus', 'near', 'next', 'off', 'onto', 'opposite', 'out', 'outside', 'over', 'pace', 'past', 'per', 'plus', 'pro', 'qua', 'regarding', 'round', 'save', 'since', 'than', 'through', 'throughout', 'till', 'times', 'toward', 'towards', 'under', 'underneath', 'unlike', 'until', 'up', 'upon', 'versus', 'via', 'vice', 'with', 'within', 'without', 'worth', 'come', 'get', 'give', 'go', 'keep', 'let', 'make', 'put', 'seem', 'take', 'be', 'do', 'have', 'say', 'see', 'send', 'may', 'will', 'about', 'across', 'after', 'against', 'among', 'at', 'before', 'between', 'by', 'down', 'from', 'in', 'off', 'on', 'over', 'through', 'to', 'under', 'up', 'with', 'as', 'for', 'of', 'till', 'than', 'a', 'the', 'all', 'any', 'every', 'no', 'other', 'some', 'such', 'that', 'this', 'i', 'he', 'you', 'who', 'and', 'because', 'but', 'or', 'if', 'though', 'while', 'how', 'when', 'where', 'why', 'again', 'ever', 'far', 'forward', 'here', 'near', 'now', 'out', 'still', 'then', 'there', 'together', 'well', 'almost', 'enough', 'even', 'little', 'much', 'not', 'only', 'quite', 'so', 'very', 'tomorrow', 'yesterday', 'north', 'south', 'east', 'west', 'please', 'yes']
// API key concatenated into the request URL built in analyze; '...' is a
// placeholder that must be replaced with a real key.
def calaisKey = '...' // create the key on
// ASCII letters assembled from eight concatenated groups.
// NOTE(review): presumably position-aligned with UNICODE so that an index found
// in UNICODE selects the replacement character here — confirm.
static String PLAIN_ASCII = "AaEeIiOoUu" + "AaEeIiOoUuYy"+ "AaEeIiOoUuYy" + "AaOoNn" + "AaEeIiOoUuYy" + "Aa"+ "Cc" + "OoUu"
// Lookup string searched by convertNonAscii via indexOf.
// NOTE(review): the initializer is missing from this copy of the file; restore
// it from the original gist before use.
static String UNICODE =
// Scans s one character at a time, looking each character up in UNICODE, and
// accumulates output in a StringBuilder. Returns null when s is null; the
// visible return statement yields sb.toString().
// NOTE(review): the bodies of both lookup branches and the closing braces are
// missing from this copy. Presumably a hit appends PLAIN_ASCII.charAt(pos) and a
// miss appends the original character — confirm against the original gist.
public static String convertNonAscii(String s) {
if (s == null) return null;
StringBuilder sb = new StringBuilder();
int n = s.length();
for (int i = 0; i < n; i++) {
// one-character substring at index i
def c = s.substring(i, i+1);
// position of that character in the UNICODE lookup string, or -1
int pos = UNICODE.indexOf(c);
if (pos > -1) {
} else {
return sb.toString();
// Closure that fetches a JSON document for `url` from the Calais web service,
// parses it, and returns a map of entity name -> relevance * 1000.
// If the fetch throws, the error message and url are printed and the map built
// so far (empty at that point) is returned.
// NOTE(review): this copy is missing several pieces — the service base URL, the
// condition after `if (`, the iteration over the parsed document that binds
// `it.value`, and the closing braces. Restore them from the original gist.
def analyze = {url->
def entities = [:]
String json
try {
// NOTE(review): the leading '' is where the service base URL presumably was — confirm
json = ('' + calaisKey + '/json/' + url).toURL().text;
} catch (e) {
println e.message + ', url:' + url
return entities
def slurper = new JsonSlurper()
def doc = slurper.parseText(json)
if ( {
// Keeps items longer than 2 characters that are not in notallow, then lowercases
// each and strips everything except ASCII letters and digits.
// NOTE(review): convertNonAscii is called on a literal ' ' here; the original
// argument and a split step appear to be missing — confirm.
def perms = convertNonAscii(' ').findAll {it.size() > 2 && !notallow.contains(it)}.collect {
lowerCase(it).replaceAll("[^a-zA-Z0-9]", "");
def name = ''
// Append each word only while the space-joined name stays under 50 characters.
perms.each {
if ((name + ' ' + it).length() < 50)
name += ' ' + it
name = name.trim()
if (isNotBlank(name)) {
// Only entries with a truthy relevance value are recorded.
if (it.value.relevance)
entities.put(name, it.value.relevance.toDouble() * 1000)
return entities
// Entry point: analyzes the sites given as args[0] and args[1] and prints an
// affinity score accumulated from the two resulting tag maps.
// NOTE(review): this copy is missing lines. The usage message is printed
// unconditionally as shown (presumably it was guarded by an argument-count
// check), and the loop that binds `k` and `v` for `score += tags2[k]*v` is
// absent (presumably iterating tags1 entries whose key also exists in tags2) —
// confirm against the original gist.
public static void main(String[] args) {
println 'usage: groovy Affinity.groovy <site1> <site2>'
Affinity aff = new Affinity()
def tags1 = aff.analyze(args[0])
def tags2 = aff.analyze(args[1])
long score=0
score += tags2[k]*v
println "score:$score"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment