import scala.Option.option2Iterable
import scala.concurrent.Future
import org.elasticsearch.client.transport.TransportClient
import org.elasticsearch.common.settings.ImmutableSettings
import org.elasticsearch.common.transport.InetSocketTransportAddress
import com.sksamuel.elastic4s.ElasticClient
import com.sksamuel.elastic4s.ElasticDsl._
import scala.concurrent.ExecutionContext
/**
 * Runs every query string against every supplied cluster and collects, per cluster,
 * one `query -> rank` result for each query.
 *
 * NOTE(review): the text of this class looks damaged. Closing braces, the body of the
 * `indexWhere` predicate, the `else` branch of `rank` and parts of the search query are
 * not present. Restore them from version control before relying on this file.
 * NOTE(review): `SearchResponse` is used below, but no import for it is visible in the
 * import block of this file. TODO confirm.
 *
 * @param clusters cluster label mapped to the client used to query that cluster
 * @param queries  strings to search for; each one is also handed to `rank` as the value to locate
 */
class RankingComparison(clusters: Map[String, ElasticClient], queries: List[String]) {
// For each (label, client) pair this yields `label -> <one result per query>`, where every
// result is the outcome of `client.execute { ... }` mapped to `query -> rank(query, response)`.
// The pairs are finally collected into a Map keyed by cluster label.
// Presumably the per-query results are Futures (the caller applies Future.sequence to them).
val results = (for {
(cluster, client) <- clusters
} yield (cluster -> (for {
title <- queries
} yield client.execute {
// Search request against the index pattern "*", asking only for the "title" field and
// limited to 10 hits.
// NOTE(review): the query body below is not well-formed as written (two filter expressions
// separated by a comma, followed by a dangling `.query(...)`); the wrapping filter/query
// construct seems to have been lost. TODO confirm the intended query.
(search in "*" query {
termsFilter("redirect", true),
regexFilter("text", "redirect[^\\s]+")
.query(matches("text", title))
}) fields ("title") size 10
}.map(result => title -> rank(title, result))))).toMap
// Side effect at construction time: prints the (still pending) result structure.
println(s"Created $results")
/**
 * Converts the position of a hit inside the response into a 1-based rank.
 *
 * @param query  the value to look for among the hits
 * @param result the search response whose hits are inspected
 * @return `Some(position + 1)` when `indexWhere` finds a hit; the value returned
 *         otherwise is not visible here (presumably `None`; TODO confirm)
 */
def rank(query: String, result: SearchResponse): Option[Int] = {
val index = result.getHits().getHits().indexWhere(hit => {
// NOTE(review): the predicate body and its closing `})` are missing at this point, so it is
// not visible how a hit is compared with `query`.
if (index >= 0) {
// indexWhere is 0-based; ranks are reported 1-based.
Some(index + 1)
} else {
// NOTE(review): the else branch and the closing braces of `rank` and of the class are missing.
/**
 * Program entry point (via `App`): builds one client per cluster, runs the ranking
 * comparison over the title list below and prints per-cluster scores.
 */
object RankingComparison extends App {
// Settings shared by every cluster client: a custom transport module, an API-key
// placeholder ("<api-key>" must be replaced with a real key), cluster-name checking left
// enabled (ignore_cluster_name = false) and a 12000ms TCP connect timeout.
val commonSettings = ImmutableSettings.settingsBuilder()
.put("transport.type", "no.found.elasticsearch.transport.netty.FoundNettyTransportModule")
.put("transport.found.api-key", "<api-key>")
.put("client.transport.ignore_cluster_name", false)
.put("transport.tcp.connect_timeout", "12000ms")
/**
 * Builds a transport client for one cluster on port 9343, layering a per-cluster setting
 * on top of `commonSettings`.
 *
 * NOTE(review): several things look truncated in this method. TODO confirm against the original:
 *  - the host string ends in a bare `$`, which is not a valid interpolation; `region` is
 *    not used anywhere in the visible lines, so it was presumably part of the host name;
 *  - the settings key paired with `clusterId` is the empty string;
 *  - no return expression or closing brace is visible after `tClient`.
 *
 * @param clusterId identifier of the cluster; used in the host name and as a settings value
 * @param region    region of the cluster (unused in the visible lines, see note above)
 */
def clientBuilder(clusterId: String, region: String) = {
val address = new InetSocketTransportAddress(s"$clusterId-$", 9343);
val settings = ImmutableSettings.settingsBuilder().put(commonSettings).put("", clusterId).build()
val tClient = new TransportClient(settings).addTransportAddress(address)
// Clusters under comparison, keyed by a display label. Both entries currently use the same
// "<cluster-id>" placeholder and region, so real cluster ids must be filled in.
// NOTE(review): the closing parenthesis of this Map(...) is not present in the text.
val clusters = Map(
"default" -> clientBuilder("<cluster-id>", "us-east-1"),
"bm25" -> clientBuilder("<cluster-id>", "us-east-1")
// Query strings passed to RankingComparison; each entry is searched for and is also the
// value whose position among the hits is ranked.
// NOTE(review): no closing parenthesis follows the last entry of this list in the text.
val titles = List(
"communications of the acm", "Columbus Mountain",
"Bonnie Schneider",
"Miguel Montuori",
"Papua New Guinean kina",
"Qanat-e Bid, Jiroft",
"Heikki Huttunen",
"Leonel Power",
"Chris Kramer",
"Charles Brutton",
"Every Face Tells a Story",
"The Delta Study",
"The Biltmore Company",
"USS Austin (DE-15)",
"All India Radio (band)",
"Ivan Boyadzhiev",
"18127 Denversmith",
"Ulster Junior Football Championship",
"Salvador Morales",
"950s in poetry",
"Marinekazerne Suffisant",
"Revolutions of 1989",
"Subby Anzaldo",
"Viceroy Special",
"List of people convicted under Terrorism Acts in the United Kingdom",
"West Bromwich Central tram stop",
"Politics of Dominica",
"Nimo tube",
"Degtyarsky mine",
"OGAE Second Chance Contest 1989",
"Zou language",
"Rob Kurz",
"Koning Willem II Stadion",
"Sector General (novel)",
"Horsfieldia kingii",
"Sarah Maria Cornell",
"Clarke Medal",
"Hume, Illinois",
"2005 European Cup (athletics)",
"Lonan (parish)",
"Battle of Montebello (1800)",
"Dollond (crater)",
"Fire proximity suit",
"Suwannee River Stakes top three finishers",
"Draka Nunatak",
"CKPC (AM)",
"Netball in Malawi",
"Lutz Liwowski",
"Kul Chenar",
"Joachim Carvallo",
"Armenia Tree Project",
".303 (film)",
"Fire Station (disambiguation)",
"Kevin Page",
"Things We Said Today",
"Charly Gaul",
"Rucava parish",
"Richard F. Abel",
"Julia Somerville",
"James Finley House",
"Nishi-Tsubame Station",
"Hypermastus casta",
"Prateep Ungsongtham Hata",
"Weierstrass transform",
"8156 Tsukada",
"Kenelm Hutchinson Digby",
"Daniel H. Miller",
"Nye Committee",
"Fistball at the World Games 2013",
"Honda SH150i",
"Gunnar Lund",
"Jacky Cupit",
"United States House of Representatives special election in the District of Columbia, 1971",
"2002 FINA World Open Water Swimming Championships",
"Andrei Sakharov",
"Nordic Light",
"Una parte di me (Amaury Vassili album)",
"List of Presbyterian churches",
"Macaldenia palumba",
"Sri Venkateswara Hindu College of Engineering, Machilipatnam",
"Paddy Crozier",
"Celebrity Eclipse",
"UD Marinaleda",
"Endre Csillag",
"Ichitana Station",
"The Sharon Osbourne Show",
"Noriko Nakayama",
"Jens Staubrand",
"Jacobus Craandijk",
"Populus Ltd",
"Kotor-class frigate",
"Haakon Stein",
"Kermit, West Virginia",
"Seven Steps to Heaven",
"Cherry Hill Mall",
"ERT B",
"Coptodactyla glabricollis",
"Amarin Plaza",
"Andrew Sidamon-Eristoff",
"The Heiresses",
"List of 2000 Seattle Mariners draft picks",
"Petroleum industry in Kuwait",
"Error function",
"Jamshid Iskanderov",
"Burfa Castle",
"Dimitar Grabchev",
"Belvelly Castle",
"Never Trust a Guy Who After Having Been a Punk, Is Now Playing Electro",
"Winiary, Proszowice County",
"Sulphur Springs, Trinity County, Texas",
"Lossen rearrangement",
"Za dom spremni",
"Honda CBX400F",
"Joos Horsten",
"Croston railway station",
"Frunzenskaya (Moscow Metro)",
"Ted Tripp",
"Roman Catholic Territorial Prelature of Mission de France",
"David Agmon",
"Marinus Larsen",
"Training analysis",
"Cornwall Capital",
"Syrian-Egyptic Gnosticism",
"Northern Mariana Islands Supreme Court",
"Marcus Atilius Regulus (consul 227 BC)",
"Julia Richman Education Complex",
"Portulaca pilosa",
"Absolute Radio 90s",
"Kaka Abbas",
"O Drakos",
"JPP (disambiguation)",
"The First Word Is the Hardest",
"Doll Domination",
"Bob Burkard",
"The New World Order (Robertson)",
"Paulo e Virginia",
"Positive airway pressure",
"Singaporean by-election, 1967",
"Badminton World Federation",
"Venice Biennale",
"Outside Woman Blues",
"Vanessa Kerry",
"DLR Smartfish",
"Folga Pierwsza",
"Scopula rantaizanensis",
"List of Czech Republic international footballers",
"Moto Guzzi",
"Duke Zhao of Jin",
"Gordon Poirier",
"Brad Roberts",
"Banzai Venus",
"Dijana Ravnikar",
"Hyaloperonospora parasitica",
"2012 Pennsylvania 400",
"Hidden Valley, El Dorado County, California",
"Emmet (heraldry)",
"Ironport (beverage)",
"Village of Four Seasons, Missouri",
"Caucasus Research Resource Centers",
"Harbinger Hall",
"Innovation Place Research Park",
"BSC YB Frauen",
"Champneuf, Quebec",
"Roman Catholic Archdiocese of Ho Chi Minh city",
"Catfish (film)",
"Francesco I Sforza",
"Potomac Heights, Maryland",
"IC 2000",
"Guetta Blaster",
"Ormanov Potok",
"Quebec general election, 1952",
"Hall Lake",
"Ariadne of Phrygia",
"Midnapore College",
"Nebria nudicollis",
"List of South Korean films of 1951",
"Tom Barry (soldier)",
"Ernest Linton",
"Fred Beir",
"Norio Suzuki (footballer)",
"Stewart SF3",
"William Mills Ivins, Sr.",
"Free Citizens",
"Let George Do It!",
"Stan Kielty",
"Stuttgart Airport",
"Mount Maya",
"Robert Land Academy",
"Double Geneva",
"Battle of Araviana",
"Pala Empire",
"Isotopes of tellurium",
"New Frontiers School Board",
"Prison Act",
"Petra Bagust",
"Shrub (band)",
"Etobicoke-Finch West LRT",
"Brad Backer",
"Dogsomyn Ganbold",
"I Bruise Easily",
"Helene Holzman",
"Dolenja Dobrava, Trebnje",
"Wesley Posvar",
"2008 Malta Cup",
"Qeshlaq-e Galam Ali Safar",
"750 BC",
"Karangan, Razavi Khorasan",
"Ahmad Shahrul Azhar Sofian",
"Spich station",
"Investigate (magazine)",
"Jimmy Cringan",
"Territories of Mexico",
"Naxibacter haematophilus",
"North Bank Depot Buildings",
"George La Plata",
"The League",
"Barry Mehler",
"Jock West",
"Captain Austin Jenks House",
"Ballbreaker (disambiguation)",
"William Upham",
"Multi-standard television",
"Scott Cleverdon",
"Harry R. Clements",
"Serruria aemula",
"The Brilliant Things",
"Marcus Paulsson",
"Alta controversy",
"Abraham Albert Heaps",
"Iowa Falls Bridge",
"Magic Affair",
"1975 in sports",
"Northern Line (disambiguation)",
"Leo Beuerman",
"Francesco Francavilla",
"Utricularia simulans",
"Subtract with carry",
"Thomas Ellys",
"Richmond Hill Curling Club",
"Georges Thurston",
"Stauning or Chaos",
"Vought Corsair",
"Lavar-e Razemi",
"Merkuriusz Polski Ordynaryjny",
"Providence Plantations",
"Realsports Football",
"Vitaliy Shchedov",
"Occupy Philadelphia",
"Monmouth cap",
"Agata and the Storm",
"Armando Castagna",
"Northern Ireland local elections, 2011",
"Lysimachia pendens",
"The Riddle of the Universe and Its Solution",
"IBM 8100 DPCX",
"Coins and postage stamps of Sealand",
"Birdshot chorioretinopathy",
"Ildar Nugumanov",
"Samarskoye, Rostov Oblast",
"Beauty TV",
"National Instant Criminal Background Check System",
"List of St. Francis College presidents",
"Mishpat Ivri",
"Miriam Chamani",
"Charlie Burtenshaw",
"Electromagnetic electron wave",
"Dudley Ryder",
"Hylton Viaduct",
"Ernst Klee",
"Black Weblog Awards",
"Philatelic Society of India",
"Palazzo Gabrielli-Borromeo",
"Tiga Dam",
"Armudlu, Iran",
"Londonderry, New Hampshire",
"Tommaso Lequio di Assaba",
"Syrian Revolution General Commission",
"Municipal elections in Conil de la Frontera",
"Greg Salas",
"Peter Christie",
"Findlater Stewart",
"Live Kreation: Revisioned Glory",
"Phavaraea rectangularis",
"Gocha Trapaidze",
"Dale Barnstable",
"Daniele Capezzone",
"Reading to Kids",
"2013 Connecticut Huskies football team",
"Watson Elkinah Reid",
"Francis J. Dewes House",
"IHI Corporation F7",
"Canadian Association of Professional Speakers",
"Antonio Beato",
"Mark Stewart (musician)",
"Wemindji (Cree village municipality)",
"Dieter Paucken",
"Herbert Huffman",
"Susan B. Ganong",
"Paul Kim (academic)",
"Joni Pirtskhalaishvili",
"Darkness (Darren Hayes song)",
"Harry Powell (footballer)",
"Levally Lower",
"Beaten by Them",
"The Thin Blue Line (film)",
"Arata: The Legend",
"Hamilton, Ontario municipal election, 1997",
"Manuel Apicella",
"HMS James (1634)",
"Anti-runway penetration bomb",
"Isabella Glyn",
"Ahmad Jamal at the Blackhawk",
"Abarema cochleata",
"Bretnor Apartments",
"The Bankfield School",
"William Thornton (rugby league)",
"Georgiana (steamboat)",
"Christopher Pickett",
"Leptospermum squarrosum",
"Carex canescens",
"Movement for Democracy and Independence",
"Tanvi Verma",
"Frederick Taylor (golfer)",
"William E. Harmon Foundation award for distinguished achievement among Negroes",
"Foreign exchange service (telecommunications)",
"Watanga Football Club",
"Richard Pratt (Australian businessman)",
"Hachiro Maekawa",
"Fitzpatrick House (Lockport, Illinois)",
"Calosoma sayi",
"Paul Hines",
"International Miniature Aerobatic Club",
"Dan Michaelson",
"Kendriya Vihar",
"Huaihai Campaign",
"Hanover Square, Syracuse",
"Brady Hotel (Tulsa)",
"Warrington, Florida",
"Brian Wooten",
"Charles Chaplin (disambiguation)",
"Independent Labour Publications",
"Sierra La Esmeralda",
"International Convention for the Protection of All Persons from Enforced Disappearance",
"U.S. China Policy Foundation",
"Pinocchio (2002 film)",
"Hillcrest mine disaster",
"Zonulispira crocata",
"Louis Racine",
"E. C. Buley",
"Mountain Wave",
"Lower Swatara Township, Dauphin County, Pennsylvania",
"Andy Warhol",
"Altena (disambiguation)",
"Tvida Vision",
"Le blog de Frantico",
"Bill Bowes (American football)",
"Oregon Parks and Recreation Department",
"Shane Campbell (artist)",
"Zero instruction set computer",
"Matthias Gallas",
"Tackle Happy",
"Kalkberg Stadium",
"Jim Lawrence",
"Seidel adjacency matrix",
"Trade magazine",
"The Last of the Jedi: Dark Warning",
"Michael McCullers",
"Picard, Dominica",
"Kosuke Ito",
"Lophophelma niveata",
"Maurice Ashley (historian)",
"M-149 (Michigan highway)",
"List of The Young and the Restless cast members",
"Evalea emeryi",
"Andrey Bryukhankov",
"List of New Age topics",
"Royal Auxiliary Air Force",
"6 Feet Under (album)",
"Pijush Ganguly",
"Baldwin School",
"Brian Kraft",
"Sun Valley, Idaho",
"Hurricane Ava",
"Compagnie industrielle de Monthey",
"Hali Bon",
"Sonny Malone",
"Zoya Polunina",
"Luigi Cagni",
"14571 Caralexander",
"Some Velvet Morning",
"William Atcheson Traill",
"Guan Jing",
"Tainter, Wisconsin",
"Chihuahua white pine",
"Dan Rosen",
"Gaius Julius Verus Maximus",
"Saleh Al Shal",
"Perrinia docili",
"Dubuque, Iowa",
"Aspen, Colorado",
"Schistura caudofurca",
"Zenochloris paradoxa",
"Max-Eckart Wolff",
"Renata Mauer",
"Sjoerd Hoekstra",
"Jung Jin-Hwa",
"David B. Harmony",
"Monson Engine House (Former)",
"Manx people",
"Delayed Gadolinium Enhanced Magnetic Resonance Imaging of Cartilage (dGEMRIC)",
"Kheyrabad, Birjand",
"Cook Strait",
"SS Venezuela",
"USS Roanoke",
"Falsimargarita benthicola",
"Television Critics Association",
"Sturisoma frenatum",
"Frederick Charles Bothwell, Jr.",
"Vessel traffic service",
"Jeff Essmann",
"Rhode Island Democratic primary, 2008",
"The Georgian House",
"Tottenham cake",
"Santa Elena",
"Lindsay Dracass",
"Omen (band)",
"Charles August Nichols",
"Kelly Watson",
"Magnus Poulsson",
"Jay Ritchie",
"Strider Rock",
"Coal Chamber",
"Ancient Roman units of measurement",
"1951 Pacific hurricane season",
"Rudy Clay",
"Pentax SF7",
"Brian Fischer",
"Pyrrocoma apargioides",
"Louis IV of France",
"Virtual trading point",
"86th Regiment of Foot (disambiguation)",
"Mark Samuels Lasner",
"Andrew Stewart (died 1872)",
"Lafut-e Pain",
"Prime Minister of the West Indies Federation",
"Edward Jurith",
"Task (computing)",
"Nicaragua at the 1984 Summer Olympics",
"William Archer (architect)",
"World Organization for Islamic Services",
"My Life as a Fake",
"80 metres hurdles",
"17th New Zealand Parliament",
"Over-Thirty Alumnus Association",
"Morena (disambiguation)",
"Religion in El Salvador",
"Battle of Ghaghra",
"Discrete differential geometry",
"La valse",
"XML Encoding Rules",
"Buxton School (Leytonstone)",
"Kenneth, Minnesota",
"The Secret Heart",
"Allegheny Mountain dusky salamander",
"Bob Casullo",
"National psychology",
"Ministry of Culture (Colombia)",
"Days into Years",
"Eusko Gudariak",
"Collateral Damage (The Wire)",
"Shadow Records",
"John Coffey (hurler)",
"Liz Burch",
"15068 Wiegert",
"GPCR oligomer",
"Lessebo Municipality",
"Lithoxus surinamensis",
"Yang Chou",
"Illawarra Mercury",
"Alberto Winkler",
"Arabesque (rapper)",
"Huntley Montgomery",
"Toc-H Public School",
"Erich Zeisl",
"Stellate reticulum",
"1810s in sociology",
"Honda MB50",
"Omoglymmius pilosus",
"Shizunai Stallion Station",
"Salt Lake Bees",
"Jazz hands",
"Mary Kenneth Keller",
"Klek, Croatia",
"Zhang Hao (Wu)",
"D. R. Mehta",
"Mehdigulu Khan Vafa",
"Islamic economics in the world",
"Puka-Puka Airport",
"Boughton (surname)",
"Tivadar Uray",
"Polytechnic University of the Philippines, San Juan",
"Imani (rapper)",
"Pablo Aguilar (footballer born 1984)",
"Generoso Vetta railway station",
"List of Phyllis episodes",
"Peter Gordon (radio presenter)",
"Key to My Soul",
"Good Words",
"In the Right Place",
"RMS Carmania",
"Lorenzo Hoopes",
"Simon the Leper",
"Quirijn van Brekelenkam",
"If All Goes Wrong",
"Business Traveller",
"British Railways Mark 1 sleeping car",
"Thiruvarur Bakthavathsalam",
"Leona Telek",
"Nawabganj District",
"Raphael Schaschko",
"Czech passport",
"United States Department of Defense",
"National Youth Day (India)",
"Malcolm Jack",
"Resurrection (Venom album)",
"Ismat ad-Din",
"List of bombings during the Syrian Civil War",
"Wande (town)",
"Albert Ingham",
"Garry Pagel",
"113th meridian west",
"Fagaloa Bay",
"Franklin Grove, Illinois",
"Epp Sell",
"Seal of Hawaii",
"Melek (disambiguation)",
"Christopher Wybrow",
"Steven Korte",
"James Johnson (Kentucky)",
"Open flap debridement",
"Edward Croft (MP)",
"Government Gazette of South Africa",
"The Closest Thing to Crazy",
"Kiss Network",
"FC Skala Stryi",
"The Price of the Phoenix",
"Julia Chang",
"Zhytomyrska (Kiev Metro)",
"Nicolas Chorier",
"St Lawrence railway station",
"Ed Madjeski",
"Platyptilia suigensis",
"Bacillus arseniciselenatis",
"142nd Pennsylvania Infantry",
"Banff, Macduff and Turriff Junction Railway",
"National Film Award for Best Feature Film in Marathi",
"Truro School",
"Robert Strange (engraver)",
"Knoxville, Georgia",
"Highcroft Racing",
"Mutsun language",
"USS Shirk (DD-318)",
"Belgian coins of World War II",
"Nawab of Sarhad",
"Poplar Springs",
"Prince Stefan",
"J Beez wit the Remedy",
"Kelly Skidmore",
"R. H. Bing",
"Pectinivalva acmenae",
"Talbot, California",
"Munchies (TV series)",
"Gordon Dean (Australian politician)",
"Sangarius Bridge",
"Fritz Thiede",
"Delaware Route 1",
"Sedra Bistodeau",
"Bourbon Street Hotel and Casino",
"8th General Assembly of Nova Scotia",
"Juan Gil Zambrano",
"Referential indeterminacy",
"Rachel Feinstein",
"Louisiana Highway 64",
"Matt Gentry",
"Aleksei Mikhailovich Abaza",
"Christos Constantinidis",
"Frisco, North Carolina",
"Dance Marathon at the University of Michigan",
"Kazunogawa Pumped Storage Power Station",
"Herbert Yardley",
"Davazdeh Emam",
"Tarachodes abyssinicus",
"Renault Sherpa 2",
"Les Baux-Sainte-Croix",
"Robert Chaudenson",
"Coca-Cola with Lemon",
"Oulmes, Morocco",
"Studio 57",
"Police Academy 3: Back in Training",
"Osborn Road and Central Avenue (Metro Light Rail station)",
"Nipponaphera argo",
"Vitali Belichenko",
"Elkins Junction, West Virginia",
"Mamat Khalid",
"Reverse motion",
"Burnt mound",
"The Cardinal Sins",
"Delaware International Speedway",
"Edner Brutus",
"Ibrahim of Kazan",
"Mr. 12 String Guitar",
"Jalan Changkat Keruing",
"Flexor retinaculum",
"HMS Sable",
"Inferior dental plexus",
"Sky High (1922 film)",
"International Westminster Bank",
"List of awards and nominations received by M.I.A.",
"Everytime You Touch Me",
"Fontecilla (surname)",
"Ditton Park",
"Soloviev D-25",
"Tammy Jansen",
"Winslow, Indiana",
"Digital mockup",
"Shock tube",
"David Arnoldo Cabrera",
"Columbus, Mississippi",
"Evangelia Micheli-Tzanakou",
"Jiangsu-Hong Kong Personnel Training Cooperation Programme",
"Sergey Nikolayevich Ryzhikov",
"Joint ownership",
"Amna Ilyas",
"UFC 14",
"2012 Outback Bowl",
"Salman Shahid",
"Harald Gutzelnig",
"List of Australian Senate appointments",
"List of NBC television affiliates (table)",
"Jeff Chapman",
"!Women Art Revolution",
"The Pops Goes Country",
"Nyora railway station",
"How to Grow a Planet",
"Salvatore Lo Piccolo",
"List of tourist sites in Helsinki",
"Lyudmila Gromova",
"Arbeiter-Zeitung (Luxembourg)",
"Portrait of Alison",
"Golden Gramophone Award",
"Proximodorsal process",
"Saskatchewan Highway 649",
"Psychology of sexual monogamy",
"Alexander Westerhout",
"Chino Sing",
"Judge Samuel Holten House",
"Scott Blackwell",
"Vida Blue",
"Sy Schulman",
"The Hostages",
"P. vinifera",
"Tokyo Marble Chocolate",
"Full Gospel Baptist Church Fellowship",
"Stephen Fry bibliography and filmography",
"List of 2008 Summer Paralympics medal winners",
"Ernst Emil Alexander Back",
"ISO 31-12",
"Rocca al Mare Shopping Centre",
"Der Ladenprinz",
"Scabricola barrywilsoni",
"Moshe Wolman",
"David Gamkrelidze",
"Enforcement Directive",
"Yopno language",
"Alistair Harrison",
"Gaius Ateius Capito (tribune)",
"1986 PBA All-Filipino Conference",
"Amal (film)",
"Fall (Clay Walker song)",
"List of flag bearers for Italy at the Olympics",
"Marie Anusorn School",
"Maksim Nesterov",
"Geoffrey Kwame Tomtania",
"Juergen Hescheler",
"Sequence (medicine)",
"Yuji Ichioka",
"Sicambeni Rural University",
"Robert Tiffany",
"Herodian architecture",
"Prunus sargentii",
"Catcliffe Glass Cone",
"Crafton, Pennsylvania",
"List of medical abbreviations: H",
"Les Watkins",
"Anthidium niveocinctum",
"Kristian Ystaas",
"Steven Naismith",
"Sylvester, Georgia",
"Carol Weyland Conner",
"Lokmanya Nagar",
"Yuam River",
"2008 Trinidad and Tobago Pro Bowl",
"The World Showcase March",
"Vila Flores",
"Inventing Myself",
"Dozdak, Sari",
"Arcata Wastewater Treatment Plant and Wildlife Sanctuary",
"Stone (1974 film)",
"Armentarius of Pavia",
"Johnny Jones and the King Casuals",
"Armand Putzeys",
"Eurytyla automacha",
"10266 Vladishukhov",
"Gellius Maximus",
"Rotation group",
"Royal descent",
"Matli Taluka",
"Stewart Shapiro",
"Ricardo, California",
"Arystan oil field",
"Richard Pollack",
"Wrigley Brook",
"Let the Great World Spin",
"Golden Monarch",
"School of Medicine, University of Zagreb",
"Spread Your Wings",
"Blanket of Secrecy",
"Prints in the Stone",
"Hummingbird sage",
"Byrd Park",
"L. alba",
"Pound Middle School",
"Brinsley Forde",
"Cok Istri Krisnanda Widani",
"Shenandoah University",
"Alexander Burns (minister)",
"Minster School",
"The Delphi Bureau",
"Ramaria gelatinosa",
"Prince of Smolensk",
"Saiin (priestess)",
"Infection and Drug Resistance",
"Nancy Kerrigan",
"Arrothia bicolor",
"Tamia (album)",
"Bittium delicatum",
"Streptococcus pseudopneumoniae",
"2012 Tennessee Titans season",
"King Island",
"Dghe Subregion",
"Musique Mecanique",
"Germantown, Virginia",
"Zhongguancun Administrative Committee",
"Valentina Popova",
"Carcassonne: Wheel of Fortune",
"Sean Reynolds (soccer)",
"Lionel Hutz",
"The Very Best of Carly Simon: Nobody Does It Better",
"25415 Jocelyn",
"Cape Grafton",
"George Boulton Mainwaring",
"Dry day",
"Ministry of Justice (Italy)",
"Killa Saifullah District",
"Donald Gotterbarn",
"Cultural Muslim",
"Gentryville, Missouri",
"Nicolaie Taga",
"Ystrad Mynach Hospital",
"Anna Poray",
"Crawley railway station",
"List of shipwrecks in 2013",
"Viccourt Cup",
"David Baltimore",
"Sharp GX29",
"Montour Wildlife Management Area",
"Geosmin synthase",
"Thomas McClary",
"RF antenna ion source",
"Clare Drake",
"MRO Software",
"Beijing Jiaotong Tai",
"Central Standard Time (disambiguation)",
"Ryan Meili",
"Seven Mile Island (disambiguation)",
"Greg Campbell (cricketer)",
"String theory landscape",
"U10 (Berlin U-Bahn)",
"Sign cricket",
"Australian Mammalogy",
"Bobby Gough",
"1995 Kremlin Cup",
"Anthene lamprocles",
"Jorunn Ringstad",
"BM Ciudad Encantada",
"Mayor of Bristol",
"List of currently active Russian military land vehicles",
"Sports in California",
"Darreh Barik, Izeh",
"Competitive eating",
"The Mouth of the Wolf",
"Steve Slaunwhite",
"Nutrition for Learning",
"Ocean (Sebadoh song)",
"Asperula tinctoria",
"Friday Mountain",
"Windy City Rollers",
"Growden Memorial Park",
"Battle of Scotch Corner",
"Sentinel cell",
"Hotel (novel)",
"Sword of Fargoal",
"Grammy Award for Best Instrumental Performance",
"Stratton Hills",
"Douglas Todd",
"Vince Howard",
"University of Antofagasta"
// Per cluster, collapse the list of per-query results into a single Future of a list.
val res = new RankingComparison(clusters, titles).results.mapValues(list => Future.sequence(list))
/**
 * Summary for one cluster.
 *
 * @param averageRank       sum of the found ranks divided by the number of found ranks
 * @param missingPercentage share of entries for which no rank was found, times 100
 */
case class Score(averageRank: Double, missingPercentage: Double)
// Per-cluster Score derived from the `title -> Option[rank]` pairs.
// NOTE(review): the values of `res` come from Future.sequence, yet the lambda treats `scores`
// like a collection of tuples; a `.map` over the future seems to have been lost, as are the
// closing `}` and `)` of this expression. TODO confirm.
val average = res.mapValues(
scores => {
// Keeps only the ranks that are defined (the Option in each tuple is flattened away).
val ranks = scores.flatMap(tuple => tuple._2)
// If no rank is defined the first argument is 0.0/0, i.e. NaN; the same applies to the
// percentage when `scores` is empty.
Score(ranks.sum.doubleValue/ranks.size, (scores.size - ranks.size) * 100d/scores.size)
// Per-cluster text report: each (title, score) pair rendered as "title: score\n" and
// concatenated into one string.
// NOTE(review): the surplus closing parentheses after `fold` indicate that wrapping calls
// (presumably maps over the future and its list) are missing above. TODO confirm.
val output = res.mapValues(
tuple => {
val (title, score) = tuple
s"$title: $score\n"
}).fold("")(_ + _)))
// Report failures: for every cluster whose future fails, write the cluster label and the
// error to stderr.
// NOTE(review): the closing braces of this loop and of the handlers below are missing.
for ((cluster, future) <- output) {
future.onFailure {
case error => {
System.err.println(s"Failed for cluster: [$cluster] with reason [${error}]")
// Once all per-cluster output futures have completed (the `case s` pattern matches any
// outcome), print one "cluster: score" line per cluster.
Future.sequence(output.values).onComplete {
case s => {
for ((cluster, future) <- average) {
// NOTE(review): a `case` directly inside the for-body is not valid; a callback on `future`
// wrapping this case seems to have been lost. TODO confirm.
case score => println(s"$cluster: $score")
