Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save riordan/81849cd554043a3d5cfe to your computer and use it in GitHub Desktop.
Save riordan/81849cd554043a3d5cfe to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from postal.expand import expand_address\n",
"from postal.parser import parse_address\n",
"import json\n",
"import glob"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def expandparse(query):\n",
"    \"\"\"Parse every expansion that libpostal produces for ``query``.\n",
"\n",
"    ``query`` is handed to ``expand_address`` and each expansion it returns\n",
"    is then passed to ``parse_address``.\n",
"\n",
"    Returns a list holding one ``parse_address`` result per expansion, in\n",
"    the order ``expand_address`` produced them.\n",
"\n",
"    A concrete list is built instead of returning ``map(...)``: on Python 3\n",
"    ``map`` is a lazy iterator, so printing it would show ``<map object>``\n",
"    rather than the parses. On Python 2 the result is the same list as before.\n",
"    \"\"\"\n",
"    return [parse_address(expansion) for expansion in expand_address(query)]\n",
"\n",
"def print_autocomplete(query):\n",
"    \"\"\"Replay ``query`` as if it were typed one character at a time.\n",
"\n",
"    For each successively longer prefix of ``query`` this prints the prefix,\n",
"    then the result of ``expandparse`` for that prefix, then a newline string\n",
"    (which, together with print's own line ending, leaves blank separator\n",
"    lines between steps). Nothing is returned.\n",
"    \"\"\"\n",
"    typed = []\n",
"    for keystroke in query:\n",
"        typed.append(keystroke)\n",
"        # Rebuild the text entered so far from the collected keystrokes.\n",
"        prefix = \"\".join(typed)\n",
"        print(prefix)\n",
"        print(expandparse(prefix))\n",
"        print('\\n')\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TEST SET: labels\n",
"TEST: San Francisco, San Francisco County, CA\n",
"S\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"Sa\n",
"[[(u'southern', u'house'), (u'australia', u'country')]]\n",
"\n",
"\n",
"San\n",
"[[(u'san', u'road')]]\n",
"\n",
"\n",
"San \n",
"[[(u'san', u'road')]]\n",
"\n",
"\n",
"San F\n",
"[[(u'san', u'road'), (u'flat', u'house')]]\n",
"\n",
"\n",
"San Fr\n",
"[[(u'san father', u'road')], [(u'san frontage', u'road')]]\n",
"\n",
"\n",
"San Fra\n",
"[[(u'san', u'road'), (u'fra', u'house')]]\n",
"\n",
"\n",
"San Fran\n",
"[[(u'san fran', u'road')]]\n",
"\n",
"\n",
"San Franc\n",
"[[(u'san franc', u'road')]]\n",
"\n",
"\n",
"San Franci\n",
"[[(u'san franci', u'road')]]\n",
"\n",
"\n",
"San Francis\n",
"[[(u'san francis', u'house')]]\n",
"\n",
"\n",
"San Francisc\n",
"[[(u'san francisc', u'house')]]\n",
"\n",
"\n",
"San Francisco\n",
"[[(u'san francisco', u'city')]]\n",
"\n",
"\n",
"San Francisco,\n",
"[[(u'san francisco', u'city')]]\n",
"\n",
"\n",
"San Francisco, \n",
"[[(u'san francisco', u'city')]]\n",
"\n",
"\n",
"San Francisco, S\n",
"[[(u'san francisco san', u'road')], [(u'san', u'city'), (u'francisco south', u'road')], [(u'san francisco s', u'road')]]\n",
"\n",
"\n",
"San Francisco, Sa\n",
"[[(u'san francisco southern', u'road'), (u'australia', u'country')]]\n",
"\n",
"\n",
"San Francisco, San\n",
"[[(u'san francisco san', u'road')]]\n",
"\n",
"\n",
"San Francisco, San \n",
"[[(u'san francisco san', u'road')]]\n",
"\n",
"\n",
"San Francisco, San F\n",
"[[(u'san francisco san flat', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Fr\n",
"[[(u'san francisco san father', u'road')], [(u'san francisco san frontage', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Fra\n",
"[[(u'san francisco san', u'road'), (u'fra', u'country')]]\n",
"\n",
"\n",
"San Francisco, San Fran\n",
"[[(u'san francisco san fran', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Franc\n",
"[[(u'san francisco san franc', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Franci\n",
"[[(u'san francisco san franci', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francis\n",
"[[(u'san francisco san francis', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisc\n",
"[[(u'san francisco san francisc', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco\n",
"[[(u'san francisco', u'road'), (u'san', u'city'), (u'francisco', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco \n",
"[[(u'san francisco', u'road'), (u'san', u'city'), (u'francisco', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco C\n",
"[[(u'san francisco san francisco centre', u'road')], [(u'san francisco san francisco center', u'road')], [(u'san francisco san francisco central', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco Co\n",
"[[(u'san francisco', u'road'), (u'san', u'city'), (u'francisco', u'road'), (u'colorado', u'state')], [(u'san francisco san francisco', u'road'), (u'co', u'state')], [(u'san francisco san francisco', u'road'), (u'company', u'house')], [(u'san francisco san francisco', u'road'), (u'county', u'house')]]\n",
"\n",
"\n",
"San Francisco, San Francisco Cou\n",
"[[(u'san francisco san francisco cou', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco Coun\n",
"[[(u'san francisco', u'road'), (u'san', u'city'), (u'francisco coun', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco Count\n",
"[[(u'san francisco', u'road'), (u'san', u'city'), (u'francisco count', u'road')]]\n",
"\n",
"\n",
"San Francisco, San Francisco County\n",
"[[(u'san francisco san francisco', u'road'), (u'county', u'house')]]\n",
"\n",
"\n",
"San Francisco, San Francisco County,\n",
"[[(u'san francisco san francisco', u'road'), (u'county', u'house')]]\n",
"\n",
"\n",
"San Francisco, San Francisco County, \n",
"[[(u'san francisco san francisco', u'road'), (u'county', u'house')]]\n",
"\n",
"\n",
"San Francisco, San Francisco County, C\n",
"[[(u'san francisco san francisco', u'road'), (u'county centre', u'house')], [(u'san francisco san francisco county center', u'road')], [(u'san francisco san francisco', u'road'), (u'county central', u'house')]]\n",
"\n",
"\n",
"San Francisco, San Francisco County, CA\n",
"[[(u'san francisco san francisco county', u'road'), (u'california', u'state')], [(u'san francisco san francisco', u'road'), (u'county', u'state_district'), (u'ca', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 30 West 26th Street, Manhattan, NY\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"30\n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 \n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 W\n",
"[[(u'30', u'house_number'), (u'west', u'road')], [(u'30', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"30 We\n",
"[[(u'30', u'house_number'), (u'warehouse', u'road')]]\n",
"\n",
"\n",
"30 Wes\n",
"[[(u'30', u'house_number'), (u'wes', u'road')]]\n",
"\n",
"\n",
"30 West\n",
"[[(u'30', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"30 West \n",
"[[(u'30', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"30 West 2\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'2', u'house_number')]]\n",
"\n",
"\n",
"30 West 26\n",
"[[(u'30', u'house_number'), (u'west 26', u'road')]]\n",
"\n",
"\n",
"30 West 26t\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'26t', u'house_number')]]\n",
"\n",
"\n",
"30 West 26th\n",
"[[(u'30', u'house_number'), (u'west 26th', u'road')]]\n",
"\n",
"\n",
"30 West 26th \n",
"[[(u'30', u'house_number'), (u'west 26th', u'road')]]\n",
"\n",
"\n",
"30 West 26th S\n",
"[[(u'30', u'house_number'), (u'west 26th san', u'road')], [(u'30', u'house_number'), (u'west 26th south', u'road')], [(u'30', u'house_number'), (u'west 26th s', u'road')]]\n",
"\n",
"\n",
"30 West 26th St\n",
"[[(u'30', u'house_number'), (u'west 26th saint', u'road')], [(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Str\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Stre\n",
"[[(u'30', u'house_number'), (u'west 26th stre', u'road')]]\n",
"\n",
"\n",
"30 West 26th Stree\n",
"[[(u'30', u'house_number'), (u'west 26th stree', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street,\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street, \n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street, M\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'mail', u'suburb')]]\n",
"\n",
"\n",
"30 West 26th Street, Ma\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'massachusetts', u'state')], [(u'30', u'house_number'), (u'west 26th street', u'road'), (u'ma', u'state')]]\n",
"\n",
"\n",
"30 West 26th Street, Man\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'man', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manh\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manh', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manha\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manha', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhat\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhat', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhatt\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhatt', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhatta\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhatta', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhattan\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhattan,\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhattan, \n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhattan, N\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan north', u'city')], [(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city'), (u'n', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street, Manhattan, NY\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')], [(u'30', u'house_number'), (u'west 26th street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: New South Wales, Australia\n",
"N\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"Ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"New\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"New \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"New S\n",
"[[(u'new s', u'house')]]\n",
"\n",
"\n",
"New So\n",
"[[(u'new so', u'house')]]\n",
"\n",
"\n",
"New Sou\n",
"[[(u'new', u'road'), (u'sou', u'house')]]\n",
"\n",
"\n",
"New Sout\n",
"[[(u'new sout', u'house')]]\n",
"\n",
"\n",
"New South\n",
"[[(u'new', u'house'), (u'south', u'road')]]\n",
"\n",
"\n",
"New South \n",
"[[(u'new', u'house'), (u'south', u'road')]]\n",
"\n",
"\n",
"New South W\n",
"[[]]\n",
"\n",
"\n",
"New South Wa\n",
"[[]]\n",
"\n",
"\n",
"New South Wal\n",
"[[]]\n",
"\n",
"\n",
"New South Wale\n",
"[[]]\n",
"\n",
"\n",
"New South Wales\n",
"[[(u'new south wales', u'state')]]\n",
"\n",
"\n",
"New South Wales,\n",
"[[(u'new south wales', u'state')]]\n",
"\n",
"\n",
"New South Wales, \n",
"[[(u'new south wales', u'state')]]\n",
"\n",
"\n",
"New South Wales, A\n",
"[[(u'new south wales', u'state'), (u'a', u'postcode')]]\n",
"\n",
"\n",
"New South Wales, Au\n",
"[[(u'new south wales', u'state'), (u'au', u'house')]]\n",
"\n",
"\n",
"New South Wales, Aus\n",
"[[(u'new south wales', u'state'), (u'aus', u'country')]]\n",
"\n",
"\n",
"New South Wales, Aust\n",
"[[(u'new south wales', u'state'), (u'aust', u'postcode')]]\n",
"\n",
"\n",
"New South Wales, Austr\n",
"[[(u'new south wales', u'state'), (u'austr', u'house')]]\n",
"\n",
"\n",
"New South Wales, Austra\n",
"[[(u'new south wales', u'state'), (u'austra', u'postcode')]]\n",
"\n",
"\n",
"New South Wales, Austral\n",
"[[(u'new south wales', u'state'), (u'austral', u'postcode')]]\n",
"\n",
"\n",
"New South Wales, Australi\n",
"[[(u'new south wales', u'state'), (u'australi', u'country')]]\n",
"\n",
"\n",
"New South Wales, Australia\n",
"[[(u'new south wales', u'state'), (u'australia', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: California, CA\n",
"C\n",
"[[(u'centre', u'house')], [(u'center', u'house')], [(u'central', u'house')]]\n",
"\n",
"\n",
"Ca\n",
"[[(u'california', u'house')], [(u'ca', u'country')]]\n",
"\n",
"\n",
"Cal\n",
"[[(u'cal', u'house')]]\n",
"\n",
"\n",
"Cali\n",
"[[(u'cali', u'city')]]\n",
"\n",
"\n",
"Calif\n",
"[[(u'calif', u'house')]]\n",
"\n",
"\n",
"Califo\n",
"[[(u'califo', u'house')]]\n",
"\n",
"\n",
"Califor\n",
"[[(u'califor', u'house')]]\n",
"\n",
"\n",
"Californ\n",
"[[(u'californ', u'house')]]\n",
"\n",
"\n",
"Californi\n",
"[[(u'californi', u'house')]]\n",
"\n",
"\n",
"California\n",
"[[(u'california', u'house')]]\n",
"\n",
"\n",
"California,\n",
"[[(u'california', u'house')]]\n",
"\n",
"\n",
"California, \n",
"[[(u'california', u'house')]]\n",
"\n",
"\n",
"California, C\n",
"[[(u'california centre', u'house')], [(u'california center', u'house')], [(u'california central', u'house')]]\n",
"\n",
"\n",
"California, CA\n",
"[[(u'california', u'city'), (u'california', u'state')], [(u'california', u'state'), (u'ca', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: West Bengal, India\n",
"W\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"We\n",
"[[(u'warehouse', u'house')]]\n",
"\n",
"\n",
"Wes\n",
"[[(u'wes', u'road')]]\n",
"\n",
"\n",
"West\n",
"[[(u'west', u'city_district')]]\n",
"\n",
"\n",
"West \n",
"[[(u'west', u'city_district')]]\n",
"\n",
"\n",
"West B\n",
"[[(u'west', u'road'), (u'b', u'house')]]\n",
"\n",
"\n",
"West Be\n",
"[[(u'west', u'road'), (u'be', u'country')]]\n",
"\n",
"\n",
"West Ben\n",
"[[(u'west ben', u'road')]]\n",
"\n",
"\n",
"West Beng\n",
"[[(u'west', u'road'), (u'beng', u'suburb')]]\n",
"\n",
"\n",
"West Benga\n",
"[[(u'west benga', u'road')]]\n",
"\n",
"\n",
"West Bengal\n",
"[[(u'west bengal', u'state')]]\n",
"\n",
"\n",
"West Bengal,\n",
"[[(u'west bengal', u'state')]]\n",
"\n",
"\n",
"West Bengal, \n",
"[[(u'west bengal', u'state')]]\n",
"\n",
"\n",
"West Bengal, I\n",
"[[(u'west bengal', u'state'), (u'interstate', u'suburb')], [(u'west bengal', u'state'), (u'island', u'country')]]\n",
"\n",
"\n",
"West Bengal, In\n",
"[[(u'west bengal', u'state'), (u'in', u'country')], [(u'west bengal indiana', u'state')]]\n",
"\n",
"\n",
"West Bengal, Ind\n",
"[[(u'west bengal', u'state'), (u'ind', u'country')]]\n",
"\n",
"\n",
"West Bengal, Indi\n",
"[[(u'west bengal indi', u'state')]]\n",
"\n",
"\n",
"West Bengal, India\n",
"[[(u'west bengal', u'state'), (u'india', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: North West, Singapore\n",
"N\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"No\n",
"[[(u'number', u'house')]]\n",
"\n",
"\n",
"Nor\n",
"[[(u'nor', u'country')]]\n",
"\n",
"\n",
"Nort\n",
"[[(u'north', u'road')]]\n",
"\n",
"\n",
"North\n",
"[[(u'north', u'road')]]\n",
"\n",
"\n",
"North \n",
"[[(u'north', u'road')]]\n",
"\n",
"\n",
"North W\n",
"[[(u'northwest', u'state')]]\n",
"\n",
"\n",
"North We\n",
"[[(u'north', u'road'), (u'warehouse', u'house')]]\n",
"\n",
"\n",
"North Wes\n",
"[[(u'north wes', u'road')]]\n",
"\n",
"\n",
"North West\n",
"[[(u'northwest', u'state')]]\n",
"\n",
"\n",
"North West,\n",
"[[(u'northwest', u'state')]]\n",
"\n",
"\n",
"North West, \n",
"[[(u'northwest', u'state')]]\n",
"\n",
"\n",
"North West, S\n",
"[[(u'northwest', u'suburb'), (u'san', u'road')], [(u'northwest south', u'road')], [(u'northwest s', u'road')]]\n",
"\n",
"\n",
"North West, Si\n",
"[[(u'northwest', u'road'), (u'si', u'country')]]\n",
"\n",
"\n",
"North West, Sin\n",
"[[(u'northwest', u'road'), (u'sin', u'suburb')]]\n",
"\n",
"\n",
"North West, Sing\n",
"[[(u'northwest', u'road'), (u'sing', u'house')]]\n",
"\n",
"\n",
"North West, Singa\n",
"[[(u'northwest', u'road'), (u'singa', u'suburb')]]\n",
"\n",
"\n",
"North West, Singap\n",
"[[(u'northwest singap', u'road')]]\n",
"\n",
"\n",
"North West, Singapo\n",
"[[(u'northwest singapo', u'road')]]\n",
"\n",
"\n",
"North West, Singapor\n",
"[[(u'northwest singapor', u'road')]]\n",
"\n",
"\n",
"North West, Singapore\n",
"[[(u'northwest', u'road'), (u'singapore', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Arbil, Iraq\n",
"A\n",
"[[(u'a', u'road')]]\n",
"\n",
"\n",
"Ar\n",
"[[(u'ar', u'state')], [(u'arkansas', u'state')]]\n",
"\n",
"\n",
"Arb\n",
"[[(u'arb', u'house')]]\n",
"\n",
"\n",
"Arbi\n",
"[[(u'arbi', u'house')]]\n",
"\n",
"\n",
"Arbil\n",
"[[(u'arbil', u'road')]]\n",
"\n",
"\n",
"Arbil,\n",
"[[(u'arbil', u'road')]]\n",
"\n",
"\n",
"Arbil, \n",
"[[(u'arbil', u'road')]]\n",
"\n",
"\n",
"Arbil, I\n",
"[[(u'arbil', u'house_number'), (u'interstate', u'road')], [(u'arbil island', u'road')]]\n",
"\n",
"\n",
"Arbil, Ir\n",
"[[(u'arbil', u'house'), (u'ir', u'road')]]\n",
"\n",
"\n",
"Arbil, Ira\n",
"[[(u'arbil', u'house_number'), (u'ira', u'road')]]\n",
"\n",
"\n",
"Arbil, Iraq\n",
"[[(u'arbil', u'road'), (u'iraq', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Madrid, Spain\n",
"M\n",
"[[(u'mail', u'road')]]\n",
"\n",
"\n",
"Ma\n",
"[[(u'massachusetts', u'state')], [(u'ma', u'house')]]\n",
"\n",
"\n",
"Mad\n",
"[[(u'mad', u'house')]]\n",
"\n",
"\n",
"Madr\n",
"[[(u'madr', u'house')]]\n",
"\n",
"\n",
"Madri\n",
"[[(u'madri', u'house')]]\n",
"\n",
"\n",
"Madrid\n",
"[[(u'madrid', u'city')]]\n",
"\n",
"\n",
"Madrid,\n",
"[[(u'madrid', u'city')]]\n",
"\n",
"\n",
"Madrid, \n",
"[[(u'madrid', u'city')]]\n",
"\n",
"\n",
"Madrid, S\n",
"[[(u'madrid san', u'road')], [(u'madrid', u'city'), (u'south', u'house')], [(u'madrid s', u'road')]]\n",
"\n",
"\n",
"Madrid, Sp\n",
"[[(u'madrid', u'city'), (u'sole proprietorship', u'house')]]\n",
"\n",
"\n",
"Madrid, Spa\n",
"[[(u'madrid spa', u'house')]]\n",
"\n",
"\n",
"Madrid, Spai\n",
"[[(u'madrid spai', u'house')]]\n",
"\n",
"\n",
"Madrid, Spain\n",
"[[(u'madrid', u'city'), (u'spain', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 Main St, Dungannon, United Kingdom\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 M\n",
"[[(u'1', u'house_number'), (u'mail', u'road')]]\n",
"\n",
"\n",
"1 Ma\n",
"[[(u'1', u'house_number'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'ma', u'road')]]\n",
"\n",
"\n",
"1 Mai\n",
"[[(u'1 mai', u'road')]]\n",
"\n",
"\n",
"1 Main\n",
"[[(u'1', u'house_number'), (u'main', u'road')]]\n",
"\n",
"\n",
"1 Main \n",
"[[(u'1', u'house_number'), (u'main', u'road')]]\n",
"\n",
"\n",
"1 Main S\n",
"[[(u'1', u'house_number'), (u'main san', u'road')], [(u'1', u'house_number'), (u'main south', u'road')], [(u'1', u'house_number'), (u'main s', u'road')]]\n",
"\n",
"\n",
"1 Main St\n",
"[[(u'1', u'house_number'), (u'main saint', u'road')], [(u'1', u'house_number'), (u'main street', u'road')]]\n",
"\n",
"\n",
"1 Main St,\n",
"[[(u'1', u'house_number'), (u'main saint', u'road')], [(u'1', u'house_number'), (u'main street', u'road')]]\n",
"\n",
"\n",
"1 Main St, \n",
"[[(u'1', u'house_number'), (u'main saint', u'road')], [(u'1', u'house_number'), (u'main street', u'road')]]\n",
"\n",
"\n",
"1 Main St, D\n",
"[[(u'1', u'house_number'), (u'main saint d', u'road')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'500', u'house_number')], [(u'1', u'house_number'), (u'main street d', u'road')], [(u'1', u'house_number'), (u'main street', u'road'), (u'500', u'house_number')]]\n",
"\n",
"\n",
"1 Main St, Du\n",
"[[(u'1', u'house_number'), (u'main saint du', u'road')], [(u'1', u'house_number'), (u'main street du', u'road')]]\n",
"\n",
"\n",
"1 Main St, Dun\n",
"[[(u'1', u'house_number'), (u'main saint dun', u'road')], [(u'1', u'house_number'), (u'main street dun', u'road')]]\n",
"\n",
"\n",
"1 Main St, Dung\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dung', u'city')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dung', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dunga\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dunga', u'city')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dunga', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungan\n",
"[[(u'1', u'house_number'), (u'main saint dungan', u'road')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungan', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungann\n",
"[[(u'1', u'house_number'), (u'main saint dungann', u'road')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungann', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dunganno\n",
"[[(u'1', u'house_number'), (u'main saint dunganno', u'road')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dunganno', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungannon\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungannon,\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, \n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, U\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon u', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'u', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, Un\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'house'), (u'unit', u'house_number')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon union', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'unit', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon union', u'city')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, Uni\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon university', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon university', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, Unit\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'house'), (u'unit', u'house_number')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'unit', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, Unite\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'unite', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'unite', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United \n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United K\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united', u'house'), (u'k', u'country')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united', u'house'), (u'k', u'country')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United Ki\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united ki', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united ki', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United Kin\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united kin', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united kin', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United King\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united king', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united king', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United Kingd\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united kingd', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united kingd', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United Kingdo\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united kingdo', u'house')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united kingdo', u'house')]]\n",
"\n",
"\n",
"1 Main St, Dungannon, United Kingdom\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'dungannon', u'city'), (u'united kingdom', u'country')], [(u'1', u'house_number'), (u'main street', u'road'), (u'dungannon', u'city'), (u'united kingdom', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Hackney City Farm, Haggerston, Greater London\n",
"H\n",
"[[(u'h', u'house')]]\n",
"\n",
"\n",
"Ha\n",
"[[(u'ha', u'house')]]\n",
"\n",
"\n",
"Hac\n",
"[[(u'hac', u'house')]]\n",
"\n",
"\n",
"Hack\n",
"[[(u'hack', u'house')]]\n",
"\n",
"\n",
"Hackn\n",
"[[(u'hackn', u'house')]]\n",
"\n",
"\n",
"Hackne\n",
"[[(u'hackne', u'house')]]\n",
"\n",
"\n",
"Hackney\n",
"[[(u'hackney', u'house')]]\n",
"\n",
"\n",
"Hackney \n",
"[[(u'hackney', u'house')]]\n",
"\n",
"\n",
"Hackney C\n",
"[[(u'hackney', u'road'), (u'centre', u'house')], [(u'hackney center', u'house')], [(u'hackney central', u'suburb')]]\n",
"\n",
"\n",
"Hackney Ci\n",
"[[(u'hackney circuit', u'road')]]\n",
"\n",
"\n",
"Hackney Cit\n",
"[[(u'hackney', u'road'), (u'cit', u'house')]]\n",
"\n",
"\n",
"Hackney City\n",
"[[(u'hackney', u'city_district'), (u'city', u'house')]]\n",
"\n",
"\n",
"Hackney City \n",
"[[(u'hackney', u'city_district'), (u'city', u'house')]]\n",
"\n",
"\n",
"Hackney City F\n",
"[[(u'hackney', u'city_district'), (u'city flat', u'house')]]\n",
"\n",
"\n",
"Hackney City Fa\n",
"[[(u'hackney', u'city_district'), (u'city fa', u'house')]]\n",
"\n",
"\n",
"Hackney City Far\n",
"[[(u'hackney', u'city_district'), (u'city far', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm,\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, \n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, H\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'h', u'house_number')]]\n",
"\n",
"\n",
"Hackney City Farm, Ha\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'ha', u'city')]]\n",
"\n",
"\n",
"Hackney City Farm, Hag\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'hag', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Hagg\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'hagg', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Hagge\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'hagge', u'city')]]\n",
"\n",
"\n",
"Hackney City Farm, Hagger\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'hagger', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggers\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggers', u'city')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerst\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerst', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggersto\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggersto', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston,\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, \n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, G\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'house_number'), (u'ground', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Gr\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston grove', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Gre\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'gre', u'city')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Grea\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm haggerston grea', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Great\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston great', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greate\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston greate', u'road')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater \n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater L\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater level', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater Lo\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater lo', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater Lon\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater lon', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater Lond\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater lond', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater Londo\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater londo', u'house')]]\n",
"\n",
"\n",
"Hackney City Farm, Haggerston, Greater London\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house'), (u'haggerston', u'road'), (u'greater london', u'state_district')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 Grolmanstraße, Berlin, Germany\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 G\n",
"[[(u'1', u'house_number'), (u'ground', u'road')]]\n",
"\n",
"\n",
"1 Gr\n",
"[[(u'1', u'house_number'), (u'grove', u'road')]]\n",
"\n",
"\n",
"1 Gro\n",
"[[(u'1', u'house_number'), (u'grove', u'road')]]\n",
"\n",
"\n",
"1 Grol\n",
"[[(u'1', u'house_number'), (u'grol', u'road')]]\n",
"\n",
"\n",
"1 Grolm\n",
"[[(u'1', u'house_number'), (u'grolm', u'road')]]\n",
"\n",
"\n",
"1 Grolma\n",
"[[(u'1', u'house_number'), (u'grolma', u'road')]]\n",
"\n",
"\n",
"1 Grolman\n",
"[[(u'1', u'house_number'), (u'grolman', u'road')]]\n",
"\n",
"\n",
"1 Grolmans\n",
"[[(u'1', u'house_number'), (u'grolmans', u'road')]]\n",
"\n",
"\n",
"1 Grolmanst\n",
"[[(u'1', u'house_number'), (u'grolmanst', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstr\n",
"[[(u'1', u'house_number'), (u'grolmanstr', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstra\n",
"[[(u'1', u'house_number'), (u'grolmanstra', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraß\n",
"[[(u'1', u'house_number'), (u'grolmanstrass', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraße\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraße,\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraße, \n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraße, B\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'b', u'house_number')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Be\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'be', u'country')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Ber\n",
"[[(u'1', u'house'), (u'grolmanstrasse ber', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berl\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berl', u'house')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berli\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berli', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin,\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, \n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, G\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin ground', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, Ge\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city'), (u'ge', u'country')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, Ger\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin ger', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, Germ\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'house'), (u'germ', u'country')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, Germa\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city'), (u'germa', u'house')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, German\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin german', u'city')]]\n",
"\n",
"\n",
"1 Grolmanstraße, Berlin, Germany\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road'), (u'berlin', u'city'), (u'germany', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: New Zealand\n",
"N\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"Ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"New\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"New \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"New Z\n",
"[[(u'new z', u'house')]]\n",
"\n",
"\n",
"New Ze\n",
"[[(u'new ze', u'house')]]\n",
"\n",
"\n",
"New Zea\n",
"[[(u'new zea', u'house')]]\n",
"\n",
"\n",
"New Zeal\n",
"[[(u'new zeal', u'house')]]\n",
"\n",
"\n",
"New Zeala\n",
"[[(u'new zeala', u'house')]]\n",
"\n",
"\n",
"New Zealan\n",
"[[(u'new zealan', u'house')]]\n",
"\n",
"\n",
"New Zealand\n",
"[[(u'new zealand', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: McDonald's, Central Singapore, Singapore\n",
"M\n",
"[[(u'mail', u'road')]]\n",
"\n",
"\n",
"Mc\n",
"[[(u'mailcenter', u'road')]]\n",
"\n",
"\n",
"McD\n",
"[[(u'mcd', u'house')], [(u'1400', u'house_number')]]\n",
"\n",
"\n",
"McDo\n",
"[[(u'mcdo', u'house')]]\n",
"\n",
"\n",
"McDon\n",
"[[(u'mcdon', u'house')]]\n",
"\n",
"\n",
"McDona\n",
"[[(u'mcdona', u'house')]]\n",
"\n",
"\n",
"McDonal\n",
"[[(u'mcdonal', u'house')]]\n",
"\n",
"\n",
"McDonald\n",
"[[(u'mcdonald', u'house')]]\n",
"\n",
"\n",
"McDonald'\n",
"[[(u\"mcdonald '\", u'house')]]\n",
"\n",
"\n",
"McDonald's\n",
"[[(u\"mcdonald's\", u'house')]]\n",
"\n",
"\n",
"McDonald's,\n",
"[[(u\"mcdonald's\", u'house')]]\n",
"\n",
"\n",
"McDonald's, \n",
"[[(u\"mcdonald's\", u'house')]]\n",
"\n",
"\n",
"McDonald's, C\n",
"[[(u\"mcdonald's centre\", u'house')], [(u\"mcdonald's center\", u'house')], [(u\"mcdonald's central\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Ce\n",
"[[(u\"mcdonald's\", u'house'), (u'cove', u'road')]]\n",
"\n",
"\n",
"McDonald's, Cen\n",
"[[(u\"mcdonald's center\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Cent\n",
"[[(u\"mcdonald's cent\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Centr\n",
"[[(u\"mcdonald's\", u'house'), (u'centr', u'road')]]\n",
"\n",
"\n",
"McDonald's, Centra\n",
"[[(u\"mcdonald's centra\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central\n",
"[[(u\"mcdonald's central\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central \n",
"[[(u\"mcdonald's central\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central S\n",
"[[(u\"mcdonald's central san\", u'house')], [(u\"mcdonald's central\", u'house'), (u'south', u'road')], [(u\"mcdonald's central\", u'house'), (u's', u'road')]]\n",
"\n",
"\n",
"McDonald's, Central Si\n",
"[[(u\"mcdonald's central\", u'house'), (u'si', u'country')]]\n",
"\n",
"\n",
"McDonald's, Central Sin\n",
"[[(u\"mcdonald's central sin\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Sing\n",
"[[(u\"mcdonald's central sing\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singa\n",
"[[(u\"mcdonald's central singa\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singap\n",
"[[(u\"mcdonald's central singap\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapo\n",
"[[(u\"mcdonald's central singapo\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapor\n",
"[[(u\"mcdonald's central singapor\", u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore,\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, \n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, S\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u'san', u'road')], [(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u'south', u'road')], [(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u's', u'road')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Si\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u'si', u'country')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Sin\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u'sin', u'suburb')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Sing\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore sing', u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Singa\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city'), (u'singa', u'country')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Singap\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore singap', u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Singapo\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore singapo', u'house')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Singapor\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'house'), (u'singapor', u'country')]]\n",
"\n",
"\n",
"McDonald's, Central Singapore, Singapore\n",
"[[(u\"mcdonald's\", u'house'), (u'central', u'road'), (u'singapore', u'city'), (u'singapore', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: autocomplete_focus\n",
"TEST: DiDi dumpling\n",
"D\n",
"[[(u'd', u'house')], [(u'500', u'house_number')]]\n",
"\n",
"\n",
"Di\n",
"[[(u'di', u'house')], [(u'501', u'house_number')]]\n",
"\n",
"\n",
"DiD\n",
"[[(u'did', u'house')], [(u'500', u'house_number')]]\n",
"\n",
"\n",
"DiDi\n",
"[[(u'didi', u'house')], [(u'501', u'house_number')]]\n",
"\n",
"\n",
"DiDi \n",
"[[(u'didi', u'house')]]\n",
"\n",
"\n",
"DiDi d\n",
"[[(u'didi d', u'house')], [(u'501', u'house'), (u'500', u'house_number')]]\n",
"\n",
"\n",
"DiDi du\n",
"[[(u'didi du', u'house')], [(u'501', u'house_number'), (u'du', u'road')]]\n",
"\n",
"\n",
"DiDi dum\n",
"[[(u'didi dum', u'house')], [(u'501', u'house_number'), (u'dum', u'road')]]\n",
"\n",
"\n",
"DiDi dump\n",
"[[(u'didi dump', u'house')], [(u'501', u'house_number'), (u'dump', u'road')]]\n",
"\n",
"\n",
"DiDi dumpl\n",
"[[(u'didi dumpl', u'house')], [(u'501', u'house_number'), (u'dumpl', u'road')]]\n",
"\n",
"\n",
"DiDi dumpli\n",
"[[(u'didi dumpli', u'house')], [(u'501', u'house_number'), (u'dumpli', u'road')]]\n",
"\n",
"\n",
"DiDi dumplin\n",
"[[(u'didi dumplin', u'house')], [(u'501', u'house_number'), (u'dumplin', u'road')]]\n",
"\n",
"\n",
"DiDi dumpling\n",
"[[(u'didi dumpling', u'house')], [(u'501', u'house_number'), (u'dumpling', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: union square\n",
"u\n",
"[[(u'u', u'house')]]\n",
"\n",
"\n",
"un\n",
"[[(u'unit', u'house')], [(u'union', u'house')]]\n",
"\n",
"\n",
"uni\n",
"[[(u'university', u'house')]]\n",
"\n",
"\n",
"unio\n",
"[[(u'unio', u'road')]]\n",
"\n",
"\n",
"union\n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union \n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union s\n",
"[[(u'union san', u'road')], [(u'union', u'house'), (u'south', u'road')], [(u'union s', u'road')]]\n",
"\n",
"\n",
"union sq\n",
"[[(u'union', u'city'), (u'square', u'road')]]\n",
"\n",
"\n",
"union squ\n",
"[[(u'union squ', u'house')]]\n",
"\n",
"\n",
"union squa\n",
"[[(u'union squa', u'house')]]\n",
"\n",
"\n",
"union squar\n",
"[[(u'union squar', u'house')]]\n",
"\n",
"\n",
"union square\n",
"[[(u'union', u'city'), (u'square', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: union square\n",
"u\n",
"[[(u'u', u'house')]]\n",
"\n",
"\n",
"un\n",
"[[(u'unit', u'house')], [(u'union', u'house')]]\n",
"\n",
"\n",
"uni\n",
"[[(u'university', u'house')]]\n",
"\n",
"\n",
"unio\n",
"[[(u'unio', u'road')]]\n",
"\n",
"\n",
"union\n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union \n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union s\n",
"[[(u'union san', u'road')], [(u'union', u'house'), (u'south', u'road')], [(u'union s', u'road')]]\n",
"\n",
"\n",
"union sq\n",
"[[(u'union', u'city'), (u'square', u'road')]]\n",
"\n",
"\n",
"union squ\n",
"[[(u'union squ', u'house')]]\n",
"\n",
"\n",
"union squa\n",
"[[(u'union squa', u'house')]]\n",
"\n",
"\n",
"union squar\n",
"[[(u'union squar', u'house')]]\n",
"\n",
"\n",
"union square\n",
"[[(u'union', u'city'), (u'square', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: hard rock cafe\n",
"h\n",
"[[(u'h', u'house')]]\n",
"\n",
"\n",
"ha\n",
"[[(u'ha', u'house')]]\n",
"\n",
"\n",
"har\n",
"[[(u'har', u'house')]]\n",
"\n",
"\n",
"hard\n",
"[[(u'hard', u'city')]]\n",
"\n",
"\n",
"hard \n",
"[[(u'hard', u'city')]]\n",
"\n",
"\n",
"hard r\n",
"[[(u'hard river', u'house')]]\n",
"\n",
"\n",
"hard ro\n",
"[[(u'hard', u'city'), (u'ro', u'country')]]\n",
"\n",
"\n",
"hard roc\n",
"[[(u'hard roc', u'house')]]\n",
"\n",
"\n",
"hard rock\n",
"[[(u'hard rock', u'house')]]\n",
"\n",
"\n",
"hard rock \n",
"[[(u'hard rock', u'house')]]\n",
"\n",
"\n",
"hard rock c\n",
"[[(u'hard rock centre', u'house')], [(u'hard rock center', u'house')], [(u'hard rock central', u'house')]]\n",
"\n",
"\n",
"hard rock ca\n",
"[[(u'hard rock california', u'house')], [(u'hard rock', u'house'), (u'ca', u'country')]]\n",
"\n",
"\n",
"hard rock caf\n",
"[[(u'hard rock caf', u'house')]]\n",
"\n",
"\n",
"hard rock cafe\n",
"[[(u'hard rock cafe', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: hard rock cafe\n",
"h\n",
"[[(u'h', u'house')]]\n",
"\n",
"\n",
"ha\n",
"[[(u'ha', u'house')]]\n",
"\n",
"\n",
"har\n",
"[[(u'har', u'house')]]\n",
"\n",
"\n",
"hard\n",
"[[(u'hard', u'city')]]\n",
"\n",
"\n",
"hard \n",
"[[(u'hard', u'city')]]\n",
"\n",
"\n",
"hard r\n",
"[[(u'hard river', u'house')]]\n",
"\n",
"\n",
"hard ro\n",
"[[(u'hard', u'city'), (u'ro', u'country')]]\n",
"\n",
"\n",
"hard roc\n",
"[[(u'hard roc', u'house')]]\n",
"\n",
"\n",
"hard rock\n",
"[[(u'hard rock', u'house')]]\n",
"\n",
"\n",
"hard rock \n",
"[[(u'hard rock', u'house')]]\n",
"\n",
"\n",
"hard rock c\n",
"[[(u'hard rock centre', u'house')], [(u'hard rock center', u'house')], [(u'hard rock central', u'house')]]\n",
"\n",
"\n",
"hard rock ca\n",
"[[(u'hard rock california', u'house')], [(u'hard rock', u'house'), (u'ca', u'country')]]\n",
"\n",
"\n",
"hard rock caf\n",
"[[(u'hard rock caf', u'house')]]\n",
"\n",
"\n",
"hard rock cafe\n",
"[[(u'hard rock cafe', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: exact_matches\n",
"TEST: 100 20th street\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"10\n",
"[[(u'10', u'house_number')]]\n",
"\n",
"\n",
"100\n",
"[[(u'100', u'house_number')]]\n",
"\n",
"\n",
"100 \n",
"[[(u'100', u'house_number')]]\n",
"\n",
"\n",
"100 2\n",
"[[(u'100 2', u'house_number')]]\n",
"\n",
"\n",
"100 20\n",
"[[(u'100 20', u'postcode')]]\n",
"\n",
"\n",
"100 20t\n",
"[[(u'100 20t', u'house_number')]]\n",
"\n",
"\n",
"100 20th\n",
"[[(u'100', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"100 20th \n",
"[[(u'100', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"100 20th s\n",
"[[(u'100', u'house_number'), (u'20th san', u'road')], [(u'100', u'house_number'), (u'20th south', u'road')], [(u'100', u'house_number'), (u'20th s', u'road')]]\n",
"\n",
"\n",
"100 20th st\n",
"[[(u'100', u'house_number'), (u'20th saint', u'road')], [(u'100', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"100 20th str\n",
"[[(u'100', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"100 20th stre\n",
"[[(u'100', u'house_number'), (u'20th stre', u'road')]]\n",
"\n",
"\n",
"100 20th stree\n",
"[[(u'100', u'house_number'), (u'20th stree', u'road')]]\n",
"\n",
"\n",
"100 20th street\n",
"[[(u'100', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 40 20th street\n",
"4\n",
"[[(u'4', u'house_number')]]\n",
"\n",
"\n",
"40\n",
"[[(u'40', u'house_number')]]\n",
"\n",
"\n",
"40 \n",
"[[(u'40', u'house_number')]]\n",
"\n",
"\n",
"40 2\n",
"[[(u'40', u'road'), (u'2', u'house_number')]]\n",
"\n",
"\n",
"40 20\n",
"[[(u'40', u'house'), (u'20', u'house_number')]]\n",
"\n",
"\n",
"40 20t\n",
"[[(u'40', u'road'), (u'20t', u'house_number')]]\n",
"\n",
"\n",
"40 20th\n",
"[[(u'40', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"40 20th \n",
"[[(u'40', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"40 20th s\n",
"[[(u'40', u'house_number'), (u'20th san', u'road')], [(u'40', u'house_number'), (u'20th south', u'road')], [(u'40', u'house_number'), (u'20th s', u'road')]]\n",
"\n",
"\n",
"40 20th st\n",
"[[(u'40', u'house_number'), (u'20th saint', u'road')], [(u'40', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"40 20th str\n",
"[[(u'40', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"40 20th stre\n",
"[[(u'40', u'house_number'), (u'20th stre', u'road')]]\n",
"\n",
"\n",
"40 20th stree\n",
"[[(u'40', u'house_number'), (u'20th stree', u'road')]]\n",
"\n",
"\n",
"40 20th street\n",
"[[(u'40', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 120 42nd Street\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"12\n",
"[[(u'12', u'house_number')]]\n",
"\n",
"\n",
"120\n",
"[[(u'120', u'house_number')]]\n",
"\n",
"\n",
"120 \n",
"[[(u'120', u'house_number')]]\n",
"\n",
"\n",
"120 4\n",
"[[(u'120 4', u'house_number')]]\n",
"\n",
"\n",
"120 42\n",
"[[(u'120', u'road'), (u'42', u'house_number')]]\n",
"\n",
"\n",
"120 42n\n",
"[[(u'120 42n', u'house_number')]]\n",
"\n",
"\n",
"120 42nd\n",
"[[(u'120', u'house_number'), (u'42nd', u'road')]]\n",
"\n",
"\n",
"120 42nd \n",
"[[(u'120', u'house_number'), (u'42nd', u'road')]]\n",
"\n",
"\n",
"120 42nd S\n",
"[[(u'120', u'house_number'), (u'42nd san', u'road')], [(u'120', u'house_number'), (u'42nd south', u'road')], [(u'120', u'house_number'), (u'42nd s', u'road')]]\n",
"\n",
"\n",
"120 42nd St\n",
"[[(u'120', u'house_number'), (u'42nd saint', u'road')], [(u'120', u'house_number'), (u'42nd street', u'road')]]\n",
"\n",
"\n",
"120 42nd Str\n",
"[[(u'120', u'house_number'), (u'42nd street', u'road')]]\n",
"\n",
"\n",
"120 42nd Stre\n",
"[[(u'120', u'house_number'), (u'42nd stre', u'road')]]\n",
"\n",
"\n",
"120 42nd Stree\n",
"[[(u'120', u'house_number'), (u'42nd stree', u'road')]]\n",
"\n",
"\n",
"120 42nd Street\n",
"[[(u'120', u'house_number'), (u'42nd street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 358 41st street, ny\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"35\n",
"[[(u'35', u'house_number')]]\n",
"\n",
"\n",
"358\n",
"[[(u'358', u'house_number')]]\n",
"\n",
"\n",
"358 \n",
"[[(u'358', u'house_number')]]\n",
"\n",
"\n",
"358 4\n",
"[[(u'358 4', u'house_number')]]\n",
"\n",
"\n",
"358 41\n",
"[[(u'358 41', u'postcode')]]\n",
"\n",
"\n",
"358 41s\n",
"[[(u'358', u'house_number'), (u'41s', u'road')]]\n",
"\n",
"\n",
"358 41st\n",
"[[(u'358', u'house_number'), (u'41st', u'road')]]\n",
"\n",
"\n",
"358 41st \n",
"[[(u'358', u'house_number'), (u'41st', u'road')]]\n",
"\n",
"\n",
"358 41st s\n",
"[[(u'358', u'house_number'), (u'41st san', u'road')], [(u'358', u'house_number'), (u'41st south', u'road')], [(u'358', u'house_number'), (u'41st s', u'road')]]\n",
"\n",
"\n",
"358 41st st\n",
"[[(u'358', u'house_number'), (u'41st saint', u'road')], [(u'358', u'house_number'), (u'41st street', u'road')]]\n",
"\n",
"\n",
"358 41st str\n",
"[[(u'358', u'house_number'), (u'41st street', u'road')]]\n",
"\n",
"\n",
"358 41st stre\n",
"[[(u'358', u'house_number'), (u'41st stre', u'road')]]\n",
"\n",
"\n",
"358 41st stree\n",
"[[(u'358', u'house_number'), (u'41st stree', u'road')]]\n",
"\n",
"\n",
"358 41st street\n",
"[[(u'358', u'house_number'), (u'41st street', u'road')]]\n",
"\n",
"\n",
"358 41st street,\n",
"[[(u'358', u'house_number'), (u'41st street', u'road')]]\n",
"\n",
"\n",
"358 41st street, \n",
"[[(u'358', u'house_number'), (u'41st street', u'road')]]\n",
"\n",
"\n",
"358 41st street, n\n",
"[[(u'358', u'house_number'), (u'41st street north', u'road')], [(u'358', u'house_number'), (u'41st street n', u'road')]]\n",
"\n",
"\n",
"358 41st street, ny\n",
"[[(u'358', u'house_number'), (u'41st street', u'road'), (u'new york', u'state')], [(u'358', u'house_number'), (u'41st street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1359 54 street, ny\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"13\n",
"[[(u'13', u'house_number')]]\n",
"\n",
"\n",
"135\n",
"[[(u'135', u'house_number')]]\n",
"\n",
"\n",
"1359\n",
"[[(u'1359', u'house_number')]]\n",
"\n",
"\n",
"1359 \n",
"[[(u'1359', u'house_number')]]\n",
"\n",
"\n",
"1359 5\n",
"[[(u'1359 5', u'house_number')]]\n",
"\n",
"\n",
"1359 54\n",
"[[(u'1359', u'house_number'), (u'54', u'road')]]\n",
"\n",
"\n",
"1359 54 \n",
"[[(u'1359', u'house_number'), (u'54', u'road')]]\n",
"\n",
"\n",
"1359 54 s\n",
"[[(u'1359', u'house_number'), (u'54 san', u'road')], [(u'1359', u'house_number'), (u'54 south', u'road')], [(u'1359', u'house_number'), (u'54 s', u'road')]]\n",
"\n",
"\n",
"1359 54 st\n",
"[[(u'1359', u'house_number'), (u'54 saint', u'road')], [(u'1359', u'house_number'), (u'54 street', u'road')]]\n",
"\n",
"\n",
"1359 54 str\n",
"[[(u'1359', u'house_number'), (u'54 street', u'road')]]\n",
"\n",
"\n",
"1359 54 stre\n",
"[[(u'1359', u'house_number'), (u'54 stre', u'road')]]\n",
"\n",
"\n",
"1359 54 stree\n",
"[[(u'1359', u'house_number'), (u'54 stree', u'road')]]\n",
"\n",
"\n",
"1359 54 street\n",
"[[(u'1359', u'house_number'), (u'54 street', u'road')]]\n",
"\n",
"\n",
"1359 54 street,\n",
"[[(u'1359', u'house_number'), (u'54 street', u'road')]]\n",
"\n",
"\n",
"1359 54 street, \n",
"[[(u'1359', u'house_number'), (u'54 street', u'road')]]\n",
"\n",
"\n",
"1359 54 street, n\n",
"[[(u'1359', u'house_number'), (u'54 street north', u'road')], [(u'1359', u'house_number'), (u'54 street n', u'road')]]\n",
"\n",
"\n",
"1359 54 street, ny\n",
"[[(u'1359', u'house_number'), (u'54 street', u'road'), (u'new york', u'state')], [(u'1359', u'house_number'), (u'54 street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 310 7 street, ny\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"31\n",
"[[(u'31', u'house_number')]]\n",
"\n",
"\n",
"310\n",
"[[(u'310', u'house_number')]]\n",
"\n",
"\n",
"310 \n",
"[[(u'310', u'house_number')]]\n",
"\n",
"\n",
"310 7\n",
"[[(u'310 7', u'house_number')]]\n",
"\n",
"\n",
"310 7 \n",
"[[(u'310 7', u'house_number')]]\n",
"\n",
"\n",
"310 7 s\n",
"[[(u'310 7', u'house_number'), (u'san', u'road')], [(u'310 7', u'house_number'), (u'south', u'road')], [(u'310 7', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"310 7 st\n",
"[[(u'310 7', u'house_number'), (u'saint', u'road')], [(u'310', u'house_number'), (u'7 street', u'road')]]\n",
"\n",
"\n",
"310 7 str\n",
"[[(u'310', u'house_number'), (u'7 street', u'road')]]\n",
"\n",
"\n",
"310 7 stre\n",
"[[(u'310 7', u'house_number'), (u'stre', u'road')]]\n",
"\n",
"\n",
"310 7 stree\n",
"[[(u'310 7', u'house_number'), (u'stree', u'road')]]\n",
"\n",
"\n",
"310 7 street\n",
"[[(u'310', u'house_number'), (u'7 street', u'road')]]\n",
"\n",
"\n",
"310 7 street,\n",
"[[(u'310', u'house_number'), (u'7 street', u'road')]]\n",
"\n",
"\n",
"310 7 street, \n",
"[[(u'310', u'house_number'), (u'7 street', u'road')]]\n",
"\n",
"\n",
"310 7 street, n\n",
"[[(u'310', u'house_number'), (u'7 street north', u'road')], [(u'310', u'house_number'), (u'7 street n', u'road')]]\n",
"\n",
"\n",
"310 7 street, ny\n",
"[[(u'310', u'house_number'), (u'7 street', u'road'), (u'new york', u'state')], [(u'310', u'house_number'), (u'7 street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 921 83 street, ny\n",
"9\n",
"[[(u'9', u'house_number')]]\n",
"\n",
"\n",
"92\n",
"[[(u'92', u'house_number')]]\n",
"\n",
"\n",
"921\n",
"[[(u'921', u'house_number')]]\n",
"\n",
"\n",
"921 \n",
"[[(u'921', u'house_number')]]\n",
"\n",
"\n",
"921 8\n",
"[[(u'921 8', u'house_number')]]\n",
"\n",
"\n",
"921 83\n",
"[[(u'921', u'road'), (u'83', u'house_number')]]\n",
"\n",
"\n",
"921 83 \n",
"[[(u'921', u'road'), (u'83', u'house_number')]]\n",
"\n",
"\n",
"921 83 s\n",
"[[(u'921', u'road'), (u'83', u'house_number'), (u'san', u'road')], [(u'921', u'road'), (u'83', u'house_number'), (u'south', u'road')], [(u'921', u'road'), (u'83', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"921 83 st\n",
"[[(u'921', u'road'), (u'83', u'house_number'), (u'saint', u'road')], [(u'921 83 street', u'road')]]\n",
"\n",
"\n",
"921 83 str\n",
"[[(u'921 83 street', u'road')]]\n",
"\n",
"\n",
"921 83 stre\n",
"[[(u'921', u'road'), (u'83', u'house_number'), (u'stre', u'road')]]\n",
"\n",
"\n",
"921 83 stree\n",
"[[(u'921', u'road'), (u'83', u'house_number'), (u'stree', u'road')]]\n",
"\n",
"\n",
"921 83 street\n",
"[[(u'921 83 street', u'road')]]\n",
"\n",
"\n",
"921 83 street,\n",
"[[(u'921 83 street', u'road')]]\n",
"\n",
"\n",
"921 83 street, \n",
"[[(u'921 83 street', u'road')]]\n",
"\n",
"\n",
"921 83 street, n\n",
"[[(u'921 83 street north', u'road')], [(u'921 83 street n', u'road')]]\n",
"\n",
"\n",
"921 83 street, ny\n",
"[[(u'921 83 street', u'road'), (u'new york', u'state')], [(u'921 83 street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 518 3 street, ny\n",
"5\n",
"[[(u'5', u'house_number')]]\n",
"\n",
"\n",
"51\n",
"[[(u'51', u'house_number')]]\n",
"\n",
"\n",
"518\n",
"[[(u'518', u'house_number')]]\n",
"\n",
"\n",
"518 \n",
"[[(u'518', u'house_number')]]\n",
"\n",
"\n",
"518 3\n",
"[[(u'518 3', u'house_number')]]\n",
"\n",
"\n",
"518 3 \n",
"[[(u'518 3', u'house_number')]]\n",
"\n",
"\n",
"518 3 s\n",
"[[(u'518 3', u'house_number'), (u'san', u'road')], [(u'518 3', u'house_number'), (u'south', u'road')], [(u'518 3', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"518 3 st\n",
"[[(u'518 3', u'house_number'), (u'saint', u'road')], [(u'518', u'house_number'), (u'3 street', u'road')]]\n",
"\n",
"\n",
"518 3 str\n",
"[[(u'518', u'house_number'), (u'3 street', u'road')]]\n",
"\n",
"\n",
"518 3 stre\n",
"[[(u'518 3', u'house_number'), (u'stre', u'road')]]\n",
"\n",
"\n",
"518 3 stree\n",
"[[(u'518 3', u'house_number'), (u'stree', u'road')]]\n",
"\n",
"\n",
"518 3 street\n",
"[[(u'518', u'house_number'), (u'3 street', u'road')]]\n",
"\n",
"\n",
"518 3 street,\n",
"[[(u'518', u'house_number'), (u'3 street', u'road')]]\n",
"\n",
"\n",
"518 3 street, \n",
"[[(u'518', u'house_number'), (u'3 street', u'road')]]\n",
"\n",
"\n",
"518 3 street, n\n",
"[[(u'518', u'house_number'), (u'3 street north', u'road')], [(u'518', u'house_number'), (u'3 street n', u'road')]]\n",
"\n",
"\n",
"518 3 street, ny\n",
"[[(u'518', u'house_number'), (u'3 street', u'road'), (u'new york', u'state')], [(u'518', u'house_number'), (u'3 street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: landmarks\n",
"TEST: statue of liberty\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"st\n",
"[[(u'saint', u'house')], [(u'street', u'road')]]\n",
"\n",
"\n",
"sta\n",
"[[(u'station', u'house')], [(u'santa', u'road')]]\n",
"\n",
"\n",
"stat\n",
"[[(u'stat', u'house')]]\n",
"\n",
"\n",
"statu\n",
"[[(u'statu', u'house')]]\n",
"\n",
"\n",
"statue\n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue \n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue o\n",
"[[(u'statue o', u'house')]]\n",
"\n",
"\n",
"statue of\n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of \n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of l\n",
"[[(u'statue of level', u'house')]]\n",
"\n",
"\n",
"statue of li\n",
"[[(u'statue of li', u'house')], [(u'statue of 51', u'house')]]\n",
"\n",
"\n",
"statue of lib\n",
"[[(u'statue of lib', u'house')]]\n",
"\n",
"\n",
"statue of libe\n",
"[[(u'statue of libe', u'house')]]\n",
"\n",
"\n",
"statue of liber\n",
"[[(u'statue of liber', u'house')]]\n",
"\n",
"\n",
"statue of libert\n",
"[[(u'statue of libert', u'house')]]\n",
"\n",
"\n",
"statue of liberty\n",
"[[(u'statue of liberty', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: search_coarse\n",
"TEST: brooklyn\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"br\n",
"[[(u'brae', u'road')], [(u'brother', u'house')], [(u'brace', u'house')], [(u'bridge', u'house')], [(u'branch', u'house')]]\n",
"\n",
"\n",
"bro\n",
"[[(u'bro', u'road')]]\n",
"\n",
"\n",
"broo\n",
"[[(u'broo', u'house')]]\n",
"\n",
"\n",
"brook\n",
"[[(u'brook', u'house')]]\n",
"\n",
"\n",
"brookl\n",
"[[(u'brookl', u'house')]]\n",
"\n",
"\n",
"brookly\n",
"[[(u'brookly', u'house')]]\n",
"\n",
"\n",
"brooklyn\n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new york\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new y\n",
"[[(u'new y', u'house')]]\n",
"\n",
"\n",
"new yo\n",
"[[(u'new yo', u'house')]]\n",
"\n",
"\n",
"new yor\n",
"[[(u'new yor', u'house')]]\n",
"\n",
"\n",
"new york\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: address_matching\n",
"TEST: 30 w 26 st\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"30\n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 \n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 w\n",
"[[(u'30', u'house_number'), (u'west', u'road')], [(u'30', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"30 w \n",
"[[(u'30', u'house_number'), (u'west', u'road')], [(u'30', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"30 w 2\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'2', u'house_number')], [(u'30', u'house_number'), (u'w', u'road'), (u'2', u'house_number')]]\n",
"\n",
"\n",
"30 w 26\n",
"[[(u'30', u'house_number'), (u'west 26', u'road')], [(u'30', u'house_number'), (u'w 26', u'road')]]\n",
"\n",
"\n",
"30 w 26 \n",
"[[(u'30', u'house_number'), (u'west 26', u'road')], [(u'30', u'house_number'), (u'w 26', u'road')]]\n",
"\n",
"\n",
"30 w 26 s\n",
"[[(u'30', u'house_number'), (u'west 26', u'road'), (u'san', u'city')], [(u'30', u'house_number'), (u'west 26 south', u'road')], [(u'30', u'house_number'), (u'west 26 s', u'road')], [(u'30', u'house_number'), (u'w 26', u'road'), (u'san', u'city')], [(u'30', u'house_number'), (u'w 26 south', u'road')], [(u'30', u'house_number'), (u'w 26 s', u'road')]]\n",
"\n",
"\n",
"30 w 26 st\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'26', u'house_number'), (u'saint', u'road')], [(u'30', u'house_number'), (u'west 26 street', u'road')], [(u'30', u'house_number'), (u'w', u'road'), (u'26', u'house_number'), (u'saint', u'road')], [(u'30', u'house_number'), (u'w 26 street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 507 hackney rd\n",
"5\n",
"[[(u'5', u'house_number')]]\n",
"\n",
"\n",
"50\n",
"[[(u'50', u'house_number')]]\n",
"\n",
"\n",
"507\n",
"[[(u'507', u'house_number')]]\n",
"\n",
"\n",
"507 \n",
"[[(u'507', u'house_number')]]\n",
"\n",
"\n",
"507 h\n",
"[[(u'507 h', u'house_number')]]\n",
"\n",
"\n",
"507 ha\n",
"[[(u'507', u'house_number'), (u'ha', u'road')]]\n",
"\n",
"\n",
"507 hac\n",
"[[(u'507', u'house_number'), (u'hac', u'road')]]\n",
"\n",
"\n",
"507 hack\n",
"[[(u'507', u'house_number'), (u'hack', u'road')]]\n",
"\n",
"\n",
"507 hackn\n",
"[[(u'507', u'house_number'), (u'hackn', u'road')]]\n",
"\n",
"\n",
"507 hackne\n",
"[[(u'507', u'house_number'), (u'hackne', u'road')]]\n",
"\n",
"\n",
"507 hackney\n",
"[[(u'507', u'house_number'), (u'hackney', u'road')]]\n",
"\n",
"\n",
"507 hackney \n",
"[[(u'507', u'house_number'), (u'hackney', u'road')]]\n",
"\n",
"\n",
"507 hackney r\n",
"[[(u'507', u'house_number'), (u'hackney river', u'road')]]\n",
"\n",
"\n",
"507 hackney rd\n",
"[[(u'507', u'house_number'), (u'hackney road', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 49 Kay Street\n",
"4\n",
"[[(u'4', u'house_number')]]\n",
"\n",
"\n",
"49\n",
"[[(u'49', u'house_number')]]\n",
"\n",
"\n",
"49 \n",
"[[(u'49', u'house_number')]]\n",
"\n",
"\n",
"49 K\n",
"[[(u'49', u'house_number'), (u'k', u'road')]]\n",
"\n",
"\n",
"49 Ka\n",
"[[(u'49', u'house_number'), (u'ka', u'road')]]\n",
"\n",
"\n",
"49 Kay\n",
"[[(u'49', u'house_number'), (u'kay', u'road')]]\n",
"\n",
"\n",
"49 Kay \n",
"[[(u'49', u'house_number'), (u'kay', u'road')]]\n",
"\n",
"\n",
"49 Kay S\n",
"[[(u'49', u'house_number'), (u'kay san', u'road')], [(u'49', u'house_number'), (u'kay south', u'road')], [(u'49', u'house_number'), (u'kay s', u'road')]]\n",
"\n",
"\n",
"49 Kay St\n",
"[[(u'49', u'house_number'), (u'kay saint', u'road')], [(u'49', u'house_number'), (u'kay street', u'road')]]\n",
"\n",
"\n",
"49 Kay Str\n",
"[[(u'49', u'house_number'), (u'kay street', u'road')]]\n",
"\n",
"\n",
"49 Kay Stre\n",
"[[(u'49', u'house_number'), (u'kay stre', u'road')]]\n",
"\n",
"\n",
"49 Kay Stree\n",
"[[(u'49', u'house_number'), (u'kay stree', u'road')]]\n",
"\n",
"\n",
"49 Kay Street\n",
"[[(u'49', u'house_number'), (u'kay street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 339 W Main St, Cheshire, 06410\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"33\n",
"[[(u'33', u'house_number')]]\n",
"\n",
"\n",
"339\n",
"[[(u'339', u'house_number')]]\n",
"\n",
"\n",
"339 \n",
"[[(u'339', u'house_number')]]\n",
"\n",
"\n",
"339 W\n",
"[[(u'339', u'house_number'), (u'west', u'road')], [(u'339', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"339 W \n",
"[[(u'339', u'house_number'), (u'west', u'road')], [(u'339', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"339 W M\n",
"[[(u'339', u'house_number'), (u'west mail', u'road')], [(u'339', u'house_number'), (u'w mail', u'road')]]\n",
"\n",
"\n",
"339 W Ma\n",
"[[(u'339', u'house_number'), (u'west', u'road'), (u'massachusetts', u'state')], [(u'339', u'house_number'), (u'west', u'road'), (u'ma', u'state')], [(u'339', u'house_number'), (u'w', u'road'), (u'massachusetts', u'state')], [(u'339', u'house_number'), (u'w', u'road'), (u'ma', u'state')]]\n",
"\n",
"\n",
"339 W Mai\n",
"[[(u'339', u'house_number'), (u'west mai', u'road')], [(u'339', u'house_number'), (u'w mai', u'road')]]\n",
"\n",
"\n",
"339 W Main\n",
"[[(u'339', u'house_number'), (u'west main', u'road')], [(u'339', u'house_number'), (u'w main', u'road')]]\n",
"\n",
"\n",
"339 W Main \n",
"[[(u'339', u'house_number'), (u'west main', u'road')], [(u'339', u'house_number'), (u'w main', u'road')]]\n",
"\n",
"\n",
"339 W Main S\n",
"[[(u'339', u'house_number'), (u'west main', u'road'), (u'san', u'suburb')], [(u'339', u'house_number'), (u'west main south', u'road')], [(u'339', u'house_number'), (u'west main s', u'road')], [(u'339', u'house_number'), (u'w main', u'road'), (u'san', u'suburb')], [(u'339', u'house_number'), (u'w main south', u'road')], [(u'339', u'house_number'), (u'w main s', u'road')]]\n",
"\n",
"\n",
"339 W Main St\n",
"[[(u'339', u'house_number'), (u'west main saint', u'road')], [(u'339', u'house_number'), (u'west main street', u'road')], [(u'339', u'house_number'), (u'w main saint', u'road')], [(u'339', u'house_number'), (u'w main street', u'road')]]\n",
"\n",
"\n",
"339 W Main St,\n",
"[[(u'339', u'house_number'), (u'west main saint', u'road')], [(u'339', u'house_number'), (u'west main street', u'road')], [(u'339', u'house_number'), (u'w main saint', u'road')], [(u'339', u'house_number'), (u'w main street', u'road')]]\n",
"\n",
"\n",
"339 W Main St, \n",
"[[(u'339', u'house_number'), (u'west main saint', u'road')], [(u'339', u'house_number'), (u'west main street', u'road')], [(u'339', u'house_number'), (u'w main saint', u'road')], [(u'339', u'house_number'), (u'w main street', u'road')]]\n",
"\n",
"\n",
"339 W Main St, C\n",
"[[(u'339', u'house_number'), (u'west main saint', u'road'), (u'centre', u'house')], [(u'339', u'house_number'), (u'west main saint', u'road'), (u'center', u'house')], [(u'339', u'house_number'), (u'west main saint central', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'centre', u'suburb')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'center', u'suburb')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'central', u'suburb')], [(u'339', u'house_number'), (u'w main saint', u'road'), (u'centre', u'house')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'centre', u'suburb')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'center', u'suburb')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'central', u'suburb')]]\n",
"\n",
"\n",
"339 W Main St, Ch\n",
"[[(u'339', u'house_number'), (u'west main saint county highway', u'road')], [(u'339', u'house_number'), (u'west main saint chase', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'county highway', u'suburb')], [(u'339', u'house_number'), (u'west main street chase', u'road')], [(u'339', u'house_number'), (u'w main saint county highway', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'county highway', u'suburb')], [(u'339', u'house_number'), (u'w main street chase', u'road')]]\n",
"\n",
"\n",
"339 W Main St, Che\n",
"[[(u'339', u'house_number'), (u'west main', u'road'), (u'saint', u'city'), (u'che', u'country')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'che', u'country')], [(u'339', u'house_number'), (u'w main', u'road'), (u'saint', u'city'), (u'che', u'country')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'che', u'country')]]\n",
"\n",
"\n",
"339 W Main St, Ches\n",
"[[(u'339', u'house_number'), (u'west main saint ches', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'ches', u'city')], [(u'339', u'house_number'), (u'w main saint ches', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'ches', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Chesh\n",
"[[(u'339', u'house_number'), (u'west main saint chesh', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'chesh', u'city')], [(u'339', u'house_number'), (u'w main saint chesh', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'chesh', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshi\n",
"[[(u'339', u'house_number'), (u'west main saint cheshi', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshi', u'city')], [(u'339', u'house_number'), (u'w main saint cheshi', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshi', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshir\n",
"[[(u'339', u'house_number'), (u'west main saint cheshir', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshir', u'city')], [(u'339', u'house_number'), (u'w main saint cheshir', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshir', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshire', u'city')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshire', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire,\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshire', u'city')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshire', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, \n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshire', u'city')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshire', u'city')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, 0\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'0', u'house_number')], [(u'339', u'house_number'), (u'west main street cheshire', u'road'), (u'0', u'house_number')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'0', u'house_number')], [(u'339', u'house_number'), (u'w main street cheshire', u'road'), (u'0', u'house_number')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, 06\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'06', u'house_number')], [(u'339', u'house_number'), (u'west main street cheshire', u'road'), (u'06', u'house_number')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'06', u'house_number')], [(u'339', u'house_number'), (u'w main street cheshire', u'road'), (u'06', u'house_number')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, 064\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'064', u'house_number')], [(u'339', u'house_number'), (u'west main street cheshire', u'road'), (u'064', u'house_number')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'064', u'house_number')], [(u'339', u'house_number'), (u'w main street cheshire', u'road'), (u'064', u'house_number')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, 0641\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'0641', u'postcode')], [(u'339', u'house_number'), (u'west main street cheshire', u'road'), (u'0641', u'postcode')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'0641', u'postcode')], [(u'339', u'house_number'), (u'w main street cheshire', u'road'), (u'0641', u'postcode')]]\n",
"\n",
"\n",
"339 W Main St, Cheshire, 06410\n",
"[[(u'339', u'house_number'), (u'west main saint cheshire', u'road'), (u'06410', u'postcode')], [(u'339', u'house_number'), (u'west main street', u'road'), (u'cheshire', u'city'), (u'06410', u'postcode')], [(u'339', u'house_number'), (u'w main saint cheshire', u'road'), (u'06410', u'postcode')], [(u'339', u'house_number'), (u'w main street', u'road'), (u'cheshire', u'city'), (u'06410', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: confidence_score\n",
"TEST: 1 West 72nd St, New York, NY, 10023\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 W\n",
"[[(u'1', u'house_number'), (u'west', u'road')], [(u'1', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"1 We\n",
"[[(u'1', u'house_number'), (u'warehouse', u'road')]]\n",
"\n",
"\n",
"1 Wes\n",
"[[(u'1', u'house_number'), (u'wes', u'road')]]\n",
"\n",
"\n",
"1 West\n",
"[[(u'1', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"1 West \n",
"[[(u'1', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"1 West 7\n",
"[[(u'1', u'house_number'), (u'west', u'road'), (u'7', u'house_number')]]\n",
"\n",
"\n",
"1 West 72\n",
"[[(u'1', u'house_number'), (u'west 72', u'road')]]\n",
"\n",
"\n",
"1 West 72n\n",
"[[(u'1', u'house_number'), (u'west', u'road'), (u'72n', u'house_number')]]\n",
"\n",
"\n",
"1 West 72nd\n",
"[[(u'1', u'house_number'), (u'west 72nd', u'road')]]\n",
"\n",
"\n",
"1 West 72nd \n",
"[[(u'1', u'house_number'), (u'west 72nd', u'road')]]\n",
"\n",
"\n",
"1 West 72nd S\n",
"[[(u'1', u'house_number'), (u'west 72nd san', u'road')], [(u'1', u'house_number'), (u'west 72nd south', u'road')], [(u'1', u'house_number'), (u'west 72nd s', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St,\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St, \n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St, N\n",
"[[(u'1', u'house_number'), (u'west 72nd saint north', u'road')], [(u'1', u'house_number'), (u'west 72nd saint n', u'road')], [(u'1', u'house_number'), (u'west 72nd street north', u'road')], [(u'1', u'house_number'), (u'west 72nd street n', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St, Ne\n",
"[[(u'1', u'house_number'), (u'west 72nd saint ne', u'road')], [(u'1', u'house_number'), (u'west 72nd saint northeast', u'road')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'nebraska', u'state')], [(u'1', u'house_number'), (u'west 72nd street ne', u'road')], [(u'1', u'house_number'), (u'west 72nd street northeast', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New\n",
"[[(u'1', u'house_number'), (u'west 72nd saint new', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"1 West 72nd St, New \n",
"[[(u'1', u'house_number'), (u'west 72nd saint new', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"1 West 72nd St, New Y\n",
"[[(u'1', u'house_number'), (u'west 72nd saint new y', u'road')], [(u'1', u'house_number'), (u'west 72nd street new y', u'road')]]\n",
"\n",
"\n",
"1 West 72nd St, New Yo\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new yo', u'house')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new yo', u'suburb')]]\n",
"\n",
"\n",
"1 West 72nd St, New Yor\n",
"[[(u'1', u'house_number'), (u'west 72nd saint new', u'road'), (u'yor', u'suburb')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new yor', u'suburb')]]\n",
"\n",
"\n",
"1 West 72nd St, New York\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York,\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, \n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, N\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new', u'state'), (u'york', u'house'), (u'north', u'road')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'state'), (u'n', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'state'), (u'north', u'road')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'state'), (u'n', u'postcode')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY,\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, \n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, 1\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, 10\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10', u'postcode')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10', u'postcode')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, 100\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'100', u'postcode')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new', u'city'), (u'york', u'house'), (u'ny', u'state'), (u'100', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'100', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'100', u'postcode')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, 1002\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1002', u'postcode')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1002', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1002', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1002', u'postcode')]]\n",
"\n",
"\n",
"1 West 72nd St, New York, NY, 10023\n",
"[[(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10023', u'postcode')], [(u'1', u'house_number'), (u'west 72nd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10023', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'10023', u'postcode')], [(u'1', u'house_number'), (u'west 72nd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'10023', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: address_parsing\n",
"TEST: 101 saint mark pl 10009\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"10\n",
"[[(u'10', u'house_number')]]\n",
"\n",
"\n",
"101\n",
"[[(u'101', u'house_number')]]\n",
"\n",
"\n",
"101 \n",
"[[(u'101', u'house_number')]]\n",
"\n",
"\n",
"101 s\n",
"[[(u'101', u'house_number'), (u'san', u'road')], [(u'101', u'house_number'), (u'south', u'road')], [(u'101', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"101 sa\n",
"[[(u'101', u'house_number'), (u'southern', u'road'), (u'australia', u'country')]]\n",
"\n",
"\n",
"101 sai\n",
"[[(u'101', u'house_number'), (u'sai', u'road')]]\n",
"\n",
"\n",
"101 sain\n",
"[[(u'101', u'house_number'), (u'sain', u'road')]]\n",
"\n",
"\n",
"101 saint\n",
"[[(u'101', u'house_number'), (u'saint', u'road')]]\n",
"\n",
"\n",
"101 saint \n",
"[[(u'101', u'house_number'), (u'saint', u'road')]]\n",
"\n",
"\n",
"101 saint m\n",
"[[(u'101', u'house_number'), (u'saint mail', u'road')]]\n",
"\n",
"\n",
"101 saint ma\n",
"[[(u'101', u'house_number'), (u'saint', u'road'), (u'massachusetts', u'state')], [(u'101', u'house_number'), (u'saint ma', u'road')]]\n",
"\n",
"\n",
"101 saint mar\n",
"[[(u'101', u'house_number'), (u'saint mar', u'road')]]\n",
"\n",
"\n",
"101 saint mark\n",
"[[(u'101', u'house_number'), (u'saint mark', u'road')]]\n",
"\n",
"\n",
"101 saint mark \n",
"[[(u'101', u'house_number'), (u'saint mark', u'road')]]\n",
"\n",
"\n",
"101 saint mark p\n",
"[[(u'101', u'house_number'), (u'saint mark p', u'road')]]\n",
"\n",
"\n",
"101 saint mark pl\n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road')], [(u'101', u'house_number'), (u'saint mark', u'road'), (u'plain', u'city')]]\n",
"\n",
"\n",
"101 saint mark pl \n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road')], [(u'101', u'house_number'), (u'saint mark', u'road'), (u'plain', u'city')]]\n",
"\n",
"\n",
"101 saint mark pl 1\n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road'), (u'1', u'house_number')], [(u'101', u'house_number'), (u'saint mark plain', u'road'), (u'1', u'house_number')]]\n",
"\n",
"\n",
"101 saint mark pl 10\n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road'), (u'10', u'house_number')], [(u'101', u'house_number'), (u'saint mark plain', u'road'), (u'10', u'house_number')]]\n",
"\n",
"\n",
"101 saint mark pl 100\n",
"[[(u'101', u'house_number'), (u'saint mark', u'road'), (u'place', u'house'), (u'100', u'house_number')], [(u'101', u'house_number'), (u'saint mark plain', u'road'), (u'100', u'house_number')]]\n",
"\n",
"\n",
"101 saint mark pl 1000\n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road'), (u'1000', u'postcode')], [(u'101', u'house_number'), (u'saint mark', u'road'), (u'plain', u'suburb'), (u'1000', u'postcode')]]\n",
"\n",
"\n",
"101 saint mark pl 10009\n",
"[[(u'101', u'house_number'), (u'saint mark place', u'road'), (u'10009', u'postcode')], [(u'101', u'house_number'), (u'saint mark plain', u'road'), (u'10009', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 water st manhattan ny\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 w\n",
"[[(u'1', u'house_number'), (u'west', u'road')], [(u'1', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"1 wa\n",
"[[(u'1', u'house_number'), (u'wa', u'state')], [(u'1', u'house_number'), (u'washington', u'road')], [(u'1', u'house_number'), (u'western australia', u'state')]]\n",
"\n",
"\n",
"1 wat\n",
"[[(u'1', u'house_number'), (u'wat', u'road')]]\n",
"\n",
"\n",
"1 wate\n",
"[[(u'1', u'house_number'), (u'wate', u'road')]]\n",
"\n",
"\n",
"1 water\n",
"[[(u'1', u'house_number'), (u'water', u'road')]]\n",
"\n",
"\n",
"1 water \n",
"[[(u'1', u'house_number'), (u'water', u'road')]]\n",
"\n",
"\n",
"1 water s\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 water st\n",
"[[(u'1', u'house_number'), (u'water saint', u'road')], [(u'1', u'house_number'), (u'water street', u'road')]]\n",
"\n",
"\n",
"1 water st \n",
"[[(u'1', u'house_number'), (u'water saint', u'road')], [(u'1', u'house_number'), (u'water street', u'road')]]\n",
"\n",
"\n",
"1 water st m\n",
"[[(u'1', u'house_number'), (u'water saint mail', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'mail', u'suburb')]]\n",
"\n",
"\n",
"1 water st ma\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'ma', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'ma', u'state')]]\n",
"\n",
"\n",
"1 water st man\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'man', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'man', u'city')]]\n",
"\n",
"\n",
"1 water st manh\n",
"[[(u'1', u'house_number'), (u'water saint manh', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manh', u'city')]]\n",
"\n",
"\n",
"1 water st manha\n",
"[[(u'1', u'house_number'), (u'water saint manha', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manha', u'city')]]\n",
"\n",
"\n",
"1 water st manhat\n",
"[[(u'1', u'house_number'), (u'water saint manhat', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhat', u'city')]]\n",
"\n",
"\n",
"1 water st manhatt\n",
"[[(u'1', u'house_number'), (u'water saint manhatt', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhatt', u'city')]]\n",
"\n",
"\n",
"1 water st manhatta\n",
"[[(u'1', u'house_number'), (u'water saint manhatta', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhatta', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan \n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan n\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan north', u'city')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city'), (u'n', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan north', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city'), (u'n', u'road')]]\n",
"\n",
"\n",
"1 water st manhattan ny\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 water st manhattan ny\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 w\n",
"[[(u'1', u'house_number'), (u'west', u'road')], [(u'1', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"1 wa\n",
"[[(u'1', u'house_number'), (u'wa', u'state')], [(u'1', u'house_number'), (u'washington', u'road')], [(u'1', u'house_number'), (u'western australia', u'state')]]\n",
"\n",
"\n",
"1 wat\n",
"[[(u'1', u'house_number'), (u'wat', u'road')]]\n",
"\n",
"\n",
"1 wate\n",
"[[(u'1', u'house_number'), (u'wate', u'road')]]\n",
"\n",
"\n",
"1 water\n",
"[[(u'1', u'house_number'), (u'water', u'road')]]\n",
"\n",
"\n",
"1 water \n",
"[[(u'1', u'house_number'), (u'water', u'road')]]\n",
"\n",
"\n",
"1 water s\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 water st\n",
"[[(u'1', u'house_number'), (u'water saint', u'road')], [(u'1', u'house_number'), (u'water street', u'road')]]\n",
"\n",
"\n",
"1 water st \n",
"[[(u'1', u'house_number'), (u'water saint', u'road')], [(u'1', u'house_number'), (u'water street', u'road')]]\n",
"\n",
"\n",
"1 water st m\n",
"[[(u'1', u'house_number'), (u'water saint mail', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'mail', u'suburb')]]\n",
"\n",
"\n",
"1 water st ma\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'ma', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'ma', u'state')]]\n",
"\n",
"\n",
"1 water st man\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'man', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'man', u'city')]]\n",
"\n",
"\n",
"1 water st manh\n",
"[[(u'1', u'house_number'), (u'water saint manh', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manh', u'city')]]\n",
"\n",
"\n",
"1 water st manha\n",
"[[(u'1', u'house_number'), (u'water saint manha', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manha', u'city')]]\n",
"\n",
"\n",
"1 water st manhat\n",
"[[(u'1', u'house_number'), (u'water saint manhat', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhat', u'city')]]\n",
"\n",
"\n",
"1 water st manhatt\n",
"[[(u'1', u'house_number'), (u'water saint manhatt', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhatt', u'city')]]\n",
"\n",
"\n",
"1 water st manhatta\n",
"[[(u'1', u'house_number'), (u'water saint manhatta', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhatta', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan \n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city')]]\n",
"\n",
"\n",
"1 water st manhattan n\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan north', u'city')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city'), (u'n', u'road')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan north', u'city')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city'), (u'n', u'road')]]\n",
"\n",
"\n",
"1 water st manhattan ny\n",
"[[(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')], [(u'1', u'house_number'), (u'water saint', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'city_district'), (u'new york', u'state')], [(u'1', u'house_number'), (u'water street', u'road'), (u'manhattan', u'state_district'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 450 w 37th st, new york, ny 11232\n",
"4\n",
"[[(u'4', u'house_number')]]\n",
"\n",
"\n",
"45\n",
"[[(u'45', u'house_number')]]\n",
"\n",
"\n",
"450\n",
"[[(u'450', u'house_number')]]\n",
"\n",
"\n",
"450 \n",
"[[(u'450', u'house_number')]]\n",
"\n",
"\n",
"450 w\n",
"[[(u'450', u'house_number'), (u'west', u'road')], [(u'450', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"450 w \n",
"[[(u'450', u'house_number'), (u'west', u'road')], [(u'450', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"450 w 3\n",
"[[(u'450', u'house_number'), (u'west', u'road'), (u'3', u'house_number')], [(u'450', u'house_number'), (u'w', u'road'), (u'3', u'house_number')]]\n",
"\n",
"\n",
"450 w 37\n",
"[[(u'450', u'house_number'), (u'west 37', u'road')], [(u'450', u'house_number'), (u'w 37', u'road')]]\n",
"\n",
"\n",
"450 w 37t\n",
"[[(u'450', u'house_number'), (u'west', u'road'), (u'37t', u'house_number')], [(u'450', u'house_number'), (u'w', u'road'), (u'37t', u'house_number')]]\n",
"\n",
"\n",
"450 w 37th\n",
"[[(u'450', u'house_number'), (u'west 37th', u'road')], [(u'450', u'house_number'), (u'w 37th', u'road')]]\n",
"\n",
"\n",
"450 w 37th \n",
"[[(u'450', u'house_number'), (u'west 37th', u'road')], [(u'450', u'house_number'), (u'w 37th', u'road')]]\n",
"\n",
"\n",
"450 w 37th s\n",
"[[(u'450', u'house_number'), (u'west 37th san', u'road')], [(u'450', u'house_number'), (u'west 37th south', u'road')], [(u'450', u'house_number'), (u'west 37th s', u'road')], [(u'450', u'house_number'), (u'w 37th san', u'road')], [(u'450', u'house_number'), (u'w 37th south', u'road')], [(u'450', u'house_number'), (u'w 37th s', u'road')]]\n",
"\n",
"\n",
"450 w 37th st\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road')], [(u'450', u'house_number'), (u'w 37th saint', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road')]]\n",
"\n",
"\n",
"450 w 37th st,\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road')], [(u'450', u'house_number'), (u'w 37th saint', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road')]]\n",
"\n",
"\n",
"450 w 37th st, \n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road')], [(u'450', u'house_number'), (u'w 37th saint', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road')]]\n",
"\n",
"\n",
"450 w 37th st, n\n",
"[[(u'450', u'house_number'), (u'west 37th saint north', u'road')], [(u'450', u'house_number'), (u'west 37th saint n', u'road')], [(u'450', u'house_number'), (u'west 37th street north', u'road')], [(u'450', u'house_number'), (u'west 37th street n', u'road')], [(u'450', u'house_number'), (u'w 37th saint north', u'road')], [(u'450', u'house_number'), (u'w 37th street north', u'road')], [(u'450', u'house_number'), (u'w 37th street n', u'road')]]\n",
"\n",
"\n",
"450 w 37th st, ne\n",
"[[(u'450', u'house_number'), (u'west 37th saint ne', u'road')], [(u'450', u'house_number'), (u'west 37th saint northeast', u'road')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'nebraska', u'state')], [(u'450', u'house_number'), (u'west 37th street ne', u'road')], [(u'450', u'house_number'), (u'west 37th street northeast', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'nebraska', u'state')], [(u'450', u'house_number'), (u'w 37th saint ne', u'road')], [(u'450', u'house_number'), (u'w 37th street ne', u'road')], [(u'450', u'house_number'), (u'w 37th street northeast', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new\n",
"[[(u'450', u'house_number'), (u'west 37th saint new', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new', u'suburb')], [(u'450', u'house_number'), (u'w 37th saint new', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"450 w 37th st, new \n",
"[[(u'450', u'house_number'), (u'west 37th saint new', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new', u'suburb')], [(u'450', u'house_number'), (u'w 37th saint new', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"450 w 37th st, new y\n",
"[[(u'450', u'house_number'), (u'west 37th saint new y', u'road')], [(u'450', u'house_number'), (u'west 37th street new y', u'road')], [(u'450', u'house_number'), (u'w 37th saint new y', u'road')], [(u'450', u'house_number'), (u'w 37th street new y', u'road')]]\n",
"\n",
"\n",
"450 w 37th st, new yo\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new yo', u'house')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new yo', u'suburb')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new yo', u'house')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new yo', u'suburb')]]\n",
"\n",
"\n",
"450 w 37th st, new yor\n",
"[[(u'450', u'house_number'), (u'west 37th saint new', u'road'), (u'yor', u'suburb')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new yor', u'suburb')], [(u'450', u'house_number'), (u'w 37th saint new', u'road'), (u'yor', u'suburb')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new yor', u'suburb')]]\n",
"\n",
"\n",
"450 w 37th st, new york\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new york,\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new york, \n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new york, n\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new', u'state'), (u'york', u'house'), (u'north', u'road')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'state'), (u'n', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'state'), (u'north', u'road')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'state'), (u'n', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new', u'state'), (u'york', u'house'), (u'north', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'state'), (u'north', u'road')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'state'), (u'n', u'postcode')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny \n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny 1\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny 11\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11', u'postcode')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny 112\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new', u'city'), (u'york', u'house'), (u'ny', u'state'), (u'112', u'house_number')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'112', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'112', u'postcode')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny 1123\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1123', u'postcode')]]\n",
"\n",
"\n",
"450 w 37th st, new york, ny 11232\n",
"[[(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'west 37th saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'west 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'w 37th saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'450', u'house_number'), (u'w 37th street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: starbucks 10010\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"st\n",
"[[(u'saint', u'house')], [(u'street', u'road')]]\n",
"\n",
"\n",
"sta\n",
"[[(u'station', u'house')], [(u'santa', u'road')]]\n",
"\n",
"\n",
"star\n",
"[[(u'star', u'house')]]\n",
"\n",
"\n",
"starb\n",
"[[(u'starb', u'house')]]\n",
"\n",
"\n",
"starbu\n",
"[[(u'starbu', u'house')]]\n",
"\n",
"\n",
"starbuc\n",
"[[(u'starbuc', u'house')]]\n",
"\n",
"\n",
"starbuck\n",
"[[(u'starbuck', u'city')]]\n",
"\n",
"\n",
"starbucks\n",
"[[(u'starbucks', u'house')]]\n",
"\n",
"\n",
"starbucks \n",
"[[(u'starbucks', u'house')]]\n",
"\n",
"\n",
"starbucks 1\n",
"[[(u'starbucks', u'house'), (u'1', u'house_number')]]\n",
"\n",
"\n",
"starbucks 10\n",
"[[(u'starbucks', u'house'), (u'10', u'house_number')]]\n",
"\n",
"\n",
"starbucks 100\n",
"[[(u'starbucks', u'house'), (u'100', u'house_number')]]\n",
"\n",
"\n",
"starbucks 1001\n",
"[[(u'starbucks', u'house'), (u'1001', u'house_number')]]\n",
"\n",
"\n",
"starbucks 10010\n",
"[[(u'starbucks', u'house'), (u'10010', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 455 43rd st new york ny 11232\n",
"4\n",
"[[(u'4', u'house_number')]]\n",
"\n",
"\n",
"45\n",
"[[(u'45', u'house_number')]]\n",
"\n",
"\n",
"455\n",
"[[(u'455', u'house_number')]]\n",
"\n",
"\n",
"455 \n",
"[[(u'455', u'house_number')]]\n",
"\n",
"\n",
"455 4\n",
"[[(u'455 4', u'house_number')]]\n",
"\n",
"\n",
"455 43\n",
"[[(u'455', u'road'), (u'43', u'house_number')]]\n",
"\n",
"\n",
"455 43r\n",
"[[(u'455 43r', u'house_number')]]\n",
"\n",
"\n",
"455 43rd\n",
"[[(u'455', u'house_number'), (u'43rd', u'road')]]\n",
"\n",
"\n",
"455 43rd \n",
"[[(u'455', u'house_number'), (u'43rd', u'road')]]\n",
"\n",
"\n",
"455 43rd s\n",
"[[(u'455', u'house_number'), (u'43rd san', u'road')], [(u'455', u'house_number'), (u'43rd south', u'road')], [(u'455', u'house_number'), (u'43rd s', u'road')]]\n",
"\n",
"\n",
"455 43rd st\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road')]]\n",
"\n",
"\n",
"455 43rd st \n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road')]]\n",
"\n",
"\n",
"455 43rd st n\n",
"[[(u'455', u'house_number'), (u'43rd saint north', u'road')], [(u'455', u'house_number'), (u'43rd saint n', u'road')], [(u'455', u'house_number'), (u'43rd street north', u'road')], [(u'455', u'house_number'), (u'43rd street n', u'road')]]\n",
"\n",
"\n",
"455 43rd st ne\n",
"[[(u'455', u'house_number'), (u'43rd saint ne', u'road')], [(u'455', u'house_number'), (u'43rd saint northeast', u'road')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'nebraska', u'state')], [(u'455', u'house_number'), (u'43rd street ne', u'road')], [(u'455', u'house_number'), (u'43rd street northeast', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"455 43rd st new\n",
"[[(u'455', u'house_number'), (u'43rd saint new', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"455 43rd st new \n",
"[[(u'455', u'house_number'), (u'43rd saint new', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"455 43rd st new y\n",
"[[(u'455', u'house_number'), (u'43rd saint new y', u'road')], [(u'455', u'house_number'), (u'43rd street new y', u'road')]]\n",
"\n",
"\n",
"455 43rd st new yo\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new yo', u'house')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new yo', u'suburb')]]\n",
"\n",
"\n",
"455 43rd st new yor\n",
"[[(u'455', u'house_number'), (u'43rd saint new', u'road'), (u'yor', u'suburb')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new yor', u'suburb')]]\n",
"\n",
"\n",
"455 43rd st new york\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"455 43rd st new york \n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"455 43rd st new york n\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new', u'state'), (u'york', u'house'), (u'north', u'road')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'state'), (u'n', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'state'), (u'north', u'road')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'state'), (u'n', u'postcode')]]\n",
"\n",
"\n",
"455 43rd st new york ny\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"455 43rd st new york ny \n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"455 43rd st new york ny 1\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1', u'postcode')]]\n",
"\n",
"\n",
"455 43rd st new york ny 11\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11', u'postcode')]]\n",
"\n",
"\n",
"455 43rd st new york ny 112\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new', u'city'), (u'york', u'house'), (u'ny', u'state'), (u'112', u'house_number')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'112', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'112', u'postcode')]]\n",
"\n",
"\n",
"455 43rd st new york ny 1123\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1123', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'1123', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'1123', u'postcode')]]\n",
"\n",
"\n",
"455 43rd st new york ny 11232\n",
"[[(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'455', u'house_number'), (u'43rd saint', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'new york', u'state'), (u'11232', u'postcode')], [(u'455', u'house_number'), (u'43rd street', u'road'), (u'new york', u'city'), (u'ny', u'state'), (u'11232', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 main st ny 11201\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 m\n",
"[[(u'1', u'house_number'), (u'mail', u'road')]]\n",
"\n",
"\n",
"1 ma\n",
"[[(u'1', u'house_number'), (u'massachusetts', u'state')], [(u'1', u'house_number'), (u'ma', u'road')]]\n",
"\n",
"\n",
"1 mai\n",
"[[(u'1 mai', u'road')]]\n",
"\n",
"\n",
"1 main\n",
"[[(u'1', u'house_number'), (u'main', u'road')]]\n",
"\n",
"\n",
"1 main \n",
"[[(u'1', u'house_number'), (u'main', u'road')]]\n",
"\n",
"\n",
"1 main s\n",
"[[(u'1', u'house_number'), (u'main san', u'road')], [(u'1', u'house_number'), (u'main south', u'road')], [(u'1', u'house_number'), (u'main s', u'road')]]\n",
"\n",
"\n",
"1 main st\n",
"[[(u'1', u'house_number'), (u'main saint', u'road')], [(u'1', u'house_number'), (u'main street', u'road')]]\n",
"\n",
"\n",
"1 main st \n",
"[[(u'1', u'house_number'), (u'main saint', u'road')], [(u'1', u'house_number'), (u'main street', u'road')]]\n",
"\n",
"\n",
"1 main st n\n",
"[[(u'1', u'house_number'), (u'main saint north', u'road')], [(u'1', u'house_number'), (u'main saint n', u'road')], [(u'1', u'house_number'), (u'main street north', u'road')], [(u'1', u'house_number'), (u'main street n', u'road')]]\n",
"\n",
"\n",
"1 main st ny\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"1 main st ny \n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state')]]\n",
"\n",
"\n",
"1 main st ny 1\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'house'), (u'1', u'house_number')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'1', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'1', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'1', u'postcode')]]\n",
"\n",
"\n",
"1 main st ny 11\n",
"[[(u'1', u'house_number'), (u'main saint new york', u'road'), (u'11', u'house_number')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'11', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'11', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'11', u'postcode')]]\n",
"\n",
"\n",
"1 main st ny 112\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'house'), (u'112', u'house_number')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'112', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'112', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'112', u'postcode')]]\n",
"\n",
"\n",
"1 main st ny 1120\n",
"[[(u'1', u'house_number'), (u'main saint new york', u'road'), (u'1120', u'house_number')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'1120', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'1120', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'1120', u'postcode')]]\n",
"\n",
"\n",
"1 main st ny 11201\n",
"[[(u'1', u'house_number'), (u'main saint', u'road'), (u'new york', u'state'), (u'11201', u'postcode')], [(u'1', u'house_number'), (u'main saint', u'road'), (u'ny', u'state'), (u'11201', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'new york', u'state'), (u'11201', u'postcode')], [(u'1', u'house_number'), (u'main street', u'road'), (u'ny', u'state'), (u'11201', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 186 Tuskegee St SE Atlanta GA\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"18\n",
"[[(u'18', u'house_number')]]\n",
"\n",
"\n",
"186\n",
"[[(u'186', u'house_number')]]\n",
"\n",
"\n",
"186 \n",
"[[(u'186', u'house_number')]]\n",
"\n",
"\n",
"186 T\n",
"[[(u'186', u'house_number'), (u't', u'road')]]\n",
"\n",
"\n",
"186 Tu\n",
"[[(u'186', u'house_number'), (u'tu', u'house')]]\n",
"\n",
"\n",
"186 Tus\n",
"[[(u'186', u'house_number'), (u'tus', u'house')]]\n",
"\n",
"\n",
"186 Tusk\n",
"[[(u'186', u'house_number'), (u'tusk', u'road')]]\n",
"\n",
"\n",
"186 Tuske\n",
"[[(u'186', u'house_number'), (u'tuske', u'road')]]\n",
"\n",
"\n",
"186 Tuskeg\n",
"[[(u'186', u'house_number'), (u'tuskeg', u'road')]]\n",
"\n",
"\n",
"186 Tuskege\n",
"[[(u'186', u'house_number'), (u'tuskege', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee\n",
"[[(u'186', u'house_number'), (u'tuskegee', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee \n",
"[[(u'186', u'house_number'), (u'tuskegee', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee S\n",
"[[(u'186', u'house_number'), (u'tuskegee san', u'road')], [(u'186', u'house_number'), (u'tuskegee south', u'road')], [(u'186', u'house_number'), (u'tuskegee s', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St\n",
"[[(u'186', u'house_number'), (u'tuskegee saint', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St \n",
"[[(u'186', u'house_number'), (u'tuskegee saint', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St S\n",
"[[(u'186', u'house_number'), (u'tuskegee saint san', u'road')], [(u'186', u'house_number'), (u'tuskegee saint south', u'road')], [(u'186', u'house_number'), (u'tuskegee saint s', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'san', u'suburb')], [(u'186', u'house_number'), (u'tuskegee street south', u'road')], [(u'186', u'house_number'), (u'tuskegee street s', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'city'), (u'company', u'house')], [(u'186', u'house_number'), (u'tuskegee street se', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE \n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'city'), (u'company', u'house')], [(u'186', u'house_number'), (u'tuskegee street se', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE A\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast a', u'road')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'a', u'road')], [(u'186', u'house_number'), (u'tuskegee saint se a', u'road')], [(u'186', u'house_number'), (u'tuskegee street southeast a', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'city'), (u'company a', u'house')], [(u'186', u'house_number'), (u'tuskegee street se a', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE At\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast at', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company at', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se at', u'road')], [(u'186', u'house_number'), (u'tuskegee street southeast at', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european company', u'city'), (u'at', u'country')], [(u'186', u'house_number'), (u'tuskegee street se at', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atl\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast atl', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company atl', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atl', u'house')], [(u'186', u'house_number'), (u'tuskegee street southeast atl', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'city'), (u'company atl', u'house')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atl', u'city')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atla\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atla', u'city')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company atla', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atla', u'city')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atla', u'city')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european company atla', u'house')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atla', u'city')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlan\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast atlan', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company atlan', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlan', u'house')], [(u'186', u'house_number'), (u'tuskegee street southeast atlan', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european company atlan', u'house')], [(u'186', u'house_number'), (u'tuskegee street se atlan', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlant\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast atlant', u'road')], [(u'186', u'house_number'), (u'tuskegee saint', u'road'), (u'european company atlant', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlant', u'house')], [(u'186', u'house_number'), (u'tuskegee street southeast atlant', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'city'), (u'company atlant', u'house')], [(u'186', u'house_number'), (u'tuskegee street se atlant', u'road')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlanta\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'suburb'), (u'company', u'house'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlanta \n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'suburb'), (u'company', u'house'), (u'atlanta', u'city')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlanta G\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta ground', u'city')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city'), (u'ground', u'house')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta ground', u'city')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atlanta ground', u'city')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'suburb'), (u'company', u'house'), (u'atlanta', u'city'), (u'ground', u'house')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta ground', u'city')]]\n",
"\n",
"\n",
"186 Tuskegee St SE Atlanta GA\n",
"[[(u'186', u'house_number'), (u'tuskegee saint southeast atlanta gate', u'road')], [(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city'), (u'ga', u'state')], [(u'186', u'house_number'), (u'tuskegee saint southeast', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta gate', u'road')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city'), (u'ga', u'state')], [(u'186', u'house_number'), (u'tuskegee saint european', u'road'), (u'company', u'house'), (u'atlanta', u'city'), (u'georgia', u'state')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'gate', u'road')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'ga', u'state')], [(u'186', u'house_number'), (u'tuskegee saint se', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')], [(u'186', u'house_number'), (u'tuskegee street southeast', u'road'), (u'atlanta', u'city'), (u'gate', u'road')], [(u'186', u'house_number'), (u'tuskegee street', u'road'), (u'european', u'suburb'), (u'company', u'house'), (u'atlanta gate', u'road')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'gate', u'road')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'ga', u'state')], [(u'186', u'house_number'), (u'tuskegee street se', u'road'), (u'atlanta', u'city'), (u'georgia', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 3122 16th St San Francisco, CA 94103\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"31\n",
"[[(u'31', u'house_number')]]\n",
"\n",
"\n",
"312\n",
"[[(u'312', u'house_number')]]\n",
"\n",
"\n",
"3122\n",
"[[(u'3122', u'postcode')]]\n",
"\n",
"\n",
"3122 \n",
"[[(u'3122', u'postcode')]]\n",
"\n",
"\n",
"3122 1\n",
"[[(u'3122 1', u'house_number')]]\n",
"\n",
"\n",
"3122 16\n",
"[[(u'3122', u'house_number'), (u'16', u'road')]]\n",
"\n",
"\n",
"3122 16t\n",
"[[(u'3122', u'postcode'), (u'16t', u'house_number')]]\n",
"\n",
"\n",
"3122 16th\n",
"[[(u'3122', u'house_number'), (u'16th', u'road')]]\n",
"\n",
"\n",
"3122 16th \n",
"[[(u'3122', u'house_number'), (u'16th', u'road')]]\n",
"\n",
"\n",
"3122 16th S\n",
"[[(u'3122', u'house_number'), (u'16th san', u'road')], [(u'3122', u'house_number'), (u'16th south', u'road')], [(u'3122', u'house_number'), (u'16th s', u'road')]]\n",
"\n",
"\n",
"3122 16th St\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road')]]\n",
"\n",
"\n",
"3122 16th St \n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road')]]\n",
"\n",
"\n",
"3122 16th St S\n",
"[[(u'3122', u'house_number'), (u'16th saint san', u'road')], [(u'3122', u'house_number'), (u'16th saint south', u'road')], [(u'3122', u'house_number'), (u'16th saint s', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb')], [(u'3122', u'house_number'), (u'16th street south', u'road')], [(u'3122', u'house_number'), (u'16th street s', u'road')]]\n",
"\n",
"\n",
"3122 16th St Sa\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'southern', u'house'), (u'australia', u'country')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'southern', u'state'), (u'australia', u'country')]]\n",
"\n",
"\n",
"3122 16th St San\n",
"[[(u'3122', u'house_number'), (u'16th saint san', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3122 16th St San \n",
"[[(u'3122', u'house_number'), (u'16th saint san', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3122 16th St San F\n",
"[[(u'3122', u'house_number'), (u'16th saint san flat', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb'), (u'flat', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Fr\n",
"[[(u'3122', u'house_number'), (u'16th saint san father', u'road')], [(u'3122', u'house_number'), (u'16th saint san frontage', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san father', u'city')], [(u'3122', u'house_number'), (u'16th street san frontage', u'road')]]\n",
"\n",
"\n",
"3122 16th St San Fra\n",
"[[(u'3122', u'house_number'), (u'16th saint san', u'road'), (u'fra', u'country')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'city'), (u'fra', u'country')]]\n",
"\n",
"\n",
"3122 16th St San Fran\n",
"[[(u'3122', u'house_number'), (u'16th saint san fran', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb'), (u'fran', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Franc\n",
"[[(u'3122', u'house_number'), (u'16th saint san franc', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san franc', u'suburb')]]\n",
"\n",
"\n",
"3122 16th St San Franci\n",
"[[(u'3122', u'house_number'), (u'16th saint san franci', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb'), (u'franci', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francis\n",
"[[(u'3122', u'house_number'), (u'16th saint san francis', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb'), (u'francis', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francisc\n",
"[[(u'3122', u'house_number'), (u'16th saint san francisc', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san', u'suburb'), (u'francisc', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francisco\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francisco,\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, \n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, C\n",
"[[(u'3122', u'house_number'), (u'16th saint san francisco centre', u'road')], [(u'3122', u'house_number'), (u'16th saint san', u'road'), (u'francisco center', u'house')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san', u'city'), (u'francisco central', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco centre', u'city')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco center', u'city')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'central', u'state')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA \n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA 9\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA 94\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA 941\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA 9410\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9410', u'postcode')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9410', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9410', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9410', u'postcode')]]\n",
"\n",
"\n",
"3122 16th St San Francisco, CA 94103\n",
"[[(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94103', u'postcode')], [(u'3122', u'house_number'), (u'16th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94103', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94103', u'postcode')], [(u'3122', u'house_number'), (u'16th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94103', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 3010 20th St San Francisco CA 94110\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"30\n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"301\n",
"[[(u'301', u'house_number')]]\n",
"\n",
"\n",
"3010\n",
"[[(u'3010', u'house_number')]]\n",
"\n",
"\n",
"3010 \n",
"[[(u'3010', u'house_number')]]\n",
"\n",
"\n",
"3010 2\n",
"[[(u'3010 2', u'house_number')]]\n",
"\n",
"\n",
"3010 20\n",
"[[(u'3010', u'house_number'), (u'20', u'road')]]\n",
"\n",
"\n",
"3010 20t\n",
"[[(u'3010', u'postcode'), (u'20t', u'house_number')]]\n",
"\n",
"\n",
"3010 20th\n",
"[[(u'3010', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"3010 20th \n",
"[[(u'3010', u'house_number'), (u'20th', u'road')]]\n",
"\n",
"\n",
"3010 20th S\n",
"[[(u'3010', u'house_number'), (u'20th san', u'road')], [(u'3010', u'house_number'), (u'20th south', u'road')], [(u'3010', u'house_number'), (u'20th s', u'road')]]\n",
"\n",
"\n",
"3010 20th St\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"3010 20th St \n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road')]]\n",
"\n",
"\n",
"3010 20th St S\n",
"[[(u'3010', u'house_number'), (u'20th saint san', u'road')], [(u'3010', u'house_number'), (u'20th saint south', u'road')], [(u'3010', u'house_number'), (u'20th saint s', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb')], [(u'3010', u'house_number'), (u'20th street south', u'road')], [(u'3010', u'house_number'), (u'20th street s', u'road')]]\n",
"\n",
"\n",
"3010 20th St Sa\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'southern', u'house'), (u'australia', u'country')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'southern', u'state'), (u'australia', u'country')]]\n",
"\n",
"\n",
"3010 20th St San\n",
"[[(u'3010', u'house_number'), (u'20th saint san', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3010 20th St San \n",
"[[(u'3010', u'house_number'), (u'20th saint san', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3010 20th St San F\n",
"[[(u'3010', u'house_number'), (u'20th saint san flat', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb'), (u'flat', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Fr\n",
"[[(u'3010', u'house_number'), (u'20th saint san father', u'road')], [(u'3010', u'house_number'), (u'20th saint san frontage', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san father', u'city')], [(u'3010', u'house_number'), (u'20th street san frontage', u'road')]]\n",
"\n",
"\n",
"3010 20th St San Fra\n",
"[[(u'3010', u'house_number'), (u'20th saint san', u'road'), (u'fra', u'country')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'city'), (u'fra', u'country')]]\n",
"\n",
"\n",
"3010 20th St San Fran\n",
"[[(u'3010', u'house_number'), (u'20th saint san fran', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb'), (u'fran', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Franc\n",
"[[(u'3010', u'house_number'), (u'20th saint san franc', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san franc', u'suburb')]]\n",
"\n",
"\n",
"3010 20th St San Franci\n",
"[[(u'3010', u'house_number'), (u'20th saint san franci', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb'), (u'franci', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Francis\n",
"[[(u'3010', u'house_number'), (u'20th saint san francis', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb'), (u'francis', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Francisc\n",
"[[(u'3010', u'house_number'), (u'20th saint san francisc', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san', u'suburb'), (u'francisc', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Francisco\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Francisco \n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3010 20th St San Francisco C\n",
"[[(u'3010', u'house_number'), (u'20th saint san francisco centre', u'road')], [(u'3010', u'house_number'), (u'20th saint san', u'road'), (u'francisco center', u'house')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san', u'city'), (u'francisco central', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco centre', u'city')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco center', u'city')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'central', u'state')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA \n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA 9\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA 94\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA 941\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA 9411\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9411', u'postcode')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9411', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9411', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9411', u'postcode')]]\n",
"\n",
"\n",
"3010 20th St San Francisco CA 94110\n",
"[[(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94110', u'postcode')], [(u'3010', u'house_number'), (u'20th saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94110', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94110', u'postcode')], [(u'3010', u'house_number'), (u'20th street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94110', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 3577 Jackson St San Francisco, CA 94118\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"35\n",
"[[(u'35', u'house_number')]]\n",
"\n",
"\n",
"357\n",
"[[(u'357', u'house_number')]]\n",
"\n",
"\n",
"3577\n",
"[[(u'3577', u'postcode')]]\n",
"\n",
"\n",
"3577 \n",
"[[(u'3577', u'postcode')]]\n",
"\n",
"\n",
"3577 J\n",
"[[(u'3577', u'house_number'), (u'junction', u'road')]]\n",
"\n",
"\n",
"3577 Ja\n",
"[[(u'3577 ja', u'postcode')]]\n",
"\n",
"\n",
"3577 Jac\n",
"[[(u'3577', u'postcode'), (u'jac', u'road')]]\n",
"\n",
"\n",
"3577 Jack\n",
"[[(u'3577', u'house_number'), (u'jack', u'road')]]\n",
"\n",
"\n",
"3577 Jacks\n",
"[[(u'3577', u'house_number'), (u'jacks', u'road')]]\n",
"\n",
"\n",
"3577 Jackso\n",
"[[(u'3577', u'house_number'), (u'jackso', u'road')]]\n",
"\n",
"\n",
"3577 Jackson\n",
"[[(u'3577', u'house_number'), (u'jackson', u'road')]]\n",
"\n",
"\n",
"3577 Jackson \n",
"[[(u'3577', u'house_number'), (u'jackson', u'road')]]\n",
"\n",
"\n",
"3577 Jackson S\n",
"[[(u'3577', u'house_number'), (u'jackson san', u'road')], [(u'3577', u'house_number'), (u'jackson south', u'road')], [(u'3577', u'house_number'), (u'jackson s', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St \n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St S\n",
"[[(u'3577', u'house_number'), (u'jackson saint san', u'road')], [(u'3577', u'house_number'), (u'jackson saint south', u'road')], [(u'3577', u'house_number'), (u'jackson saint s', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb')], [(u'3577', u'house_number'), (u'jackson street south', u'road')], [(u'3577', u'house_number'), (u'jackson street s', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St Sa\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'southern', u'house'), (u'australia', u'country')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'southern', u'state'), (u'australia', u'country')]]\n",
"\n",
"\n",
"3577 Jackson St San\n",
"[[(u'3577', u'house_number'), (u'jackson saint san', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3577 Jackson St San \n",
"[[(u'3577', u'house_number'), (u'jackson saint san', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb')]]\n",
"\n",
"\n",
"3577 Jackson St San F\n",
"[[(u'3577', u'house_number'), (u'jackson saint san flat', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb'), (u'flat', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Fr\n",
"[[(u'3577', u'house_number'), (u'jackson saint san father', u'road')], [(u'3577', u'house_number'), (u'jackson saint san frontage', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san father', u'city')], [(u'3577', u'house_number'), (u'jackson street san frontage', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St San Fra\n",
"[[(u'3577', u'house_number'), (u'jackson saint san', u'road'), (u'fra', u'country')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'city'), (u'fra', u'country')]]\n",
"\n",
"\n",
"3577 Jackson St San Fran\n",
"[[(u'3577', u'house_number'), (u'jackson saint san fran', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb'), (u'fran', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Franc\n",
"[[(u'3577', u'house_number'), (u'jackson saint san franc', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san franc', u'suburb')]]\n",
"\n",
"\n",
"3577 Jackson St San Franci\n",
"[[(u'3577', u'house_number'), (u'jackson saint san franci', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb'), (u'franci', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francis\n",
"[[(u'3577', u'house_number'), (u'jackson saint san francis', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb'), (u'francis', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisc\n",
"[[(u'3577', u'house_number'), (u'jackson saint san francisc', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san', u'suburb'), (u'francisc', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco,\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, \n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, C\n",
"[[(u'3577', u'house_number'), (u'jackson saint san francisco centre', u'road')], [(u'3577', u'house_number'), (u'jackson saint san', u'road'), (u'francisco center', u'house')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san', u'city'), (u'francisco central', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco centre', u'city')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco center', u'city')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'central', u'state')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA \n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA 9\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9', u'house_number')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA 94\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA 941\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'941', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'941', u'road')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA 9411\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9411', u'postcode')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9411', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'9411', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'9411', u'postcode')]]\n",
"\n",
"\n",
"3577 Jackson St San Francisco, CA 94118\n",
"[[(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94118', u'postcode')], [(u'3577', u'house_number'), (u'jackson saint', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94118', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'california', u'state'), (u'94118', u'postcode')], [(u'3577', u'house_number'), (u'jackson street', u'road'), (u'san francisco', u'city'), (u'ca', u'state'), (u'94118', u'postcode')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: whole foods NY\n",
"w\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"wh\n",
"[[(u'wh', u'house_number')]]\n",
"\n",
"\n",
"who\n",
"[[(u'who', u'house')]]\n",
"\n",
"\n",
"whol\n",
"[[(u'whol', u'house')]]\n",
"\n",
"\n",
"whole\n",
"[[(u'whole', u'house')]]\n",
"\n",
"\n",
"whole \n",
"[[(u'whole', u'house')]]\n",
"\n",
"\n",
"whole f\n",
"[[(u'whole flat', u'house')]]\n",
"\n",
"\n",
"whole fo\n",
"[[(u'whole fo', u'house')]]\n",
"\n",
"\n",
"whole foo\n",
"[[(u'whole foo', u'house')]]\n",
"\n",
"\n",
"whole food\n",
"[[(u'whole food', u'house')]]\n",
"\n",
"\n",
"whole foods\n",
"[[(u'whole foods', u'house')]]\n",
"\n",
"\n",
"whole foods \n",
"[[(u'whole foods', u'house')]]\n",
"\n",
"\n",
"whole foods N\n",
"[[(u'whole foods', u'house'), (u'north', u'road')], [(u'whole foods', u'house'), (u'n', u'road')]]\n",
"\n",
"\n",
"whole foods NY\n",
"[[(u'whole foods', u'house'), (u'new york', u'state')], [(u'whole foods', u'house'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: autocomplete_admin_areas\n",
"TEST: brooklyn\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"br\n",
"[[(u'brae', u'road')], [(u'brother', u'house')], [(u'brace', u'house')], [(u'bridge', u'house')], [(u'branch', u'house')]]\n",
"\n",
"\n",
"bro\n",
"[[(u'bro', u'road')]]\n",
"\n",
"\n",
"broo\n",
"[[(u'broo', u'house')]]\n",
"\n",
"\n",
"brook\n",
"[[(u'brook', u'house')]]\n",
"\n",
"\n",
"brookl\n",
"[[(u'brookl', u'house')]]\n",
"\n",
"\n",
"brookly\n",
"[[(u'brookly', u'house')]]\n",
"\n",
"\n",
"brooklyn\n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new york\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new y\n",
"[[(u'new y', u'house')]]\n",
"\n",
"\n",
"new yo\n",
"[[(u'new yo', u'house')]]\n",
"\n",
"\n",
"new yor\n",
"[[(u'new yor', u'house')]]\n",
"\n",
"\n",
"new york\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: london\n",
"l\n",
"[[(u'level', u'house')]]\n",
"\n",
"\n",
"lo\n",
"[[(u'lo', u'house')]]\n",
"\n",
"\n",
"lon\n",
"[[(u'lon', u'road')]]\n",
"\n",
"\n",
"lond\n",
"[[(u'lond', u'house')]]\n",
"\n",
"\n",
"londo\n",
"[[(u'londo', u'house')]]\n",
"\n",
"\n",
"london\n",
"[[(u'london', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: san francisco\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"sa\n",
"[[(u'southern', u'house'), (u'australia', u'country')]]\n",
"\n",
"\n",
"san\n",
"[[(u'san', u'road')]]\n",
"\n",
"\n",
"san \n",
"[[(u'san', u'road')]]\n",
"\n",
"\n",
"san f\n",
"[[(u'san', u'road'), (u'flat', u'house')]]\n",
"\n",
"\n",
"san fr\n",
"[[(u'san father', u'road')], [(u'san frontage', u'road')]]\n",
"\n",
"\n",
"san fra\n",
"[[(u'san', u'road'), (u'fra', u'house')]]\n",
"\n",
"\n",
"san fran\n",
"[[(u'san fran', u'road')]]\n",
"\n",
"\n",
"san franc\n",
"[[(u'san franc', u'road')]]\n",
"\n",
"\n",
"san franci\n",
"[[(u'san franci', u'road')]]\n",
"\n",
"\n",
"san francis\n",
"[[(u'san francis', u'house')]]\n",
"\n",
"\n",
"san francisc\n",
"[[(u'san francisc', u'house')]]\n",
"\n",
"\n",
"san francisco\n",
"[[(u'san francisco', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: victoria\n",
"v\n",
"[[(u'v', u'house')], [(u'5', u'house_number')]]\n",
"\n",
"\n",
"vi\n",
"[[(u'vi', u'house')], [(u'6', u'house_number')]]\n",
"\n",
"\n",
"vic\n",
"[[(u'victoria', u'city')], [(u'vic', u'state')], [(u'100', u'house_number')]]\n",
"\n",
"\n",
"vict\n",
"[[(u'vict', u'house')]]\n",
"\n",
"\n",
"victo\n",
"[[(u'victo', u'house')]]\n",
"\n",
"\n",
"victor\n",
"[[(u'victor', u'road')]]\n",
"\n",
"\n",
"victori\n",
"[[(u'victori', u'house')]]\n",
"\n",
"\n",
"victoria\n",
"[[(u'victoria', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: wales\n",
"w\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"wa\n",
"[[(u'wa', u'state')], [(u'washington', u'city')], [(u'western australia', u'state')]]\n",
"\n",
"\n",
"wal\n",
"[[(u'wal', u'road')]]\n",
"\n",
"\n",
"wale\n",
"[[(u'wale', u'house')]]\n",
"\n",
"\n",
"wales\n",
"[[(u'wales', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new south wales\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new s\n",
"[[(u'new s', u'house')]]\n",
"\n",
"\n",
"new so\n",
"[[(u'new so', u'house')]]\n",
"\n",
"\n",
"new sou\n",
"[[(u'new', u'road'), (u'sou', u'house')]]\n",
"\n",
"\n",
"new sout\n",
"[[(u'new sout', u'house')]]\n",
"\n",
"\n",
"new south\n",
"[[(u'new', u'house'), (u'south', u'road')]]\n",
"\n",
"\n",
"new south \n",
"[[(u'new', u'house'), (u'south', u'road')]]\n",
"\n",
"\n",
"new south w\n",
"[[]]\n",
"\n",
"\n",
"new south wa\n",
"[[]]\n",
"\n",
"\n",
"new south wal\n",
"[[]]\n",
"\n",
"\n",
"new south wale\n",
"[[]]\n",
"\n",
"\n",
"new south wales\n",
"[[(u'new south wales', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: california\n",
"c\n",
"[[(u'centre', u'house')], [(u'center', u'house')], [(u'central', u'house')]]\n",
"\n",
"\n",
"ca\n",
"[[(u'california', u'house')], [(u'ca', u'country')]]\n",
"\n",
"\n",
"cal\n",
"[[(u'cal', u'house')]]\n",
"\n",
"\n",
"cali\n",
"[[(u'cali', u'city')]]\n",
"\n",
"\n",
"calif\n",
"[[(u'calif', u'house')]]\n",
"\n",
"\n",
"califo\n",
"[[(u'califo', u'house')]]\n",
"\n",
"\n",
"califor\n",
"[[(u'califor', u'house')]]\n",
"\n",
"\n",
"californ\n",
"[[(u'californ', u'house')]]\n",
"\n",
"\n",
"californi\n",
"[[(u'californi', u'house')]]\n",
"\n",
"\n",
"california\n",
"[[(u'california', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: west bengal\n",
"w\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"we\n",
"[[(u'warehouse', u'house')]]\n",
"\n",
"\n",
"wes\n",
"[[(u'wes', u'road')]]\n",
"\n",
"\n",
"west\n",
"[[(u'west', u'city_district')]]\n",
"\n",
"\n",
"west \n",
"[[(u'west', u'city_district')]]\n",
"\n",
"\n",
"west b\n",
"[[(u'west', u'road'), (u'b', u'house')]]\n",
"\n",
"\n",
"west be\n",
"[[(u'west', u'road'), (u'be', u'country')]]\n",
"\n",
"\n",
"west ben\n",
"[[(u'west ben', u'road')]]\n",
"\n",
"\n",
"west beng\n",
"[[(u'west', u'road'), (u'beng', u'suburb')]]\n",
"\n",
"\n",
"west benga\n",
"[[(u'west benga', u'road')]]\n",
"\n",
"\n",
"west bengal\n",
"[[(u'west bengal', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: madrid\n",
"m\n",
"[[(u'mail', u'road')]]\n",
"\n",
"\n",
"ma\n",
"[[(u'massachusetts', u'state')], [(u'ma', u'house')]]\n",
"\n",
"\n",
"mad\n",
"[[(u'mad', u'house')]]\n",
"\n",
"\n",
"madr\n",
"[[(u'madr', u'house')]]\n",
"\n",
"\n",
"madri\n",
"[[(u'madri', u'house')]]\n",
"\n",
"\n",
"madrid\n",
"[[(u'madrid', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: malmo\n",
"m\n",
"[[(u'mail', u'road')]]\n",
"\n",
"\n",
"ma\n",
"[[(u'massachusetts', u'state')], [(u'ma', u'house')]]\n",
"\n",
"\n",
"mal\n",
"[[(u'mal', u'house')]]\n",
"\n",
"\n",
"malm\n",
"[[(u'malm', u'city')]]\n",
"\n",
"\n",
"malmo\n",
"[[(u'malmo', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Singarpuram\n",
"S\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"Si\n",
"[[(u'si', u'country')]]\n",
"\n",
"\n",
"Sin\n",
"[[(u'sin', u'house')]]\n",
"\n",
"\n",
"Sing\n",
"[[(u'sing', u'house')]]\n",
"\n",
"\n",
"Singa\n",
"[[(u'singa', u'road')]]\n",
"\n",
"\n",
"Singar\n",
"[[(u'singar', u'house')]]\n",
"\n",
"\n",
"Singarp\n",
"[[(u'singarp', u'house')]]\n",
"\n",
"\n",
"Singarpu\n",
"[[(u'singarpu', u'house')]]\n",
"\n",
"\n",
"Singarpur\n",
"[[(u'singarpur', u'house')]]\n",
"\n",
"\n",
"Singarpura\n",
"[[(u'singarpura', u'house')]]\n",
"\n",
"\n",
"Singarpuram\n",
"[[(u'singarpuram', u'suburb')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: address_type\n",
"TEST: 102 Fleet Street\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"10\n",
"[[(u'10', u'house_number')]]\n",
"\n",
"\n",
"102\n",
"[[(u'102', u'house_number')]]\n",
"\n",
"\n",
"102 \n",
"[[(u'102', u'house_number')]]\n",
"\n",
"\n",
"102 F\n",
"[[(u'102 flat', u'house_number')]]\n",
"\n",
"\n",
"102 Fl\n",
"[[(u'102', u'house_number'), (u'fl', u'road')], [(u'102 flat', u'house_number')], [(u'102', u'house_number'), (u'fall', u'road')], [(u'102', u'house_number'), (u'florida', u'road')], [(u'102', u'house_number'), (u'floor', u'road')]]\n",
"\n",
"\n",
"102 Fle\n",
"[[(u'102', u'house_number'), (u'fle', u'road')]]\n",
"\n",
"\n",
"102 Flee\n",
"[[(u'102', u'house_number'), (u'flee', u'road')]]\n",
"\n",
"\n",
"102 Fleet\n",
"[[(u'102', u'house_number'), (u'fleet', u'road')]]\n",
"\n",
"\n",
"102 Fleet \n",
"[[(u'102', u'house_number'), (u'fleet', u'road')]]\n",
"\n",
"\n",
"102 Fleet S\n",
"[[(u'102', u'house_number'), (u'fleet san', u'road')], [(u'102', u'house_number'), (u'fleet south', u'road')], [(u'102', u'house_number'), (u'fleet s', u'road')]]\n",
"\n",
"\n",
"102 Fleet St\n",
"[[(u'102', u'house_number'), (u'fleet saint', u'road')], [(u'102', u'house_number'), (u'fleet street', u'road')]]\n",
"\n",
"\n",
"102 Fleet Str\n",
"[[(u'102', u'house_number'), (u'fleet street', u'road')]]\n",
"\n",
"\n",
"102 Fleet Stre\n",
"[[(u'102', u'house_number'), (u'fleet stre', u'road')]]\n",
"\n",
"\n",
"102 Fleet Stree\n",
"[[(u'102', u'house_number'), (u'fleet stree', u'road')]]\n",
"\n",
"\n",
"102 Fleet Street\n",
"[[(u'102', u'house_number'), (u'fleet street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Shepherd and Flock\n",
"S\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"Sh\n",
"[[(u'state highway', u'road')]]\n",
"\n",
"\n",
"She\n",
"[[(u'she', u'house')]]\n",
"\n",
"\n",
"Shep\n",
"[[(u'shep', u'house')]]\n",
"\n",
"\n",
"Sheph\n",
"[[(u'sheph', u'house')]]\n",
"\n",
"\n",
"Shephe\n",
"[[(u'shephe', u'house')]]\n",
"\n",
"\n",
"Shepher\n",
"[[(u'shepher', u'house')]]\n",
"\n",
"\n",
"Shepherd\n",
"[[(u'shepherd', u'city')]]\n",
"\n",
"\n",
"Shepherd \n",
"[[(u'shepherd', u'city')]]\n",
"\n",
"\n",
"Shepherd a\n",
"[[(u'shepherd a', u'house')]]\n",
"\n",
"\n",
"Shepherd an\n",
"[[(u'shepherd', u'house'), (u'an', u'road')]]\n",
"\n",
"\n",
"Shepherd and\n",
"[[(u'shepherd and', u'house')]]\n",
"\n",
"\n",
"Shepherd and \n",
"[[(u'shepherd and', u'house')]]\n",
"\n",
"\n",
"Shepherd and F\n",
"[[(u'shepherd', u'house'), (u'0 flat', u'house_number')]]\n",
"\n",
"\n",
"Shepherd and Fl\n",
"[[(u'shepherd', u'house'), (u'0', u'house_number'), (u'fl', u'state')], [(u'shepherd', u'house'), (u'0 flat', u'house_number')], [(u'shepherd', u'house'), (u'0', u'house_number'), (u'fall', u'city')], [(u'shepherd', u'house'), (u'0', u'house_number'), (u'florida', u'road')], [(u'shepherd', u'house'), (u'0 floor', u'house_number')]]\n",
"\n",
"\n",
"Shepherd and Flo\n",
"[[(u'shepherd', u'house'), (u'0', u'house_number'), (u'flo', u'city_district')]]\n",
"\n",
"\n",
"Shepherd and Floc\n",
"[[(u'shepherd', u'house'), (u'0', u'house_number'), (u'floc', u'road')]]\n",
"\n",
"\n",
"Shepherd and Flock\n",
"[[(u'shepherd', u'house'), (u'0', u'house_number'), (u'flock', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 22 Moor Park Lane\n",
"2\n",
"[[(u'2', u'house_number')]]\n",
"\n",
"\n",
"22\n",
"[[(u'22', u'house_number')]]\n",
"\n",
"\n",
"22 \n",
"[[(u'22', u'house_number')]]\n",
"\n",
"\n",
"22 M\n",
"[[(u'22', u'house_number'), (u'mail', u'road')]]\n",
"\n",
"\n",
"22 Mo\n",
"[[(u'22', u'house_number'), (u'mo', u'road')], [(u'22', u'house_number'), (u'missouri', u'road')]]\n",
"\n",
"\n",
"22 Moo\n",
"[[(u'22 moo', u'house_number')]]\n",
"\n",
"\n",
"22 Moor\n",
"[[(u'22', u'house_number'), (u'moor', u'road')]]\n",
"\n",
"\n",
"22 Moor \n",
"[[(u'22', u'house_number'), (u'moor', u'road')]]\n",
"\n",
"\n",
"22 Moor P\n",
"[[(u'22', u'house_number'), (u'moor', u'road'), (u'p', u'house_number')]]\n",
"\n",
"\n",
"22 Moor Pa\n",
"[[(u'22', u'house_number'), (u'moor pa', u'road')], [(u'22', u'house_number'), (u'moor', u'road'), (u'pennsylvania', u'state')]]\n",
"\n",
"\n",
"22 Moor Par\n",
"[[(u'22', u'house_number'), (u'moor par', u'road')]]\n",
"\n",
"\n",
"22 Moor Park\n",
"[[(u'22', u'house_number'), (u'moor park', u'road')]]\n",
"\n",
"\n",
"22 Moor Park \n",
"[[(u'22', u'house_number'), (u'moor park', u'road')]]\n",
"\n",
"\n",
"22 Moor Park L\n",
"[[(u'22', u'house_number'), (u'moor park level', u'road')]]\n",
"\n",
"\n",
"22 Moor Park La\n",
"[[(u'22', u'house_number'), (u'moor park la', u'road')], [(u'22', u'house_number'), (u'moor park lane', u'road')], [(u'22', u'house_number'), (u'moor park', u'road'), (u'louisiana', u'state')]]\n",
"\n",
"\n",
"22 Moor Park Lan\n",
"[[(u'22', u'house_number'), (u'moor park', u'road'), (u'lan', u'suburb')]]\n",
"\n",
"\n",
"22 Moor Park Lane\n",
"[[(u'22', u'house_number'), (u'moor park lane', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: search\n",
"TEST: brooklyn\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"br\n",
"[[(u'brae', u'road')], [(u'brother', u'house')], [(u'brace', u'house')], [(u'bridge', u'house')], [(u'branch', u'house')]]\n",
"\n",
"\n",
"bro\n",
"[[(u'bro', u'road')]]\n",
"\n",
"\n",
"broo\n",
"[[(u'broo', u'house')]]\n",
"\n",
"\n",
"brook\n",
"[[(u'brook', u'house')]]\n",
"\n",
"\n",
"brookl\n",
"[[(u'brookl', u'house')]]\n",
"\n",
"\n",
"brookly\n",
"[[(u'brookly', u'house')]]\n",
"\n",
"\n",
"brooklyn\n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: brooklyn, ny\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"br\n",
"[[(u'brae', u'road')], [(u'brother', u'house')], [(u'brace', u'house')], [(u'bridge', u'house')], [(u'branch', u'house')]]\n",
"\n",
"\n",
"bro\n",
"[[(u'bro', u'road')]]\n",
"\n",
"\n",
"broo\n",
"[[(u'broo', u'house')]]\n",
"\n",
"\n",
"brook\n",
"[[(u'brook', u'house')]]\n",
"\n",
"\n",
"brookl\n",
"[[(u'brookl', u'house')]]\n",
"\n",
"\n",
"brookly\n",
"[[(u'brookly', u'house')]]\n",
"\n",
"\n",
"brooklyn\n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"brooklyn,\n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"brooklyn, \n",
"[[(u'brooklyn', u'house')]]\n",
"\n",
"\n",
"brooklyn, n\n",
"[[(u'brooklyn', u'house'), (u'north', u'road')], [(u'brooklyn n', u'house')]]\n",
"\n",
"\n",
"brooklyn, ny\n",
"[[(u'brooklyn', u'state_district'), (u'new york', u'state')], [(u'brooklyn', u'city'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: philadelphia\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"ph\n",
"[[(u'penthouse', u'house')]]\n",
"\n",
"\n",
"phi\n",
"[[(u'phi', u'house')]]\n",
"\n",
"\n",
"phil\n",
"[[(u'phil', u'house')]]\n",
"\n",
"\n",
"phila\n",
"[[(u'phila', u'city')]]\n",
"\n",
"\n",
"philad\n",
"[[(u'philad', u'house')]]\n",
"\n",
"\n",
"philade\n",
"[[(u'philade', u'house')]]\n",
"\n",
"\n",
"philadel\n",
"[[(u'philadel', u'house')]]\n",
"\n",
"\n",
"philadelp\n",
"[[(u'philadelp', u'house')]]\n",
"\n",
"\n",
"philadelph\n",
"[[(u'philadelph', u'house')]]\n",
"\n",
"\n",
"philadelphi\n",
"[[(u'philadelphi', u'house')]]\n",
"\n",
"\n",
"philadelphia\n",
"[[(u'philadelphia', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: philadelphia, pa\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"ph\n",
"[[(u'penthouse', u'house')]]\n",
"\n",
"\n",
"phi\n",
"[[(u'phi', u'house')]]\n",
"\n",
"\n",
"phil\n",
"[[(u'phil', u'house')]]\n",
"\n",
"\n",
"phila\n",
"[[(u'phila', u'city')]]\n",
"\n",
"\n",
"philad\n",
"[[(u'philad', u'house')]]\n",
"\n",
"\n",
"philade\n",
"[[(u'philade', u'house')]]\n",
"\n",
"\n",
"philadel\n",
"[[(u'philadel', u'house')]]\n",
"\n",
"\n",
"philadelp\n",
"[[(u'philadelp', u'house')]]\n",
"\n",
"\n",
"philadelph\n",
"[[(u'philadelph', u'house')]]\n",
"\n",
"\n",
"philadelphi\n",
"[[(u'philadelphi', u'house')]]\n",
"\n",
"\n",
"philadelphia\n",
"[[(u'philadelphia', u'house')]]\n",
"\n",
"\n",
"philadelphia,\n",
"[[(u'philadelphia', u'house')]]\n",
"\n",
"\n",
"philadelphia, \n",
"[[(u'philadelphia', u'house')]]\n",
"\n",
"\n",
"philadelphia, p\n",
"[[(u'philadelphia p', u'house')]]\n",
"\n",
"\n",
"philadelphia, pa\n",
"[[(u'philadelphia', u'city'), (u'pa', u'state')], [(u'philadelphia', u'city'), (u'pennsylvania', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new york, new york\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new y\n",
"[[(u'new y', u'house')]]\n",
"\n",
"\n",
"new yo\n",
"[[(u'new yo', u'house')]]\n",
"\n",
"\n",
"new yor\n",
"[[(u'new yor', u'house')]]\n",
"\n",
"\n",
"new york\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york,\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york, \n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york, n\n",
"[[(u'new york', u'house'), (u'north', u'road')], [(u'new york', u'state'), (u'n', u'postcode')]]\n",
"\n",
"\n",
"new york, ne\n",
"[[(u'new york ne', u'state')], [(u'new york', u'state'), (u'northeast', u'road')], [(u'new york', u'city'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"new york, new\n",
"[[(u'new york', u'city'), (u'new', u'country')]]\n",
"\n",
"\n",
"new york, new \n",
"[[(u'new york', u'city'), (u'new', u'country')]]\n",
"\n",
"\n",
"new york, new y\n",
"[[(u'new york', u'city'), (u'new y', u'house')]]\n",
"\n",
"\n",
"new york, new yo\n",
"[[(u'new york', u'city'), (u'new yo', u'house')]]\n",
"\n",
"\n",
"new york, new yor\n",
"[[(u'new york', u'city'), (u'new', u'house'), (u'yor', u'city')]]\n",
"\n",
"\n",
"new york, new york\n",
"[[(u'new york', u'city'), (u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new york city\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new y\n",
"[[(u'new y', u'house')]]\n",
"\n",
"\n",
"new yo\n",
"[[(u'new yo', u'house')]]\n",
"\n",
"\n",
"new yor\n",
"[[(u'new yor', u'house')]]\n",
"\n",
"\n",
"new york\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york \n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york c\n",
"[[(u'new york centre', u'house')], [(u'new york center', u'house')], [(u'new york central', u'road')]]\n",
"\n",
"\n",
"new york ci\n",
"[[(u'new york circuit', u'house')]]\n",
"\n",
"\n",
"new york cit\n",
"[[(u'new york cit', u'house')]]\n",
"\n",
"\n",
"new york city\n",
"[[(u'new york city', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: new york city, usa\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new \n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"new y\n",
"[[(u'new y', u'house')]]\n",
"\n",
"\n",
"new yo\n",
"[[(u'new yo', u'house')]]\n",
"\n",
"\n",
"new yor\n",
"[[(u'new yor', u'house')]]\n",
"\n",
"\n",
"new york\n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york \n",
"[[(u'new york', u'state')]]\n",
"\n",
"\n",
"new york c\n",
"[[(u'new york centre', u'house')], [(u'new york center', u'house')], [(u'new york central', u'road')]]\n",
"\n",
"\n",
"new york ci\n",
"[[(u'new york circuit', u'house')]]\n",
"\n",
"\n",
"new york cit\n",
"[[(u'new york cit', u'house')]]\n",
"\n",
"\n",
"new york city\n",
"[[(u'new york city', u'city')]]\n",
"\n",
"\n",
"new york city,\n",
"[[(u'new york city', u'city')]]\n",
"\n",
"\n",
"new york city, \n",
"[[(u'new york city', u'city')]]\n",
"\n",
"\n",
"new york city, u\n",
"[[(u'new york city u', u'house')]]\n",
"\n",
"\n",
"new york city, us\n",
"[[(u'new york city', u'city'), (u'us', u'country')]]\n",
"\n",
"\n",
"new york city, usa\n",
"[[(u'new york city', u'city'), (u'usa', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 130 dean street brooklyn, ny\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"13\n",
"[[(u'13', u'house_number')]]\n",
"\n",
"\n",
"130\n",
"[[(u'130', u'house_number')]]\n",
"\n",
"\n",
"130 \n",
"[[(u'130', u'house_number')]]\n",
"\n",
"\n",
"130 d\n",
"[[(u'130', u'house_number'), (u'd', u'road')], [(u'130', u'house'), (u'500', u'house_number')]]\n",
"\n",
"\n",
"130 de\n",
"[[(u'130', u'house_number'), (u'delaware', u'road')], [(u'130', u'house_number'), (u'de', u'road')]]\n",
"\n",
"\n",
"130 dea\n",
"[[(u'130', u'house_number'), (u'dea', u'road')]]\n",
"\n",
"\n",
"130 dean\n",
"[[(u'130', u'house_number'), (u'dean', u'road')]]\n",
"\n",
"\n",
"130 dean \n",
"[[(u'130', u'house_number'), (u'dean', u'road')]]\n",
"\n",
"\n",
"130 dean s\n",
"[[(u'130', u'house_number'), (u'dean san', u'road')], [(u'130', u'house_number'), (u'dean south', u'road')], [(u'130', u'house_number'), (u'dean s', u'road')]]\n",
"\n",
"\n",
"130 dean st\n",
"[[(u'130', u'house_number'), (u'dean saint', u'road')], [(u'130', u'house_number'), (u'dean street', u'road')]]\n",
"\n",
"\n",
"130 dean str\n",
"[[(u'130', u'house_number'), (u'dean street', u'road')]]\n",
"\n",
"\n",
"130 dean stre\n",
"[[(u'130', u'house_number'), (u'dean stre', u'road')]]\n",
"\n",
"\n",
"130 dean stree\n",
"[[(u'130', u'house_number'), (u'dean stree', u'road')]]\n",
"\n",
"\n",
"130 dean street\n",
"[[(u'130', u'house_number'), (u'dean street', u'road')]]\n",
"\n",
"\n",
"130 dean street \n",
"[[(u'130', u'house_number'), (u'dean street', u'road')]]\n",
"\n",
"\n",
"130 dean street b\n",
"[[(u'130', u'house_number'), (u'dean street b', u'road')]]\n",
"\n",
"\n",
"130 dean street br\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brae', u'city')], [(u'130', u'house_number'), (u'dean street', u'road'), (u'brother', u'suburb')], [(u'130', u'house_number'), (u'dean street', u'road'), (u'brace', u'city')], [(u'130', u'house_number'), (u'dean street bridge', u'road')], [(u'130', u'house_number'), (u'dean street branch', u'road')]]\n",
"\n",
"\n",
"130 dean street bro\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'bro', u'city')]]\n",
"\n",
"\n",
"130 dean street broo\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'broo', u'city')]]\n",
"\n",
"\n",
"130 dean street brook\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brook', u'city')]]\n",
"\n",
"\n",
"130 dean street brookl\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brookl', u'city')]]\n",
"\n",
"\n",
"130 dean street brookly\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brookly', u'city')]]\n",
"\n",
"\n",
"130 dean street brooklyn\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'city')]]\n",
"\n",
"\n",
"130 dean street brooklyn,\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'city')]]\n",
"\n",
"\n",
"130 dean street brooklyn, \n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'city')]]\n",
"\n",
"\n",
"130 dean street brooklyn, n\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn north', u'suburb')], [(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn n', u'city')]]\n",
"\n",
"\n",
"130 dean street brooklyn, ny\n",
"[[(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'state_district'), (u'new york', u'state')], [(u'130', u'house_number'), (u'dean street', u'road'), (u'brooklyn', u'state_district'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: billerica\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"bi\n",
"[[(u'bi', u'house')]]\n",
"\n",
"\n",
"bil\n",
"[[(u'bil', u'house')]]\n",
"\n",
"\n",
"bill\n",
"[[(u'bill', u'house')]]\n",
"\n",
"\n",
"bille\n",
"[[(u'bille', u'house')]]\n",
"\n",
"\n",
"biller\n",
"[[(u'biller', u'house')]]\n",
"\n",
"\n",
"billeri\n",
"[[(u'billeri', u'house')]]\n",
"\n",
"\n",
"billeric\n",
"[[(u'billeric', u'house')]]\n",
"\n",
"\n",
"billerica\n",
"[[(u'billerica', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: billerica, ma\n",
"b\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"bi\n",
"[[(u'bi', u'house')]]\n",
"\n",
"\n",
"bil\n",
"[[(u'bil', u'house')]]\n",
"\n",
"\n",
"bill\n",
"[[(u'bill', u'house')]]\n",
"\n",
"\n",
"bille\n",
"[[(u'bille', u'house')]]\n",
"\n",
"\n",
"biller\n",
"[[(u'biller', u'house')]]\n",
"\n",
"\n",
"billeri\n",
"[[(u'billeri', u'house')]]\n",
"\n",
"\n",
"billeric\n",
"[[(u'billeric', u'house')]]\n",
"\n",
"\n",
"billerica\n",
"[[(u'billerica', u'city')]]\n",
"\n",
"\n",
"billerica,\n",
"[[(u'billerica', u'city')]]\n",
"\n",
"\n",
"billerica, \n",
"[[(u'billerica', u'city')]]\n",
"\n",
"\n",
"billerica, m\n",
"[[(u'billerica', u'city'), (u'mail', u'road')]]\n",
"\n",
"\n",
"billerica, ma\n",
"[[(u'billerica', u'city'), (u'massachusetts', u'state')], [(u'billerica', u'city'), (u'ma', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 15 call street billerica, ma\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"15\n",
"[[(u'15', u'house_number')]]\n",
"\n",
"\n",
"15 \n",
"[[(u'15', u'house_number')]]\n",
"\n",
"\n",
"15 c\n",
"[[(u'15', u'house_number'), (u'centre', u'road')], [(u'15', u'house_number'), (u'center', u'road')], [(u'15', u'house_number'), (u'central', u'road')]]\n",
"\n",
"\n",
"15 ca\n",
"[[(u'15', u'house_number'), (u'california', u'road')], [(u'15', u'house_number'), (u'ca', u'country')]]\n",
"\n",
"\n",
"15 cal\n",
"[[(u'15', u'house_number'), (u'cal', u'road')]]\n",
"\n",
"\n",
"15 call\n",
"[[(u'15', u'house'), (u'call', u'road')]]\n",
"\n",
"\n",
"15 call \n",
"[[(u'15', u'house'), (u'call', u'road')]]\n",
"\n",
"\n",
"15 call s\n",
"[[(u'15', u'house'), (u'call', u'road'), (u'san', u'city')], [(u'15', u'house'), (u'call south', u'road')], [(u'15', u'house'), (u'call s', u'road')]]\n",
"\n",
"\n",
"15 call st\n",
"[[(u'15', u'house'), (u'call saint', u'road')], [(u'15', u'house'), (u'call street', u'road')]]\n",
"\n",
"\n",
"15 call str\n",
"[[(u'15', u'house'), (u'call street', u'road')]]\n",
"\n",
"\n",
"15 call stre\n",
"[[(u'15', u'house'), (u'call', u'road'), (u'stre', u'house')]]\n",
"\n",
"\n",
"15 call stree\n",
"[[(u'15', u'house'), (u'call stree', u'road')]]\n",
"\n",
"\n",
"15 call street\n",
"[[(u'15', u'house'), (u'call street', u'road')]]\n",
"\n",
"\n",
"15 call street \n",
"[[(u'15', u'house'), (u'call street', u'road')]]\n",
"\n",
"\n",
"15 call street b\n",
"[[(u'15', u'house'), (u'call street b', u'road')]]\n",
"\n",
"\n",
"15 call street bi\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'bi', u'city')]]\n",
"\n",
"\n",
"15 call street bil\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'bil', u'city')]]\n",
"\n",
"\n",
"15 call street bill\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'bill', u'city')]]\n",
"\n",
"\n",
"15 call street bille\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'bille', u'city')]]\n",
"\n",
"\n",
"15 call street biller\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'biller', u'city')]]\n",
"\n",
"\n",
"15 call street billeri\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billeri', u'city')]]\n",
"\n",
"\n",
"15 call street billeric\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billeric', u'city')]]\n",
"\n",
"\n",
"15 call street billerica\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city')]]\n",
"\n",
"\n",
"15 call street billerica,\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city')]]\n",
"\n",
"\n",
"15 call street billerica, \n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city')]]\n",
"\n",
"\n",
"15 call street billerica, m\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city'), (u'mail', u'road')]]\n",
"\n",
"\n",
"15 call street billerica, ma\n",
"[[(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city'), (u'massachusetts', u'state')], [(u'15', u'house'), (u'call street', u'road'), (u'billerica', u'city'), (u'ma', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: union sq\n",
"u\n",
"[[(u'u', u'house')]]\n",
"\n",
"\n",
"un\n",
"[[(u'unit', u'house')], [(u'union', u'house')]]\n",
"\n",
"\n",
"uni\n",
"[[(u'university', u'house')]]\n",
"\n",
"\n",
"unio\n",
"[[(u'unio', u'road')]]\n",
"\n",
"\n",
"union\n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union \n",
"[[(u'union', u'house')]]\n",
"\n",
"\n",
"union s\n",
"[[(u'union san', u'road')], [(u'union', u'house'), (u'south', u'road')], [(u'union s', u'road')]]\n",
"\n",
"\n",
"union sq\n",
"[[(u'union', u'city'), (u'square', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: portland\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"po\n",
"[[(u'post office', u'house')]]\n",
"\n",
"\n",
"por\n",
"[[(u'portion', u'house')]]\n",
"\n",
"\n",
"port\n",
"[[(u'port', u'road')]]\n",
"\n",
"\n",
"portl\n",
"[[(u'portl', u'house')]]\n",
"\n",
"\n",
"portla\n",
"[[(u'portla', u'house')]]\n",
"\n",
"\n",
"portlan\n",
"[[(u'portlan', u'house')]]\n",
"\n",
"\n",
"portland\n",
"[[(u'portland', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: portland, oregon\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"po\n",
"[[(u'post office', u'house')]]\n",
"\n",
"\n",
"por\n",
"[[(u'portion', u'house')]]\n",
"\n",
"\n",
"port\n",
"[[(u'port', u'road')]]\n",
"\n",
"\n",
"portl\n",
"[[(u'portl', u'house')]]\n",
"\n",
"\n",
"portla\n",
"[[(u'portla', u'house')]]\n",
"\n",
"\n",
"portlan\n",
"[[(u'portlan', u'house')]]\n",
"\n",
"\n",
"portland\n",
"[[(u'portland', u'city')]]\n",
"\n",
"\n",
"portland,\n",
"[[(u'portland', u'city')]]\n",
"\n",
"\n",
"portland, \n",
"[[(u'portland', u'city')]]\n",
"\n",
"\n",
"portland, o\n",
"[[(u'portland', u'city'), (u'o', u'house')]]\n",
"\n",
"\n",
"portland, or\n",
"[[(u'portland', u'city'), (u'oregon', u'state')], [(u'portland', u'city'), (u'or', u'state')]]\n",
"\n",
"\n",
"portland, ore\n",
"[[(u'portland', u'city'), (u'ore', u'house')]]\n",
"\n",
"\n",
"portland, oreg\n",
"[[(u'portland oreg', u'house')]]\n",
"\n",
"\n",
"portland, orego\n",
"[[(u'portland orego', u'house')]]\n",
"\n",
"\n",
"portland, oregon\n",
"[[(u'portland', u'city'), (u'oregon', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: paris\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"pa\n",
"[[(u'pa', u'state')], [(u'pennsylvania', u'state')]]\n",
"\n",
"\n",
"par\n",
"[[(u'par', u'house')]]\n",
"\n",
"\n",
"pari\n",
"[[(u'pari', u'house')]]\n",
"\n",
"\n",
"paris\n",
"[[(u'paris', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: france\n",
"f\n",
"[[(u'flat', u'house_number')]]\n",
"\n",
"\n",
"fr\n",
"[[(u'father', u'house')], [(u'frontage', u'road')]]\n",
"\n",
"\n",
"fra\n",
"[[(u'fra', u'country')]]\n",
"\n",
"\n",
"fran\n",
"[[(u'fran', u'house')]]\n",
"\n",
"\n",
"franc\n",
"[[(u'franc', u'road')]]\n",
"\n",
"\n",
"france\n",
"[[(u'france', u'country')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: london\n",
"l\n",
"[[(u'level', u'house')]]\n",
"\n",
"\n",
"lo\n",
"[[(u'lo', u'house')]]\n",
"\n",
"\n",
"lon\n",
"[[(u'lon', u'road')]]\n",
"\n",
"\n",
"lond\n",
"[[(u'lond', u'house')]]\n",
"\n",
"\n",
"londo\n",
"[[(u'londo', u'house')]]\n",
"\n",
"\n",
"london\n",
"[[(u'london', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: chelsea, new york\n",
"c\n",
"[[(u'centre', u'house')], [(u'center', u'house')], [(u'central', u'house')]]\n",
"\n",
"\n",
"ch\n",
"[[(u'county highway', u'road')], [(u'chase', u'house')]]\n",
"\n",
"\n",
"che\n",
"[[(u'che', u'country')]]\n",
"\n",
"\n",
"chel\n",
"[[(u'chel', u'house')]]\n",
"\n",
"\n",
"chels\n",
"[[(u'chels', u'house')]]\n",
"\n",
"\n",
"chelse\n",
"[[(u'chelse', u'house')]]\n",
"\n",
"\n",
"chelsea\n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea,\n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea, \n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea, n\n",
"[[(u'chelsea', u'suburb'), (u'north', u'road')], [(u'chelsea', u'house'), (u'n', u'road')]]\n",
"\n",
"\n",
"chelsea, ne\n",
"[[(u'chelsea', u'city'), (u'ne', u'state')], [(u'chelsea', u'house'), (u'northeast', u'road')], [(u'chelsea', u'city'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"chelsea, new\n",
"[[(u'chelsea', u'suburb'), (u'new', u'house')]]\n",
"\n",
"\n",
"chelsea, new \n",
"[[(u'chelsea', u'suburb'), (u'new', u'house')]]\n",
"\n",
"\n",
"chelsea, new y\n",
"[[(u'chelsea', u'suburb'), (u'new y', u'house')]]\n",
"\n",
"\n",
"chelsea, new yo\n",
"[[(u'chelsea', u'suburb'), (u'new yo', u'house')]]\n",
"\n",
"\n",
"chelsea, new yor\n",
"[[(u'chelsea', u'suburb'), (u'new', u'house'), (u'yor', u'suburb')]]\n",
"\n",
"\n",
"chelsea, new york\n",
"[[(u'chelsea', u'suburb'), (u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: soho, new york\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"so\n",
"[[(u'south', u'road')]]\n",
"\n",
"\n",
"soh\n",
"[[(u'soh', u'house')]]\n",
"\n",
"\n",
"soho\n",
"[[(u'soho', u'house')]]\n",
"\n",
"\n",
"soho,\n",
"[[(u'soho', u'house')]]\n",
"\n",
"\n",
"soho, \n",
"[[(u'soho', u'house')]]\n",
"\n",
"\n",
"soho, n\n",
"[[(u'soho', u'house'), (u'north', u'road')], [(u'soho n', u'house')]]\n",
"\n",
"\n",
"soho, ne\n",
"[[(u'soho', u'house'), (u'ne', u'road')], [(u'soho', u'house'), (u'northeast', u'road')], [(u'soho', u'suburb'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"soho, new\n",
"[[(u'soho', u'suburb'), (u'new', u'house')]]\n",
"\n",
"\n",
"soho, new \n",
"[[(u'soho', u'suburb'), (u'new', u'house')]]\n",
"\n",
"\n",
"soho, new y\n",
"[[(u'soho', u'suburb'), (u'new y', u'house')]]\n",
"\n",
"\n",
"soho, new yo\n",
"[[(u'soho', u'suburb'), (u'new yo', u'house')]]\n",
"\n",
"\n",
"soho, new yor\n",
"[[(u'soho', u'suburb'), (u'new', u'house'), (u'yor', u'suburb')]]\n",
"\n",
"\n",
"soho, new york\n",
"[[(u'soho', u'suburb'), (u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: perugia airport\n",
"p\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"pe\n",
"[[(u'pe', u'state')], [(u'prince edward island', u'state')]]\n",
"\n",
"\n",
"per\n",
"[[(u'per', u'country')]]\n",
"\n",
"\n",
"peru\n",
"[[(u'peru', u'country')]]\n",
"\n",
"\n",
"perug\n",
"[[(u'perug', u'house')]]\n",
"\n",
"\n",
"perugi\n",
"[[(u'perugi', u'house')]]\n",
"\n",
"\n",
"perugia\n",
"[[(u'perugia', u'city')]]\n",
"\n",
"\n",
"perugia \n",
"[[(u'perugia', u'city')]]\n",
"\n",
"\n",
"perugia a\n",
"[[(u'perugia', u'city'), (u'a', u'house')]]\n",
"\n",
"\n",
"perugia ai\n",
"[[(u'perugia', u'city'), (u'ai', u'house')]]\n",
"\n",
"\n",
"perugia air\n",
"[[(u'perugia air', u'house')]]\n",
"\n",
"\n",
"perugia airp\n",
"[[(u'perugia airp', u'house')]]\n",
"\n",
"\n",
"perugia airpo\n",
"[[(u'perugia airpo', u'house')]]\n",
"\n",
"\n",
"perugia airpor\n",
"[[(u'perugia airpor', u'house')]]\n",
"\n",
"\n",
"perugia airport\n",
"[[(u'perugia airport', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 101 saint marks pl, new york\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"10\n",
"[[(u'10', u'house_number')]]\n",
"\n",
"\n",
"101\n",
"[[(u'101', u'house_number')]]\n",
"\n",
"\n",
"101 \n",
"[[(u'101', u'house_number')]]\n",
"\n",
"\n",
"101 s\n",
"[[(u'101', u'house_number'), (u'san', u'road')], [(u'101', u'house_number'), (u'south', u'road')], [(u'101', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"101 sa\n",
"[[(u'101', u'house_number'), (u'southern', u'road'), (u'australia', u'country')]]\n",
"\n",
"\n",
"101 sai\n",
"[[(u'101', u'house_number'), (u'sai', u'road')]]\n",
"\n",
"\n",
"101 sain\n",
"[[(u'101', u'house_number'), (u'sain', u'road')]]\n",
"\n",
"\n",
"101 saint\n",
"[[(u'101', u'house_number'), (u'saint', u'road')]]\n",
"\n",
"\n",
"101 saint \n",
"[[(u'101', u'house_number'), (u'saint', u'road')]]\n",
"\n",
"\n",
"101 saint m\n",
"[[(u'101', u'house_number'), (u'saint mail', u'road')]]\n",
"\n",
"\n",
"101 saint ma\n",
"[[(u'101', u'house_number'), (u'saint', u'road'), (u'massachusetts', u'state')], [(u'101', u'house_number'), (u'saint ma', u'road')]]\n",
"\n",
"\n",
"101 saint mar\n",
"[[(u'101', u'house_number'), (u'saint mar', u'road')]]\n",
"\n",
"\n",
"101 saint mark\n",
"[[(u'101', u'house_number'), (u'saint mark', u'road')]]\n",
"\n",
"\n",
"101 saint marks\n",
"[[(u'101', u'house_number'), (u'saint', u'road'), (u'marks', u'city')]]\n",
"\n",
"\n",
"101 saint marks \n",
"[[(u'101', u'house_number'), (u'saint', u'road'), (u'marks', u'city')]]\n",
"\n",
"\n",
"101 saint marks p\n",
"[[(u'101', u'house_number'), (u'saint marks p', u'road')]]\n",
"\n",
"\n",
"101 saint marks pl\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road')], [(u'101', u'house_number'), (u'saint marks', u'road'), (u'plain', u'city')]]\n",
"\n",
"\n",
"101 saint marks pl,\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road')], [(u'101', u'house_number'), (u'saint marks', u'road'), (u'plain', u'city')]]\n",
"\n",
"\n",
"101 saint marks pl, \n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road')], [(u'101', u'house_number'), (u'saint marks', u'road'), (u'plain', u'city')]]\n",
"\n",
"\n",
"101 saint marks pl, n\n",
"[[(u'101', u'house_number'), (u'saint marks place north', u'road')], [(u'101', u'house_number'), (u'saint marks place n', u'road')], [(u'101', u'house_number'), (u'saint marks plain north', u'road')], [(u'101', u'house_number'), (u'saint marks plain n', u'road')]]\n",
"\n",
"\n",
"101 saint marks pl, ne\n",
"[[(u'101', u'house_number'), (u'saint marks place ne', u'road')], [(u'101', u'house_number'), (u'saint marks place northeast', u'road')], [(u'101', u'house_number'), (u'saint marks place', u'road'), (u'nebraska', u'state')], [(u'101', u'house_number'), (u'saint marks plain ne', u'road')], [(u'101', u'house_number'), (u'saint marks plain northeast', u'road')], [(u'101', u'house_number'), (u'saint marks', u'road'), (u'plain', u'city'), (u'nebraska', u'state')]]\n",
"\n",
"\n",
"101 saint marks pl, new\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new', u'suburb')], [(u'101', u'house_number'), (u'saint marks plain', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"101 saint marks pl, new \n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new', u'suburb')], [(u'101', u'house_number'), (u'saint marks plain', u'road'), (u'new', u'suburb')]]\n",
"\n",
"\n",
"101 saint marks pl, new y\n",
"[[(u'101', u'house_number'), (u'saint marks place new y', u'road')], [(u'101', u'house_number'), (u'saint marks plain new y', u'road')]]\n",
"\n",
"\n",
"101 saint marks pl, new yo\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new yo', u'suburb')], [(u'101', u'house_number'), (u'saint marks plain', u'road'), (u'new yo', u'suburb')]]\n",
"\n",
"\n",
"101 saint marks pl, new yor\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new yor', u'suburb')], [(u'101', u'house_number'), (u'saint marks plain new', u'road'), (u'yor', u'suburb')]]\n",
"\n",
"\n",
"101 saint marks pl, new york\n",
"[[(u'101', u'house_number'), (u'saint marks place', u'road'), (u'new york', u'state')], [(u'101', u'house_number'), (u'saint marks plain', u'road'), (u'new york', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: newark airport\n",
"n\n",
"[[(u'north', u'road')], [(u'n', u'road')]]\n",
"\n",
"\n",
"ne\n",
"[[(u'ne', u'road')], [(u'northeast', u'road')], [(u'nebraska', u'state')]]\n",
"\n",
"\n",
"new\n",
"[[(u'new', u'house')]]\n",
"\n",
"\n",
"newa\n",
"[[(u'newa', u'house')]]\n",
"\n",
"\n",
"newar\n",
"[[(u'newar', u'house')]]\n",
"\n",
"\n",
"newark\n",
"[[(u'newark', u'city')]]\n",
"\n",
"\n",
"newark \n",
"[[(u'newark', u'city')]]\n",
"\n",
"\n",
"newark a\n",
"[[(u'newark', u'city'), (u'a', u'house')]]\n",
"\n",
"\n",
"newark ai\n",
"[[(u'newark', u'city'), (u'ai', u'house')]]\n",
"\n",
"\n",
"newark air\n",
"[[(u'newark air', u'house')]]\n",
"\n",
"\n",
"newark airp\n",
"[[(u'newark airp', u'house')]]\n",
"\n",
"\n",
"newark airpo\n",
"[[(u'newark airpo', u'house')]]\n",
"\n",
"\n",
"newark airpor\n",
"[[(u'newark airpor', u'house')]]\n",
"\n",
"\n",
"newark airport\n",
"[[(u'newark airport', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 7 Simon-Dach-Straße\n",
"7\n",
"[[(u'7', u'house_number')]]\n",
"\n",
"\n",
"7 \n",
"[[(u'7', u'house_number')]]\n",
"\n",
"\n",
"7 S\n",
"[[(u'7', u'house_number'), (u'san', u'road')], [(u'7', u'house_number'), (u'south', u'road')], [(u'7', u'house_number'), (u's', u'road')]]\n",
"\n",
"\n",
"7 Si\n",
"[[(u'7', u'house_number'), (u'si', u'country')]]\n",
"\n",
"\n",
"7 Sim\n",
"[[(u'7', u'house_number'), (u'sim', u'road')]]\n",
"\n",
"\n",
"7 Simo\n",
"[[(u'7', u'house_number'), (u'simo', u'road')]]\n",
"\n",
"\n",
"7 Simon\n",
"[[(u'7', u'house_number'), (u'simon', u'road')]]\n",
"\n",
"\n",
"7 Simon-\n",
"[[(u'7', u'house_number'), (u'simon', u'road')]]\n",
"\n",
"\n",
"7 Simon-D\n",
"[[(u'7', u'house_number'), (u'simon-d', u'road')], [(u'7 simon-500', u'house')], [(u'7', u'house_number'), (u'simon d', u'road')], [(u'7', u'house_number'), (u'simon', u'road'), (u'500', u'house_number')], [(u'7', u'house_number'), (u'simond', u'road')]]\n",
"\n",
"\n",
"7 Simon-Da\n",
"[[(u'7', u'house_number'), (u'simon-da', u'road')], [(u'7', u'house_number'), (u'simon da', u'road')], [(u'7', u'house_number'), (u'simonda', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dac\n",
"[[(u'7', u'house_number'), (u'simon-dac', u'road')], [(u'7', u'house_number'), (u'simon dac', u'road')], [(u'7', u'house_number'), (u'simondac', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach\n",
"[[(u'7', u'house_number'), (u'simon-dach', u'road')], [(u'7', u'house_number'), (u'simon dach', u'road')], [(u'7', u'house_number'), (u'simondach', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-\n",
"[[(u'7', u'house_number'), (u'simon-dach', u'road')], [(u'7', u'house_number'), (u'simon dach', u'road')], [(u'7', u'house_number'), (u'simondach', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-S\n",
"[[(u'7', u'house_number'), (u'simon-dach-s', u'road')], [(u'7', u'house_number'), (u'simon dach san', u'road')], [(u'7', u'house_number'), (u'simon dach south', u'road')], [(u'7', u'house_number'), (u'simon dach s', u'road')], [(u'7', u'house_number'), (u'simondachs', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-St\n",
"[[(u'7', u'house_number'), (u'simon-dach-st', u'road')], [(u'7', u'house_number'), (u'simon dach saint', u'road')], [(u'7', u'house_number'), (u'simon dach street', u'road')], [(u'7', u'house_number'), (u'simondachst', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-Str\n",
"[[(u'7', u'house_number'), (u'simon-dach-str', u'road')], [(u'7', u'house_number'), (u'simon dach street', u'road')], [(u'7', u'house_number'), (u'simondachstr', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-Stra\n",
"[[(u'7', u'house_number'), (u'simon-dach-stra', u'road')], [(u'7', u'house_number'), (u'simon dach stravenue', u'road')], [(u'7', u'house_number'), (u'simon dach strand', u'road')], [(u'7', u'house_number'), (u'simondachstra', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-Straß\n",
"[[(u'7', u'house_number'), (u'simon-dach-strass', u'road')], [(u'7', u'house_number'), (u'simon dach', u'road'), (u'strass', u'city')], [(u'7', u'house_number'), (u'simondachstrass', u'road')]]\n",
"\n",
"\n",
"7 Simon-Dach-Straße\n",
"[[(u'7', u'house'), (u'simon-dach-strasse', u'road')], [(u'7', u'house_number'), (u'simon dach strasse', u'road')], [(u'7', u'house'), (u'simondachstrasse', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Simon-Dach-Straße 7\n",
"S\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"Si\n",
"[[(u'si', u'country')]]\n",
"\n",
"\n",
"Sim\n",
"[[(u'sim', u'house')]]\n",
"\n",
"\n",
"Simo\n",
"[[(u'simo', u'road')]]\n",
"\n",
"\n",
"Simon\n",
"[[(u'simon', u'road')]]\n",
"\n",
"\n",
"Simon-\n",
"[[(u'simon', u'road')]]\n",
"\n",
"\n",
"Simon-D\n",
"[[(u'simon-d', u'house')], [(u'simon-500', u'house')], [(u'simon d', u'road')], [(u'simon', u'road'), (u'500', u'house_number')], [(u'simond', u'house')]]\n",
"\n",
"\n",
"Simon-Da\n",
"[[(u'simon-da', u'house')], [(u'simon da', u'house')], [(u'simonda', u'house')]]\n",
"\n",
"\n",
"Simon-Dac\n",
"[[(u'simon-dac', u'house')], [(u'simon dac', u'house')], [(u'simondac', u'house')]]\n",
"\n",
"\n",
"Simon-Dach\n",
"[[(u'simon-dach', u'house')], [(u'simon dach', u'house')], [(u'simondach', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-\n",
"[[(u'simon-dach', u'house')], [(u'simon dach', u'house')], [(u'simondach', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-S\n",
"[[(u'simon-dach-s', u'house')], [(u'simon dach san', u'house')], [(u'simon dach', u'house'), (u'south', u'road')], [(u'simon dach', u'house'), (u's', u'road')], [(u'simondachs', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-St\n",
"[[(u'simon-dach-st', u'house')], [(u'simon dach', u'house'), (u'saint', u'road')], [(u'simon', u'house'), (u'dach street', u'road')], [(u'simondachst', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-Str\n",
"[[(u'simon-dach-str', u'house')], [(u'simon', u'house'), (u'dach street', u'road')], [(u'simondachstr', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-Stra\n",
"[[(u'simon-dach-stra', u'house')], [(u'simon dach', u'house'), (u'stravenue', u'road')], [(u'simon dach', u'house'), (u'strand', u'road')], [(u'simondachstra', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-Straß\n",
"[[(u'simon-dach-strass', u'house')], [(u'simon dach', u'house'), (u'strass', u'road')], [(u'simondachstrass', u'house')]]\n",
"\n",
"\n",
"Simon-Dach-Straße\n",
"[[(u'simon-dach-strasse', u'road')], [(u'simon', u'house'), (u'dach strasse', u'road')], [(u'simondachstrasse', u'road')]]\n",
"\n",
"\n",
"Simon-Dach-Straße \n",
"[[(u'simon-dach-strasse', u'road')], [(u'simon', u'house'), (u'dach strasse', u'road')], [(u'simondachstrasse', u'road')]]\n",
"\n",
"\n",
"Simon-Dach-Straße 7\n",
"[[(u'simon-dach-strasse', u'road'), (u'7', u'house_number')], [(u'simon', u'house'), (u'dach strasse', u'road'), (u'7', u'house_number')], [(u'simondachstrasse', u'road'), (u'7', u'house_number')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 30 West 26th Street\n",
"3\n",
"[[(u'3', u'house_number')]]\n",
"\n",
"\n",
"30\n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 \n",
"[[(u'30', u'house_number')]]\n",
"\n",
"\n",
"30 W\n",
"[[(u'30', u'house_number'), (u'west', u'road')], [(u'30', u'house_number'), (u'w', u'house')]]\n",
"\n",
"\n",
"30 We\n",
"[[(u'30', u'house_number'), (u'warehouse', u'road')]]\n",
"\n",
"\n",
"30 Wes\n",
"[[(u'30', u'house_number'), (u'wes', u'road')]]\n",
"\n",
"\n",
"30 West\n",
"[[(u'30', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"30 West \n",
"[[(u'30', u'house_number'), (u'west', u'road')]]\n",
"\n",
"\n",
"30 West 2\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'2', u'house_number')]]\n",
"\n",
"\n",
"30 West 26\n",
"[[(u'30', u'house_number'), (u'west 26', u'road')]]\n",
"\n",
"\n",
"30 West 26t\n",
"[[(u'30', u'house_number'), (u'west', u'road'), (u'26t', u'house_number')]]\n",
"\n",
"\n",
"30 West 26th\n",
"[[(u'30', u'house_number'), (u'west 26th', u'road')]]\n",
"\n",
"\n",
"30 West 26th \n",
"[[(u'30', u'house_number'), (u'west 26th', u'road')]]\n",
"\n",
"\n",
"30 West 26th S\n",
"[[(u'30', u'house_number'), (u'west 26th san', u'road')], [(u'30', u'house_number'), (u'west 26th south', u'road')], [(u'30', u'house_number'), (u'west 26th s', u'road')]]\n",
"\n",
"\n",
"30 West 26th St\n",
"[[(u'30', u'house_number'), (u'west 26th saint', u'road')], [(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Str\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"30 West 26th Stre\n",
"[[(u'30', u'house_number'), (u'west 26th stre', u'road')]]\n",
"\n",
"\n",
"30 West 26th Stree\n",
"[[(u'30', u'house_number'), (u'west 26th stree', u'road')]]\n",
"\n",
"\n",
"30 West 26th Street\n",
"[[(u'30', u'house_number'), (u'west 26th street', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: statue of liberty\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"st\n",
"[[(u'saint', u'house')], [(u'street', u'road')]]\n",
"\n",
"\n",
"sta\n",
"[[(u'station', u'house')], [(u'santa', u'road')]]\n",
"\n",
"\n",
"stat\n",
"[[(u'stat', u'house')]]\n",
"\n",
"\n",
"statu\n",
"[[(u'statu', u'house')]]\n",
"\n",
"\n",
"statue\n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue \n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue o\n",
"[[(u'statue o', u'house')]]\n",
"\n",
"\n",
"statue of\n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of \n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of l\n",
"[[(u'statue of level', u'house')]]\n",
"\n",
"\n",
"statue of li\n",
"[[(u'statue of li', u'house')], [(u'statue of 51', u'house')]]\n",
"\n",
"\n",
"statue of lib\n",
"[[(u'statue of lib', u'house')]]\n",
"\n",
"\n",
"statue of libe\n",
"[[(u'statue of libe', u'house')]]\n",
"\n",
"\n",
"statue of liber\n",
"[[(u'statue of liber', u'house')]]\n",
"\n",
"\n",
"statue of libert\n",
"[[(u'statue of libert', u'house')]]\n",
"\n",
"\n",
"statue of liberty\n",
"[[(u'statue of liberty', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: statue of liberty\n",
"s\n",
"[[(u'san', u'road')], [(u'south', u'road')], [(u's', u'road')]]\n",
"\n",
"\n",
"st\n",
"[[(u'saint', u'house')], [(u'street', u'road')]]\n",
"\n",
"\n",
"sta\n",
"[[(u'station', u'house')], [(u'santa', u'road')]]\n",
"\n",
"\n",
"stat\n",
"[[(u'stat', u'house')]]\n",
"\n",
"\n",
"statu\n",
"[[(u'statu', u'house')]]\n",
"\n",
"\n",
"statue\n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue \n",
"[[(u'statue', u'house')]]\n",
"\n",
"\n",
"statue o\n",
"[[(u'statue o', u'house')]]\n",
"\n",
"\n",
"statue of\n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of \n",
"[[(u'statue of', u'house')]]\n",
"\n",
"\n",
"statue of l\n",
"[[(u'statue of level', u'house')]]\n",
"\n",
"\n",
"statue of li\n",
"[[(u'statue of li', u'house')], [(u'statue of 51', u'house')]]\n",
"\n",
"\n",
"statue of lib\n",
"[[(u'statue of lib', u'house')]]\n",
"\n",
"\n",
"statue of libe\n",
"[[(u'statue of libe', u'house')]]\n",
"\n",
"\n",
"statue of liber\n",
"[[(u'statue of liber', u'house')]]\n",
"\n",
"\n",
"statue of libert\n",
"[[(u'statue of libert', u'house')]]\n",
"\n",
"\n",
"statue of liberty\n",
"[[(u'statue of liberty', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 4th and King\n",
"4\n",
"[[(u'4', u'house_number')]]\n",
"\n",
"\n",
"4t\n",
"[[(u'4t', u'house_number')]]\n",
"\n",
"\n",
"4th\n",
"[[(u'4th', u'house')]]\n",
"\n",
"\n",
"4th \n",
"[[(u'4th', u'house')]]\n",
"\n",
"\n",
"4th a\n",
"[[(u'4th a', u'road')]]\n",
"\n",
"\n",
"4th an\n",
"[[(u'4th an', u'house')]]\n",
"\n",
"\n",
"4th and\n",
"[[(u'4th and', u'house')]]\n",
"\n",
"\n",
"4th and \n",
"[[(u'4th and', u'house')]]\n",
"\n",
"\n",
"4th and K\n",
"[[(u'4th', u'road'), (u'0 k', u'house_number')]]\n",
"\n",
"\n",
"4th and Ki\n",
"[[(u'4th', u'road'), (u'0', u'house_number'), (u'ki', u'city_district')]]\n",
"\n",
"\n",
"4th and Kin\n",
"[[(u'4th', u'road'), (u'0', u'house_number'), (u'kin', u'city')]]\n",
"\n",
"\n",
"4th and King\n",
"[[(u'4th', u'road'), (u'0', u'house_number'), (u'king', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Lancaster\n",
"L\n",
"[[(u'level', u'house')]]\n",
"\n",
"\n",
"La\n",
"[[(u'la', u'house')], [(u'lane', u'road')], [(u'louisiana', u'house')]]\n",
"\n",
"\n",
"Lan\n",
"[[(u'lan', u'house')]]\n",
"\n",
"\n",
"Lanc\n",
"[[(u'lanc', u'house')]]\n",
"\n",
"\n",
"Lanca\n",
"[[(u'lanca', u'house')]]\n",
"\n",
"\n",
"Lancas\n",
"[[(u'lancas', u'house')]]\n",
"\n",
"\n",
"Lancast\n",
"[[(u'lancast', u'house')]]\n",
"\n",
"\n",
"Lancaste\n",
"[[(u'lancaste', u'house')]]\n",
"\n",
"\n",
"Lancaster\n",
"[[(u'lancaster', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Paris\n",
"P\n",
"[[(u'p', u'road')]]\n",
"\n",
"\n",
"Pa\n",
"[[(u'pa', u'state')], [(u'pennsylvania', u'state')]]\n",
"\n",
"\n",
"Par\n",
"[[(u'par', u'house')]]\n",
"\n",
"\n",
"Pari\n",
"[[(u'pari', u'house')]]\n",
"\n",
"\n",
"Paris\n",
"[[(u'paris', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Manchester\n",
"M\n",
"[[(u'mail', u'road')]]\n",
"\n",
"\n",
"Ma\n",
"[[(u'massachusetts', u'state')], [(u'ma', u'house')]]\n",
"\n",
"\n",
"Man\n",
"[[(u'man', u'house')]]\n",
"\n",
"\n",
"Manc\n",
"[[(u'manc', u'house')]]\n",
"\n",
"\n",
"Manch\n",
"[[(u'manch', u'house')]]\n",
"\n",
"\n",
"Manche\n",
"[[(u'manche', u'house')]]\n",
"\n",
"\n",
"Manches\n",
"[[(u'manches', u'house')]]\n",
"\n",
"\n",
"Manchest\n",
"[[(u'manchest', u'house')]]\n",
"\n",
"\n",
"Mancheste\n",
"[[(u'mancheste', u'house')]]\n",
"\n",
"\n",
"Manchester\n",
"[[(u'manchester', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: autocomplete_venues\n",
"TEST: DiDi Dumpling\n",
"D\n",
"[[(u'd', u'house')], [(u'500', u'house_number')]]\n",
"\n",
"\n",
"Di\n",
"[[(u'di', u'house')], [(u'501', u'house_number')]]\n",
"\n",
"\n",
"DiD\n",
"[[(u'did', u'house')], [(u'500', u'house_number')]]\n",
"\n",
"\n",
"DiDi\n",
"[[(u'didi', u'house')], [(u'501', u'house_number')]]\n",
"\n",
"\n",
"DiDi \n",
"[[(u'didi', u'house')]]\n",
"\n",
"\n",
"DiDi D\n",
"[[(u'didi d', u'house')], [(u'501', u'house'), (u'500', u'house_number')]]\n",
"\n",
"\n",
"DiDi Du\n",
"[[(u'didi du', u'house')], [(u'501', u'house_number'), (u'du', u'road')]]\n",
"\n",
"\n",
"DiDi Dum\n",
"[[(u'didi dum', u'house')], [(u'501', u'house_number'), (u'dum', u'road')]]\n",
"\n",
"\n",
"DiDi Dump\n",
"[[(u'didi dump', u'house')], [(u'501', u'house_number'), (u'dump', u'road')]]\n",
"\n",
"\n",
"DiDi Dumpl\n",
"[[(u'didi dumpl', u'house')], [(u'501', u'house_number'), (u'dumpl', u'road')]]\n",
"\n",
"\n",
"DiDi Dumpli\n",
"[[(u'didi dumpli', u'house')], [(u'501', u'house_number'), (u'dumpli', u'road')]]\n",
"\n",
"\n",
"DiDi Dumplin\n",
"[[(u'didi dumplin', u'house')], [(u'501', u'house_number'), (u'dumplin', u'road')]]\n",
"\n",
"\n",
"DiDi Dumpling\n",
"[[(u'didi dumpling', u'house')], [(u'501', u'house_number'), (u'dumpling', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: hackney city farm\n",
"h\n",
"[[(u'h', u'house')]]\n",
"\n",
"\n",
"ha\n",
"[[(u'ha', u'house')]]\n",
"\n",
"\n",
"hac\n",
"[[(u'hac', u'house')]]\n",
"\n",
"\n",
"hack\n",
"[[(u'hack', u'house')]]\n",
"\n",
"\n",
"hackn\n",
"[[(u'hackn', u'house')]]\n",
"\n",
"\n",
"hackne\n",
"[[(u'hackne', u'house')]]\n",
"\n",
"\n",
"hackney\n",
"[[(u'hackney', u'house')]]\n",
"\n",
"\n",
"hackney \n",
"[[(u'hackney', u'house')]]\n",
"\n",
"\n",
"hackney c\n",
"[[(u'hackney', u'road'), (u'centre', u'house')], [(u'hackney center', u'house')], [(u'hackney central', u'suburb')]]\n",
"\n",
"\n",
"hackney ci\n",
"[[(u'hackney circuit', u'road')]]\n",
"\n",
"\n",
"hackney cit\n",
"[[(u'hackney', u'road'), (u'cit', u'house')]]\n",
"\n",
"\n",
"hackney city\n",
"[[(u'hackney', u'city_district'), (u'city', u'house')]]\n",
"\n",
"\n",
"hackney city \n",
"[[(u'hackney', u'city_district'), (u'city', u'house')]]\n",
"\n",
"\n",
"hackney city f\n",
"[[(u'hackney', u'city_district'), (u'city flat', u'house')]]\n",
"\n",
"\n",
"hackney city fa\n",
"[[(u'hackney', u'city_district'), (u'city fa', u'house')]]\n",
"\n",
"\n",
"hackney city far\n",
"[[(u'hackney', u'city_district'), (u'city far', u'house')]]\n",
"\n",
"\n",
"hackney city farm\n",
"[[(u'hackney', u'city_district'), (u'city', u'road'), (u'farm', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: 1 Grolmanstrasse\n",
"1\n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 \n",
"[[(u'1', u'house_number')]]\n",
"\n",
"\n",
"1 G\n",
"[[(u'1', u'house_number'), (u'ground', u'road')]]\n",
"\n",
"\n",
"1 Gr\n",
"[[(u'1', u'house_number'), (u'grove', u'road')]]\n",
"\n",
"\n",
"1 Gro\n",
"[[(u'1', u'house_number'), (u'grove', u'road')]]\n",
"\n",
"\n",
"1 Grol\n",
"[[(u'1', u'house_number'), (u'grol', u'road')]]\n",
"\n",
"\n",
"1 Grolm\n",
"[[(u'1', u'house_number'), (u'grolm', u'road')]]\n",
"\n",
"\n",
"1 Grolma\n",
"[[(u'1', u'house_number'), (u'grolma', u'road')]]\n",
"\n",
"\n",
"1 Grolman\n",
"[[(u'1', u'house_number'), (u'grolman', u'road')]]\n",
"\n",
"\n",
"1 Grolmans\n",
"[[(u'1', u'house_number'), (u'grolmans', u'road')]]\n",
"\n",
"\n",
"1 Grolmanst\n",
"[[(u'1', u'house_number'), (u'grolmanst', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstr\n",
"[[(u'1', u'house_number'), (u'grolmanstr', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstra\n",
"[[(u'1', u'house_number'), (u'grolmanstra', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstras\n",
"[[(u'1', u'house_number'), (u'grolmanstras', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstrass\n",
"[[(u'1', u'house_number'), (u'grolmanstrass', u'road')]]\n",
"\n",
"\n",
"1 Grolmanstrasse\n",
"[[(u'1', u'house'), (u'grolmanstrasse', u'road')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Beach Bablyon\n",
"B\n",
"[[(u'b', u'house')]]\n",
"\n",
"\n",
"Be\n",
"[[(u'be', u'country')]]\n",
"\n",
"\n",
"Bea\n",
"[[(u'bea', u'house')]]\n",
"\n",
"\n",
"Beac\n",
"[[(u'beac', u'house')]]\n",
"\n",
"\n",
"Beach\n",
"[[(u'beach', u'house')]]\n",
"\n",
"\n",
"Beach \n",
"[[(u'beach', u'house')]]\n",
"\n",
"\n",
"Beach B\n",
"[[(u'beach b', u'house')]]\n",
"\n",
"\n",
"Beach Ba\n",
"[[(u'beach ba', u'house')]]\n",
"\n",
"\n",
"Beach Bab\n",
"[[(u'beach bab', u'road')]]\n",
"\n",
"\n",
"Beach Babl\n",
"[[(u'beach babl', u'house')]]\n",
"\n",
"\n",
"Beach Bably\n",
"[[(u'beach bably', u'house')]]\n",
"\n",
"\n",
"Beach Bablyo\n",
"[[(u'beach bablyo', u'house')]]\n",
"\n",
"\n",
"Beach Bablyon\n",
"[[(u'beach bablyon', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: Waiotapu\n",
"W\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"Wa\n",
"[[(u'wa', u'state')], [(u'washington', u'city')], [(u'western australia', u'state')]]\n",
"\n",
"\n",
"Wai\n",
"[[(u'wai', u'house')]]\n",
"\n",
"\n",
"Waio\n",
"[[(u'waio', u'house')]]\n",
"\n",
"\n",
"Waiot\n",
"[[(u'waiot', u'house')]]\n",
"\n",
"\n",
"Waiota\n",
"[[(u'waiota', u'house')]]\n",
"\n",
"\n",
"Waiotap\n",
"[[(u'waiotap', u'house')]]\n",
"\n",
"\n",
"Waiotapu\n",
"[[(u'waiotapu', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n",
"TEST SET: quattroshapes_popularity\n",
"TEST: chelsea\n",
"c\n",
"[[(u'centre', u'house')], [(u'center', u'house')], [(u'central', u'house')]]\n",
"\n",
"\n",
"ch\n",
"[[(u'county highway', u'road')], [(u'chase', u'house')]]\n",
"\n",
"\n",
"che\n",
"[[(u'che', u'country')]]\n",
"\n",
"\n",
"chel\n",
"[[(u'chel', u'house')]]\n",
"\n",
"\n",
"chels\n",
"[[(u'chels', u'house')]]\n",
"\n",
"\n",
"chelse\n",
"[[(u'chelse', u'house')]]\n",
"\n",
"\n",
"chelsea\n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: chelsea, ny\n",
"c\n",
"[[(u'centre', u'house')], [(u'center', u'house')], [(u'central', u'house')]]\n",
"\n",
"\n",
"ch\n",
"[[(u'county highway', u'road')], [(u'chase', u'house')]]\n",
"\n",
"\n",
"che\n",
"[[(u'che', u'country')]]\n",
"\n",
"\n",
"chel\n",
"[[(u'chel', u'house')]]\n",
"\n",
"\n",
"chels\n",
"[[(u'chels', u'house')]]\n",
"\n",
"\n",
"chelse\n",
"[[(u'chelse', u'house')]]\n",
"\n",
"\n",
"chelsea\n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea,\n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea, \n",
"[[(u'chelsea', u'city')]]\n",
"\n",
"\n",
"chelsea, n\n",
"[[(u'chelsea', u'suburb'), (u'north', u'road')], [(u'chelsea', u'house'), (u'n', u'road')]]\n",
"\n",
"\n",
"chelsea, ny\n",
"[[(u'chelsea', u'suburb'), (u'new york', u'state')], [(u'chelsea', u'suburb'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: williamsburg\n",
"w\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"wi\n",
"[[(u'wisconsin', u'state')], [(u'wi', u'state')]]\n",
"\n",
"\n",
"wil\n",
"[[(u'wil', u'city')]]\n",
"\n",
"\n",
"will\n",
"[[(u'will', u'house')]]\n",
"\n",
"\n",
"willi\n",
"[[(u'willi', u'house')]]\n",
"\n",
"\n",
"willia\n",
"[[(u'willia', u'road')]]\n",
"\n",
"\n",
"william\n",
"[[(u'william', u'house')]]\n",
"\n",
"\n",
"williams\n",
"[[(u'williams', u'house')]]\n",
"\n",
"\n",
"williamsb\n",
"[[(u'williamsb', u'house')]]\n",
"\n",
"\n",
"williamsbu\n",
"[[(u'williamsbu', u'house')]]\n",
"\n",
"\n",
"williamsbur\n",
"[[(u'williamsbur', u'house')]]\n",
"\n",
"\n",
"williamsburg\n",
"[[(u'williamsburg', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: williamsburg, ny\n",
"w\n",
"[[(u'west', u'city_district')], [(u'w', u'house')]]\n",
"\n",
"\n",
"wi\n",
"[[(u'wisconsin', u'state')], [(u'wi', u'state')]]\n",
"\n",
"\n",
"wil\n",
"[[(u'wil', u'city')]]\n",
"\n",
"\n",
"will\n",
"[[(u'will', u'house')]]\n",
"\n",
"\n",
"willi\n",
"[[(u'willi', u'house')]]\n",
"\n",
"\n",
"willia\n",
"[[(u'willia', u'road')]]\n",
"\n",
"\n",
"william\n",
"[[(u'william', u'house')]]\n",
"\n",
"\n",
"williams\n",
"[[(u'williams', u'house')]]\n",
"\n",
"\n",
"williamsb\n",
"[[(u'williamsb', u'house')]]\n",
"\n",
"\n",
"williamsbu\n",
"[[(u'williamsbu', u'house')]]\n",
"\n",
"\n",
"williamsbur\n",
"[[(u'williamsbur', u'house')]]\n",
"\n",
"\n",
"williamsburg\n",
"[[(u'williamsburg', u'house')]]\n",
"\n",
"\n",
"williamsburg,\n",
"[[(u'williamsburg', u'house')]]\n",
"\n",
"\n",
"williamsburg, \n",
"[[(u'williamsburg', u'house')]]\n",
"\n",
"\n",
"williamsburg, n\n",
"[[(u'williamsburg', u'suburb'), (u'north', u'road')], [(u'williamsburg n', u'house')]]\n",
"\n",
"\n",
"williamsburg, ny\n",
"[[(u'williamsburg', u'suburb'), (u'new york', u'state')], [(u'williamsburg', u'suburb'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: ridgewood\n",
"r\n",
"[[(u'river', u'house')]]\n",
"\n",
"\n",
"ri\n",
"[[(u'ri', u'state')], [(u'rhode island', u'state')], [(u'rise', u'road')]]\n",
"\n",
"\n",
"rid\n",
"[[(u'rid', u'house')]]\n",
"\n",
"\n",
"ridg\n",
"[[(u'ridg', u'house')]]\n",
"\n",
"\n",
"ridge\n",
"[[(u'ridge', u'road')]]\n",
"\n",
"\n",
"ridgew\n",
"[[(u'ridgew', u'house')]]\n",
"\n",
"\n",
"ridgewo\n",
"[[(u'ridgewo', u'house')]]\n",
"\n",
"\n",
"ridgewoo\n",
"[[(u'ridgewoo', u'house')]]\n",
"\n",
"\n",
"ridgewood\n",
"[[(u'ridgewood', u'house')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"TEST: ridgewood, ny\n",
"r\n",
"[[(u'river', u'house')]]\n",
"\n",
"\n",
"ri\n",
"[[(u'ri', u'state')], [(u'rhode island', u'state')], [(u'rise', u'road')]]\n",
"\n",
"\n",
"rid\n",
"[[(u'rid', u'house')]]\n",
"\n",
"\n",
"ridg\n",
"[[(u'ridg', u'house')]]\n",
"\n",
"\n",
"ridge\n",
"[[(u'ridge', u'road')]]\n",
"\n",
"\n",
"ridgew\n",
"[[(u'ridgew', u'house')]]\n",
"\n",
"\n",
"ridgewo\n",
"[[(u'ridgewo', u'house')]]\n",
"\n",
"\n",
"ridgewoo\n",
"[[(u'ridgewoo', u'house')]]\n",
"\n",
"\n",
"ridgewood\n",
"[[(u'ridgewood', u'house')]]\n",
"\n",
"\n",
"ridgewood,\n",
"[[(u'ridgewood', u'house')]]\n",
"\n",
"\n",
"ridgewood, \n",
"[[(u'ridgewood', u'house')]]\n",
"\n",
"\n",
"ridgewood, n\n",
"[[(u'ridgewood', u'suburb'), (u'north', u'road')], [(u'ridgewood', u'house'), (u'n', u'road')]]\n",
"\n",
"\n",
"ridgewood, ny\n",
"[[(u'ridgewood', u'suburb'), (u'new york', u'state')], [(u'ridgewood', u'suburb'), (u'ny', u'state')]]\n",
"\n",
"\n",
"--------------------------------------------------------------\n",
"\n",
"====================================================================\n",
"\n"
]
}
],
"source": [
"for files in glob.glob(\"acceptance-tests/test_cases/*.json\"):\n",
" # SKIP PLACE AND REVERSE GEO\n",
" filename = files[28:-5]\n",
" if filename != \"place\" and filename != \"admin_lookup\" and filename != \"reverse_coordinate_wrapping\":\n",
" t = json.load(open(files))\n",
" print(\"TEST SET: %s\"%(files[28:-5]))\n",
" for test in t['tests']:\n",
" test_text = test['in']['text']\n",
" print(\"TEST: %s\"%test_text)\n",
" print_autocomplete(test_text)\n",
" print('--------------------------------------------------------------\\n')\n",
" print('====================================================================\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment