Raphael Hernandes rhhernandes

## fingerprint.py
import re
from unidecode import unidecode


def fingerprint(string):
    """Begin reducing *string* to a normalized fingerprint.

    The steps visible here lowercase the input and then delete every
    character that is not an ASCII letter, a digit, or a space.

    NOTE(review): this snippet appears to be cut off -- the final comment
    announces an ASCII-normalization step whose code is not shown, and no
    ``return`` statement is visible. Confirm against the complete file.
    """
    # change all characters to their lowercase representation
    string = string.lower()
    # remove all punctuation and control characters
    # NOTE(review): this pattern also deletes accented letters (anything
    # outside A-Z, a-z, 0-9 and space), so a transliteration step placed
    # after it would find nothing left to convert -- confirm the order.
    string = re.sub("[^A-Za-z0-9 ]+", "", string)
    # normalize extended western characters to their ASCII representation

## normalize_string.py
import unicodedata

def normalize_string(string):
    """Return a lowercase, ASCII-only copy of *string*.

    The text is lowercased and NFKD-decomposed, so an accented letter is
    split into its base letter plus combining marks; encoding to ASCII with
    ``'ignore'`` then drops the marks and any other non-ASCII character.

    Returns ``None`` when *string* is not a ``str``.
    """
    if not isinstance(string, str):
        return None

    decomposed = unicodedata.normalize('NFKD', string.lower())
    ascii_bytes = decomposed.encode('ASCII', 'ignore')
    return ascii_bytes.decode('utf-8')

## upper.py
def _uppercase_for_dict_keys(lower_dict):
    upper_dict = {}
    for k, v in lower_dict.items():
        if isinstance(v, dict):
            v = _uppercase_for_dict_keys(v)
        upper_dict[k.upper()] = v
    return upper_dict

## gist:0ed98b2585f0ada5a769
import requests
from bs4 import BeautifulSoup

# We've now imported the two packages that will do the heavy lifting
# for us, requests and BeautifulSoup

# Address of the page we want to scrape, kept in a named variable so that
# the code further down can refer to it and stay a little cleaner.
url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'

## fingerprint.py
# -*- coding: utf-8 -*-

import re, string
from unidecode import unidecode

# Pre-compiled pattern matching one character from ``string.punctuation``;
# ``re.escape`` keeps regex metacharacters in that set literal inside the
# character class.
PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))

class Fingerprinter(object):
    '''
    Python implementation of Google Refine fingerprinting algorithm described here:

## stopwords.txt
de
a
o
que
e
do
da
em
um
para
	import re
	from unidecode import unidecode


	def fingerprint(string):
	    """Begin reducing *string* to a normalized fingerprint.

	    The body's indentation was lost in this copy and is restored here.
	    The visible steps lowercase the input and then delete every character
	    that is not an ASCII letter, a digit, or a space.

	    NOTE(review): the snippet is cut off -- the final comment announces an
	    ASCII-normalization step whose code is not shown and there is no
	    ``return``, so as written the function returns ``None``. Confirm
	    against the complete file before relying on it.
	    """
	    # change all characters to their lowercase representation
	    string = string.lower()
	    # remove all punctuation and control characters
	    # NOTE(review): this pattern also deletes accented letters, so a
	    # transliteration step placed after it would find nothing to convert.
	    string = re.sub("[^A-Za-z0-9 ]+", "", string)
	    # normalize extended western characters to their ASCII representation
	import unicodedata

	def normalize_string(string):
	    """Return a lowercase, ASCII-only copy of *string*.

	    The body's indentation was lost in this copy and is restored here.
	    The text is lowercased and NFKD-decomposed so accented letters split
	    into a base letter plus combining marks; encoding to ASCII with
	    ``'ignore'`` then drops the marks and any other non-ASCII character.

	    Returns ``None`` when *string* is not a ``str``.
	    """
	    if not isinstance(string, str):
	        # Non-text input is left alone; make the None result explicit.
	        return None
	    nfkd_form = unicodedata.normalize('NFKD', string.lower())
	    return nfkd_form.encode('ASCII', 'ignore').decode('utf-8')
	def _uppercase_for_dict_keys(lower_dict):
	upper_dict = {}
	for k, v in lower_dict.items():
	if isinstance(v, dict):
	v = _uppercase_for_dict_keys(v)
	upper_dict[k.upper()] = v
	return upper_dict
	import requests
	from bs4 import BeautifulSoup

	# We've now imported the two packages that will do the heavy lifting
	# for us, requests and BeautifulSoup

	# Address of the page we want to scrape, kept in a named variable so that
	# the code further down can refer to it and stay a little cleaner.
	url_to_scrape = 'http://apps2.polkcountyiowa.gov/inmatesontheweb/'
	# -*- coding: utf-8 -*-

	import re, string
	from unidecode import unidecode

	# Pre-compiled pattern matching one character from ``string.punctuation``;
	# ``re.escape`` keeps regex metacharacters in that set literal inside the
	# character class.
	PUNCTUATION = re.compile('[%s]' % re.escape(string.punctuation))

	class Fingerprinter(object):
	'''
	Python implementation of Google Refine fingerprinting algorithm described here: