Created
August 16, 2011 22:21
-
-
Save omarish/1150343 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import random | |
import string | |
import operator | |
from collections import defaultdict | |
class Client: | |
def __init__(self, body=None): | |
""" Initialize a client. An optional body can be passed as ``body``, | |
otherwise a random string with 100 letters will be created. """ | |
self.body = body | |
if not self.body: | |
self.body = ''.join(random.choice(string.ascii_uppercase) for x in range(100)) | |
self.counts = self.count_letters() | |
def count_letters(self): | |
""" Counts the number of times each letter occurs. """ | |
letters = defaultdict(int) | |
for x in self.body: | |
letters[x] += 1 | |
return dict(letters) | |
def find_max(self, n=1): | |
""" Find the ``n`` most occuring letters in the string. Currently does | |
not consider ties. """ | |
retval = [] | |
counts = self.count_letters() | |
for i in range(n): | |
m = max(counts.iteritems(), key=operator.itemgetter(1)) | |
retval.append(m) | |
del counts[m[0]] # Remove it from the list of counts. | |
return retval | |
def transmit(self, n=1): | |
""" Transmit a summary of the string to the server, consisting of | |
the N most commonly occuring letters, and a flat list of other letters | |
in the document. """ | |
top_letters = self.find_max(n=n) | |
""" Exclude the top letters from the `letters` segment of the | |
transmission so that they're not counted twice. """ | |
tl = map(lambda x: x[0], top_letters) | |
other_letters = filter(lambda x: x not in tl, self.counts.keys()) | |
return {'max': top_letters, 'letters': ''.join(self.counts.keys())} | |
class Server: | |
def __init__(self): | |
""" A server model to calculate letter bounds. """ | |
self.bounds = {} | |
def __add_letter(self, letter, lower, upper): | |
""" Adds a letter to the set of bounds. """ | |
if not letter in self.bounds: | |
self.bounds[letter] = [0, 0] | |
L, U = self.bounds[letter] | |
self.bounds[letter] = [L + lower, U + upper] | |
def listen(self, trans): | |
""" Accepts a client transmission and updates the bounds. """ | |
current_max = 0 | |
letters = trans['letters'] | |
for letter, count in trans['max']: | |
self.__add_letter(letter, count, count) | |
letters = letters.replace(letter, '') | |
current_max = count - 1 | |
for letter in letters: | |
self.__add_letter(letter, 0, current_max) | |
if __name__ == "__main__": | |
s = Server() | |
for i in range(100): # 100 clients will submit text. | |
c = Client() # with a random body. | |
s.listen(c.transmit(n=4)) | |
print s.bounds |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment