Skip to content

Instantly share code, notes, and snippets.

@hownowstephen
Created February 12, 2012 18:23
Show Gist options
  • Save hownowstephen/1810031 to your computer and use it in GitHub Desktop.
Save hownowstephen/1810031 to your computer and use it in GitHub Desktop.
Embedly Challenge
import requests,re,math
# Basic factorial digit-sum search
def factorial(size, start=0):
    '''Find the first n > start whose factorial has a decimal digit sum of size.

    Args:
        size: Target value for sum(digits(n!)).
        start: The search begins at start + 1; start itself is never tested.

    Returns:
        The smallest n > start with sum(digits(n!)) == size.  When size is 0
        the loop body never runs and start is returned unchanged.

    Raises:
        ValueError: If size is provably unreachable (negative, or not a
            multiple of 9 once only n >= 6 remain to be tested).  Multiples
            of 9 that no factorial happens to hit are NOT detected and will
            still search indefinitely.
    '''
    product = None  # running value of start!, built on first use
    result = 0
    while result != size:
        # Digit sums are never negative, and every n! with n >= 6 is a
        # multiple of 9 (so its digit sum is as well); stop instead of
        # spinning forever on a target that cannot occur.
        if size < 0 or (start >= 5 and size % 9):
            raise ValueError(
                'no factorial past n=%r has digit sum %r' % (start, size))
        start += 1
        # Extend the previous factorial by one factor rather than
        # recomputing it from scratch on every iteration.
        if product is None:
            product = math.factorial(start)
        else:
            product *= start
        result = sum(int(digit) for digit in str(product))
    return start
# Same factorial search, but n itself is tested first (iterative, no recursion)
def f1(s, n=0):
    '''Find the first k >= n whose factorial has a decimal digit sum of s.

    Unlike a recursive formulation, this does not consume one stack frame
    per candidate, so answers beyond the interpreter recursion limit work.

    Args:
        s: Target value for sum(digits(k!)).
        n: First candidate to test (inclusive).

    Returns:
        The smallest k >= n with sum(digits(k!)) == s.

    Raises:
        ValueError: If s is provably unreachable (not positive, or not a
            multiple of 9 once only k >= 6 remain to be tested); also
            propagated from math.factorial for a negative n.
    '''
    product = math.factorial(n)
    while sum(int(digit) for digit in str(product)) != s:
        # Digit sums of factorials are positive, and from 6! onwards they
        # are always multiples of 9, so such targets can never match.
        if s <= 0 or (n >= 5 and s % 9):
            raise ValueError(
                'no factorial from n=%r on has digit sum %r' % (n, s))
        n += 1
        product *= n  # n! from (n-1)! in a single multiplication
    return n
# Text analysis standard deviation
def stddev(data, started=False, depth=0, tmp=None):
    '''Population standard deviation of the nesting depth of <p> tags inside <article> blocks.

    Tags are scanned with a regular expression (not a real HTML parser).
    Depth is counted relative to the enclosing <article>: a <p> that is a
    direct child of <article> has depth 1.

    Args:
        data: HTML text to scan.
        started: Initial state; True if scanning begins inside an <article>.
        depth: Initial nesting depth used when started is True.
        tmp: Optional list to which the depth of every counted <p> is
            appended; a fresh list is used when omitted, so separate calls
            never share state.

    Returns:
        The population standard deviation of the collected depths as a
        float, or 0.0 when no <p> was found inside an <article>.

    Known limitation: void elements written without a trailing slash
    (e.g. <br>, <img ...>) are treated as opening tags and skew the depth.
    '''
    depths = [] if tmp is None else tmp
    # Groups: leading "/" (closing tag), tag name, trailing "/" (self-closing).
    # Comments and doctype declarations do not start with a letter and are
    # therefore skipped entirely.
    tag_pattern = r'<(/?)([A-Za-z][A-Za-z0-9]*)[^>]*?(/?)>'
    for closing, name, self_closing in re.findall(tag_pattern, data):
        name = name.lower()
        if name == 'article':
            # Entering an article restarts depth counting; leaving stops it.
            started, depth = not closing, 0
            continue
        if not started:
            continue  # only tags inside an <article> are of interest
        if closing:
            depth -= 1
        elif self_closing:
            # A self-closing element sits one level down but has no body,
            # so the running depth is left untouched.
            if name == 'p':
                depths.append(depth + 1)
        else:
            depth += 1
            if name == 'p':
                depths.append(depth)
    if not depths:
        return 0.0  # avoid dividing by zero when nothing was counted
    mean = float(sum(depths)) / len(depths)
    variance = sum((d - mean) ** 2 for d in depths) / len(depths)
    return math.sqrt(variance)
# Practical application of Zipf's law
def zipf(words, max_count):
    '''Estimate how many of the most frequent words cover half of a document.

    Word frequencies follow Zipf's law: the word of rank r is taken to
    occur max_count // r times.

    Args:
        words: Number of unique words; ranks 1 .. words - 1 are generated.
        max_count: Occurrences of the most frequent (rank 1) word.

    Returns:
        One more than the number of top-ranked words whose combined count
        first reaches half (rounded down) of the total count.

    NOTE(review): only words - 1 ranks are generated and the result is
    offset by one; both are kept exactly as in the original -- confirm
    whether they are intentional.
    '''
    # Floor division keeps the result identical on Python 2 and Python 3;
    # true division would turn the halfway mark into a float and can move
    # the stopping point when the total is odd.
    counts = [int(max_count // rank) for rank in range(1, words)]
    half = sum(counts) // 2
    covered = 0
    taken = 0
    # Walk the ranks by index instead of popping from the front of the
    # list, which is O(n) per pop.
    while covered < half:
        covered += counts[taken]
        taken += 1
    return taken + 1
# Compact variant of the zipf computation: a consuming helper plus a wrapper
def zh(d, c):
    '''Consume d from the front until its remaining sum drops below c.

    Each removed value is added to c.  The list d is modified in place.
    Returns the number of items left in d; raises IndexError if d runs
    empty before the remaining sum falls below c.
    '''
    while not sum(d) < c:
        c = c + d.pop(0)
    return len(d)


def z1(w, m):
    '''Return w minus the number of Zipf counts left over by zh.

    The counts are int(m / n) for n in 1 .. w - 1, consumed starting from
    an accumulated total of 0.
    '''
    zipf_counts = [int(m / n) for n in range(1, w)]
    leftover = zh(zipf_counts, 0)
    return w - leftover
# Run each challenge with both of its implementations and report the results.
factorial_answers = (factorial(8001), f1(8001))
print("Factorial: %d,%d" % factorial_answers)

# Fetch the sample page and measure the spread of its <p> nesting depths.
page = requests.get('http://apply.embed.ly/static/data/2.html').content
print(" ".join(["Standard Dev: ", str(stddev(page))]))

zipf_answers = (zipf(900, 2520), z1(900, 2520))
print("Zipf: %d,%d" % zipf_answers)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment