Skip to content

Instantly share code, notes, and snippets.

@badri
Created June 10, 2009 14:33
Show Gist options
  • Save badri/127244 to your computer and use it in GitHub Desktop.
Save badri/127244 to your computer and use it in GitHub Desktop.
# Query rankquest.com's Text-Ratio tool for the dictionary.reference.com
# page of every entry in `words`, and collect the reported figures in `data`.
#
# `words` is read before it is reassigned below, so it is expected to already
# hold a newline-separated string of words/phrases when this code runs.
#
# Afterwards `data` maps each (encoded) word to a dict of captured strings:
#   'c2t'      -- the "... to Text Ratio" percentage
#   'pagesize' -- "Page Size" in bytes
#   'codesize' -- "Code Size" in bytes
#   'textsize' -- "Text Size" in bytes
data = {}

# The patterns are tied to the exact markup of the result page (including
# attribute order and stray spaces), so they are reproduced verbatim.  Raw
# strings keep the regex escapes (\s, \d, \.) from being read as string escapes.
c2t = re.compile(r'to Text Ratio\s+: ([\d\.]+) %\s+</td>')
page_size = re.compile(r'Page Size</span></td>\s+<td class="linkcrfill1" width="98%" style="padding:0px 7px 0px 7px;" >\s+: (\d+) Bytes')
code_size = re.compile(r'Code\s+Size </span></td>\s+<td class="linkcrfill1" style="padding:0px 7px 0px 7px;">\s+: (\d+) Bytes')
text_size = re.compile(r'Text\s+Size </span></td>\s+<td class="linkcrfill1" style="padding:0px 7px 0px 7px;" >\s+: (\d+) Bytes')

# splitlines() copes with "\r\n" endings; blank lines are dropped so that no
# request is issued for an empty word.
words = [line.strip() for line in words.strip().splitlines() if line.strip()]

for word in words:
    # The dictionary URL is itself passed as a query-string value, so a space
    # has to be escaped twice: ' ' -> '%20' -> '%2520'.
    # NOTE(review): other reserved characters (e.g. apostrophes) are passed
    # through unescaped, as before.
    encoded = word.replace(' ', '%2520')
    uri = ('http://www.rankquest.com/tools/Text-Ratio.php'
           '?url=http%3A%2F%2Fdictionary.reference.com%2Fbrowse%2F'
           + encoded + '&Submit2=Check+Ratio')

    # Always release the connection, even if read() fails part-way.
    response = urllib2.urlopen(uri)
    try:
        content = response.read()
    finally:
        response.close()

    # findall(...)[0] raises IndexError when the page does not contain the
    # expected markup (layout change, error page, ...).
    t = {}
    t['c2t'] = c2t.findall(content)[0]
    t['pagesize'] = page_size.findall(content)[0]
    t['codesize'] = code_size.findall(content)[0]
    t['textsize'] = text_size.findall(content)[0]

    # NOTE(review): results are keyed by the encoded form (spaces appear as
    # '%2520'), matching the previous behaviour -- confirm whether the plain
    # word would be the better key.
    data[encoded] = t
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment