Skip to content

Instantly share code, notes, and snippets.

@bsmt
Last active December 20, 2015 01:49
Show Gist options
  • Save bsmt/6052284 to your computer and use it in GitHub Desktop.
Save bsmt/6052284 to your computer and use it in GitHub Desktop.
Demonstrate how to scrape ciphertext from http://homebrew.herokuapp.com/
# Usage:          python blah.py my plaintext here
# Example output: F3C3ECD09B3238CB8060C2AF55BFC31451FF8C8DA0B6A0FC876FFA54D6D72B9BE3CAD927F8
# Equivalent URL: http://homebrew.herokuapp.com/?plaintext=my+plaintext+here
import requests
from HTMLParser import HTMLParser
from urllib import quote
import sys
# URL template for the cipher page; the ``{}`` placeholder receives the
# URL-quoted plaintext.
CIPHER_URL = "http://homebrew.herokuapp.com/?plaintext={}"
def main(args):
    '''Fetch the ciphertext for the command-line plaintext and print it.

    args -- argv-style list; everything after the first item (the script
            name) is joined with single spaces to form the plaintext.

    Prints the scraped ciphertext on success. Prints a line starting with
    "failure" when the request cannot be completed or the server answers
    with a status other than 200.
    '''
    plaintext = " ".join(args[1:])
    request_url = CIPHER_URL.format(quote(plaintext))

    try:
        # Without an explicit timeout requests waits indefinitely on a
        # server that accepts the connection but never answers.
        r = requests.get(request_url, timeout=10)
    except requests.RequestException as exc:
        # DNS failure, refused connection, timeout, ...: report it the same
        # way as a bad status instead of dying with a traceback.
        print("failure: {}".format(exc))
        return

    if r.status_code != 200:
        print("failure")
        return

    # all good
    parser = CipherTextParser()
    parser.get_ciphertext_data(r.text)
    print(parser.ret)  # <---- ciphertext
class CipherTextParser(HTMLParser):
    '''Parses the HTML from the website and returns the ciphertext.

    The parser keeps a stack of the currently open tags. Text found directly
    inside the target element (by default a <textarea> that carries a
    "disabled" attribute) is stored in self.ret.

    Most of the useful code was stolen from
    http://stackoverflow.com/questions/7204056/python-htmlparser
    '''

    # Elements that never get a closing tag. Pushing them on the stack would
    # leave them there forever and hide the real enclosing element.
    _VOID_ELEMENTS = frozenset((
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr",
    ))

    def get_ciphertext_data(self, html, element="textarea", attr="disabled"):
        '''Parse html and capture the text of the matching element.

        html    -- markup to scan.
        element -- tag name of the element holding the ciphertext.
        attr    -- attribute name that the element must carry.

        The text is stored in self.ret ("" when nothing matched) and is also
        returned. If several elements match, the last one wins.
        '''
        self.tags = []
        self.element = element
        self.attribute = attr
        self.ret = ""  # ciphertext will be placed here
        # Drop any input left buffered by a previous call so the parser
        # instance can be reused safely.
        self.reset()
        self.feed(html)
        # Flush text that feed() may still be holding back at end of input.
        self.close()
        return self.ret

    def handle_starttag(self, tag, attrs):
        '''Push the opened tag and its attributes on the stack.'''
        tag = tag.lower()
        if tag in self._VOID_ELEMENTS:
            return
        self.tags.append((tag, attrs))

    def handle_endtag(self, tag):
        '''Pop the stack back to the most recent matching open tag.

        Elements left unclosed inside it are discarded as well. An end tag
        with no matching open tag is ignored instead of corrupting the stack.
        '''
        tag = tag.lower()
        for index in range(len(self.tags) - 1, -1, -1):
            if self.tags[index][0] == tag:
                del self.tags[index:]
                return

    def handle_data(self, data):
        '''Record data when the innermost open tag is the target element.'''
        if not self.tags:
            # Text outside of any element, e.g. the newline after a doctype.
            return
        element, attributes = self.tags[-1]
        if element != self.element.lower():
            return
        wanted = self.attribute.lower()
        # Match on the attribute name only, so both the bare form (disabled)
        # and the valued form (disabled="disabled") are accepted.
        if any(name == wanted for name, _value in attributes):
            self.ret = data
if __name__ == "__main__":
    # Hand over the complete argv; " ".join(sys.argv[1:]) will be the plaintext.
    main(sys.argv)
@bsmt
Copy link
Author

bsmt commented Jul 25, 2013

Or you can do that :P

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment