Skip to content

Instantly share code, notes, and snippets.

@mhaligowski
Last active December 10, 2015 01:29
Show Gist options
  • Save mhaligowski/4358862 to your computer and use it in GitHub Desktop.
Save mhaligowski/4358862 to your computer and use it in GitHub Desktop.
MapReduce Example
#!/usr/bin/env python
#-*- coding: utf-8 -*-
from optparse import OptionParser
import os
import requests
# Default REST path prefix placed between the host address and the target path.
DEFAULT_API_PREFIX = '/webhdfs/v1/'

# `quote` lives in different modules on Python 2 and Python 3; imported here
# so the script keeps working on either interpreter.
try:
    from urllib import quote  # Python 2
except ImportError:
    from urllib.parse import quote  # Python 3


def build_upload_url(address, prefix, target, filename, user):
    """Return the URL used to upload *filename* into the *target* folder.

    address  -- host[:port] of the server, without scheme
    prefix   -- API path prefix (e.g. DEFAULT_API_PREFIX)
    target   -- target folder; may be empty, trailing slash optional
    filename -- bare file name; percent-encoded before being inserted
    user     -- value sent as the ``user.name`` query parameter

    The pieces are concatenated directly (no %-formatting), so a literal
    ``%`` in any of them cannot break URL construction.
    """
    # Without a separator "folder" + "file.txt" would become "folderfile.txt".
    if target and not target.endswith("/"):
        target += "/"
    return "".join([
        "http://", address, prefix, target, quote(filename),
        "?op=create&user.name=", user, "&data=true",
    ])


def main(argv=None):
    """Upload every file named on the command line with an HTTP PUT.

    argv -- argument list to parse; ``None`` means ``sys.argv[1:]``.

    Prints each source path before uploading it and prints the response
    body whenever the server returns a non-empty one.
    """
    parser = OptionParser()
    parser.add_option("-a", "--address", dest="address", help="address of the namenode whith WebHTTPFS")
    parser.add_option("-p", "--prefix", dest="prefix", help="api prefix", default=DEFAULT_API_PREFIX)
    parser.add_option("-t", "--target", dest="target", help="target folder", default="")
    parser.add_option("-u", "--user", dest="user", help="username", default="dr.who")
    (options, args) = parser.parse_args(argv)

    if not options.address:
        # parser.error() prints the usage message and exits.
        parser.error("Address is not given")

    for sourcefile in args:
        print(sourcefile)
        url = build_upload_url(options.address, options.prefix, options.target,
                               os.path.basename(sourcefile), options.user)
        # Read inside a context manager so the handle is always closed.
        with open(sourcefile, 'rb') as source:
            payload = source.read()
        r = requests.put(url,
                         data=payload,
                         headers={'Content-Type': 'application/octet-stream'})
        if r.text:
            print(r.text)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import requests
from urlparse import urlparse
# Endpoint returning the JSON list of books; each entry carries an 'href'
# pointing at that book's detail document.
ADDRESS = "http://wolnelektury.pl/api/books/"
HREF_KEY = u'href'
TXT_KEY = u'txt'


def filename_from_url(url):
    """Return the last path segment of *url*.

    Query string and fragment are ignored because only the parsed path
    component is used. A path ending in '/' yields an empty string.
    """
    return urlparse(url).path.split('/')[-1]


def main():
    """Download the plain-text file of every listed book.

    Fetches the book list from ADDRESS, then each book's detail document,
    and saves the resource under its 'txt' key (when present and non-empty)
    into the current working directory, printing a line per saved file.
    """
    book_urls = [entry[HREF_KEY] for entry in requests.get(ADDRESS).json()]

    for book_url in book_urls:
        data = requests.get(book_url).json()
        # Skip books that have no text version.
        if TXT_KEY not in data or not data[TXT_KEY]:
            continue

        txt_url = data[TXT_KEY]
        filename = filename_from_url(txt_url)
        response = requests.get(txt_url)
        # response.content is raw bytes, so the file must be opened in binary
        # mode; the context manager closes it even if the write fails.
        with open(filename, 'wb') as out:
            out.write(response.content)
        print("%s fetched" % filename)


if __name__ == "__main__":
    main()
#-*- coding: utf-8 -*-
#-*- coding: utf-8 -*-
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment