Skip to content

Instantly share code, notes, and snippets.

@lanius
Created April 26, 2011 06:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lanius/941914 to your computer and use it in GitHub Desktop.
Save lanius/941914 to your computer and use it in GitHub Desktop.
Riak's "Loading Data and Running MapReduce Queries" for Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# see http://wiki.basho.com/Loading-Data-and-Running-MapReduce-Queries.html
import sys
import riak
# Host and port handed to riak.RiakClient when connecting.
HOST = '127.0.0.1'
PORT = '8098'
# JSON keys for the comma-separated fields of each input line, in column order.
# Every key must be unique: a repeated key makes the later column silently
# overwrite the earlier one when the record dict is built. The seventh column
# was previously labelled u'Close' as well, clobbering the fifth column.
# NOTE(review): u'Adj. Close' assumes the seventh column of the input file is
# the adjusted close, as in the tutorial's goog.csv -- confirm the key name.
headers = [u'Date', u'Open', u'High', u'Low', u'Close', u'Volume',
           u'Adj. Close']
def main(filename):
    """Store every data line of *filename* as an object in the 'goog' bucket.

    The first line of the file is skipped (presumably the CSV header row).
    Each remaining line is turned into a (key, record) pair by format_data(),
    stored under that key, and reported on stdout.
    """
    bucket = riak.RiakClient(host=HOST, port=PORT).bucket('goog')
    records = read_file(filename)[1:]
    for record in records:
        parsed = format_data(record)
        bucket.new(parsed[0], data=parsed[1]).store()
        print('inserted: {0}'.format(parsed[0]))
def read_file(filename):
    """Return the lines of *filename* as a list, line endings included."""
    with open(filename, 'r') as handle:
        return handle.readlines()
def format_data(line):
    """Convert one comma-separated line into a ``(key, record)`` pair.

    The line is split on commas and each field, stripped of surrounding
    whitespace, is stored under the name at the same position in the
    module-level ``headers`` list. The record's u'Date' value is the key.

    Raises IndexError when the line has fewer fields than ``headers``.
    """
    fields = line.split(',')
    record = {name: fields[pos].strip() for pos, name in enumerate(headers)}
    return record[u'Date'], record
if __name__ == '__main__':
    # Exactly one argument (the path of the data file) is required.
    if len(sys.argv) != 2:
        # Passing a string to sys.exit() writes it to stderr and exits with
        # status 1, so a usage error is no longer reported as success.
        sys.exit('usage: python load_data.py <filename>')
    main(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment