Created
November 29, 2015 14:59
-
-
Save feliperazeek/46e96dda66aa4fb7ccf6 to your computer and use it in GitHub Desktop.
Python script that converts a CSV file into a list of Redis SET commands for mass insertion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import sys | |
import getopt | |
import fileinput | |
import csv | |
import json | |
import bcrypt | |
def main(argv):
    """Parse command-line options and run the CSV-to-Redis conversion.

    Recognised options:
      -h, --help        print the usage line and exit
      -i, --ifile       path of the input file (required)
      -o, --ofile       path of the output file (required)
      -f, --keyfield    name of the column whose value becomes the key
                        (required)

    Args:
        argv: the argument list without the program name
            (typically ``sys.argv[1:]``).

    Exits with status 2 when an option is unknown or a required option is
    missing, and with the default status after printing help.
    """
    inputfile = ""
    outputfile = ""
    keyfield = ""
    try:
        # "h"/"help" must be declared in the spec; otherwise getopt rejects
        # -h as an unknown option and the help branch below is unreachable.
        opts, _ = getopt.getopt(
            argv, "hi:o:f:", ["help", "ifile=", "ofile=", "keyfield="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-f", "--keyfield"):
            keyfield = arg
    # Report the first missing required option, in the order -i, -o, -f.
    required = (("-i", inputfile), ("-o", outputfile), ("-f", keyfield))
    for flag, value in required:
        if not value:
            print("ERROR: %s is missing" % flag)
            usage()
            sys.exit(2)
    convert_file(inputfile, outputfile, keyfield)
def usage():
    """Print the one-line command synopsis to standard output."""
    # Call form with a single string prints identically on Python 2 and 3.
    print('csv-to-redis.py -i <inputfile> -o <outputfile> -f <fieldname-for-key>')
# Column names taken from the first line of the input; to_json() reads this.
headers = None


def convert_file(input, output, field):
    """Convert a pipe-delimited file into Redis SET commands.

    The first line of ``input`` is parsed and stored in the module-level
    ``headers``. Every following non-blank line is turned into one SET
    command whose key is derived from the ``field`` column and whose value
    is the JSON encoding of the row.

    Args:
        input: path of the file to read (passed to ``fileinput.input``).
        output: path of the file to write the commands to. ``"-"`` or an
            empty/None value writes to standard output, which is what this
            function did unconditionally before ``output`` was honoured.
        field: header name of the column used as the key.
    """
    global headers
    to_stdout = not output or output == "-"
    out = sys.stdout if to_stdout else open(output, "w")
    try:
        for count, line in enumerate(fileinput.input(input), 1):
            values = parse(line)
            if count == 1:
                headers = values
                continue
            if not values:
                # A blank line (e.g. at end of file) has no columns and
                # would otherwise fail with IndexError in to_json().
                continue
            key, json_value = to_json(field, values)
            to_redis_command(key, json_value, out)
    finally:
        # Release the fileinput state and the output file even on error.
        fileinput.close()
        if not to_stdout:
            out.close()


def to_redis_command(key, value, out=None):
    """Write one ``SET <key> "<value>"`` command followed by a blank line.

    Args:
        key: raw key; it is passed through ``safe_key`` before being written.
        value: string payload to store (the JSON text of the row).
        out: writable text stream; defaults to standard output.
    """
    if out is None:
        out = sys.stdout
    # Backslashes must be escaped before quotes: the JSON text can already
    # contain sequences such as \" or \n, and a double-quoted redis-cli
    # argument would otherwise unescape them and corrupt the payload.
    escaped = value.replace('\\', '\\\\').replace('"', '\\"')
    out.write('SET %s "%s"\n\n' % (safe_key(key), escaped))
def safe_key(value):
    """Return the bcrypt hash of ``value`` as a native string.

    Args:
        value: the raw key, as text or bytes.

    Returns:
        The hash produced by ``bcrypt.hashpw`` with a freshly generated salt.
    """
    # NOTE(review): a new salt is generated on every call, so hashing the
    # same value twice yields different keys - confirm that lookups do not
    # need to recompute the key from the raw value.
    if not isinstance(value, bytes):
        # bcrypt only accepts bytes; text input is encoded as UTF-8.
        value = value.encode("utf-8")
    hashed = bcrypt.hashpw(value, bcrypt.gensalt())
    if not isinstance(hashed, str):
        # On Python 3 the hash comes back as bytes; decode it so that it can
        # be interpolated into a text command without a b'...' wrapper.
        hashed = hashed.decode("ascii")
    return hashed
def to_json(field, values):
    """Build the key and compact JSON payload for one data row.

    Args:
        field: header name of the column whose value is returned as the key.
        values: the parsed columns of the row, aligned with the module-level
            ``headers`` list.

    Returns:
        A ``(key, json_text)`` tuple, where ``json_text`` is serialised
        without whitespace after separators.
    """
    # Indexing starts at 1, so column 0 never reaches the record.
    # NOTE(review): presumably intentional (e.g. lines that begin with the
    # delimiter) - confirm against the input format.
    record = {headers[idx]: values[idx] for idx in range(1, len(headers))}
    key = record[field]
    payload = json.dumps(record, separators=(',', ':'))
    return (key, payload)
def parse(line):
    """Split a single pipe-delimited line into its columns.

    Args:
        line: one line of text from the input file.

    Returns:
        The list of column values from the first record the csv reader
        yields, or ``None`` when it yields nothing.
    """
    rows = csv.reader([line], skipinitialspace=True, delimiter='|')
    # Only the first record is of interest; None signals "no record".
    return next(rows, None)
if __name__ == "__main__":
    # Drop the program name; main() expects only the option arguments.
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment