Created
February 25, 2016 20:57
-
-
Save schwanksta/d86189e3ace312372113 to your computer and use it in GitHub Desktop.
Parallelized COPY in PostgreSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing
import os
import subprocess
import sys
from glob import glob
# Target columns of the ``flights`` table, named in the COPY column list.
fields = ["field1", "field2", "field3"]


def work(fname):
    """Load one CSV file into the ``flights`` table with psql's client-side COPY.

    Runs ``psql -U postgres -d takeoff`` with a single COPY command that
    reads the file as comma-delimited CSV with a header row, treating empty
    values as NULL.

    Args:
        fname: Path to the CSV file. It is made absolute before being
            handed to psql.

    Returns:
        The return code of the ``psql`` process, as reported by
        ``subprocess.call``.
    """
    print("starting %s" % fname)
    fname = os.path.abspath(fname)
    # The file name is embedded in a single-quoted string, so an embedded
    # quote must be doubled or it would terminate the string early.
    quoted = fname.replace("'", "''")
    # Raw string: "\C" is not a valid Python escape sequence.
    cmd2 = r"\COPY flights(%s) FROM '%s' WITH DELIMITER ',' NULL AS '' CSV HEADER;" % (
        ",".join(fields),
        quoted,
    )
    # Argument list with shell=False: the command is never parsed by a shell.
    retval = subprocess.call(
        ["psql", "-U", "postgres", "-d", "takeoff", "-c", cmd2], shell=False
    )
    print("ending %s" % fname)
    return retval
if __name__ == '__main__':
    # Size the pool to the number of CPUs; each task runs its own psql client.
    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count)
    try:
        # One psql return code per CSV file, in the order glob listed them.
        results = pool.map(work, glob('flights/*.csv'))
    finally:
        # Release the worker processes even if a load raised.
        pool.close()
        pool.join()
    print(results)
    # Exit non-zero if any psql invocation returned a non-zero code.
    sys.exit(1 if any(results) else 0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment