Skip to content

Instantly share code, notes, and snippets.

@schwanksta
Created February 25, 2016 20:57
Show Gist options
  • Save schwanksta/d86189e3ace312372113 to your computer and use it in GitHub Desktop.
Save schwanksta/d86189e3ace312372113 to your computer and use it in GitHub Desktop.
parallelized COPY in postgres
import multiprocessing
import subprocess
from glob import glob
import os
# Column names that work() joins into the column list of its COPY command.
# NOTE(review): these look like placeholders -- confirm the real column names.
fields = [
    "field1",
    "field2",
    "field3",
]
def work(fname):
    """Load one CSV file into the ``flights`` table through ``psql``.

    The path is made absolute, a client-side COPY meta-command is built for
    the columns listed in the module-level ``fields``, and ``psql`` is run as
    user ``postgres`` against the ``takeoff`` database.

    Args:
        fname: Path of the CSV file to load.

    Returns:
        The value returned by ``subprocess.call`` for the ``psql`` process.
    """
    print("starting %s" % fname)
    fname = os.path.abspath(fname)
    # The path is embedded in a single-quoted literal; double any embedded
    # quote so a file name containing one cannot end the literal early.
    quoted_path = fname.replace("'", "''")
    # The backslash is written as "\\" because "\C" is an invalid escape
    # sequence that newer Python versions warn about; the resulting string
    # is unchanged.
    cmd2 = "\\COPY flights(%s) FROM '%s' WITH DELIMITER ',' NULL AS '' CSV HEADER;" % (
        ",".join(fields),
        quoted_path,
    )
    # Arguments are passed as a list with shell=False, so no shell parses
    # the command text.
    retval = subprocess.call(
        ['psql', '-U', 'postgres', '-d', 'takeoff', '-c', cmd2],
        shell=False,
    )
    print("ending %s" % fname)
    return retval
if __name__ == '__main__':
    # Start one worker process per CPU reported by multiprocessing.
    count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count)
    try:
        # map() blocks until every file has been processed and returns the
        # per-file results of work() in the same order as the glob() list.
        print(pool.map(work, glob('flights/*.csv')))
    finally:
        # Always shut the workers down, even if map() raised. By this point
        # either all tasks are finished or the run is being abandoned, so
        # terminate() followed by join() is safe.
        pool.terminate()
        pool.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment