Skip to content

Instantly share code, notes, and snippets.

@sc268
Last active July 5, 2018 16:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sc268/5fc624786057169457c5e956f694c32c to your computer and use it in GitHub Desktop.
Save sc268/5fc624786057169457c5e956f694c32c to your computer and use it in GitHub Desktop.
run foo.py with different date input in parallel
# run foo.py with different date input in parallel
# input: sys.argv[1]: starting date; sys.argv[2]: number of days
import glob
import os
import subprocess
import sys
from multiprocessing import Process

import pandas as pd
# Handles of the worker processes; appended to as each one is started and
# iterated afterwards to join them all.
children = []
def get_days_ranges(start, num):
    """Return ``num`` consecutive days, beginning at ``start``, as strings.

    Args:
        start: First day of the range, in any form accepted by
            ``pandas.date_range`` (e.g. ``"2018-07-01"``).
        num: Number of consecutive days to generate.

    Returns:
        A list of ``num`` strings formatted as ``YYYY-MM-DD``, in ascending
        order. The list is empty when ``num`` is 0.
    """
    date_range = pd.date_range(start, periods=num, freq='1D')
    # Build a real list: on Python 3 ``map`` is lazy and supports neither
    # ``len()`` nor indexing. Every generated day is kept, so the result
    # holds exactly ``num`` entries (the last day is no longer dropped).
    return [dt.strftime("%Y-%m-%d") for dt in date_range]
# Check the command line up front so that missing arguments produce a usage
# message instead of a bare IndexError traceback.
if len(sys.argv) < 3:
    sys.exit("usage: %s START_DATE NUM_DAYS" % sys.argv[0])
# sys.argv entries are already strings; only the day count needs converting.
days = get_days_ranges(sys.argv[1], int(sys.argv[2]))
def f(x):
    """Run ``foo.py`` for a single date in a separate Python interpreter.

    Args:
        x: Date value forwarded to ``foo.py`` as its ``--date`` argument.
    """
    # Pass an argument list rather than a shell command string so that ``x``
    # is handed to foo.py verbatim and is never re-parsed by a shell.
    subprocess.call(["python", "foo.py", "--date", "%s" % x])
if __name__ == "__main__":
    # Guarded so that worker processes which re-import this module (the
    # "spawn" start method) do not start workers or merge results themselves.

    # Start one worker process per day, then wait for all of them to finish.
    for x in days:
        p = Process(target=f, args=(x,))
        p.start()
        children.append(p)
    for p in children:
        p.join()

    # merge all temp results
    output_name = 'bar.csv'
    # Skip a merged file left over from an earlier run (it also matches
    # "*.csv") and sort so the row order does not depend on listing order.
    filenames = sorted(
        name for name in glob.glob("*.csv") if name != output_name
    )
    if not filenames:
        sys.exit("no *.csv result files found to merge")
    merge = pd.concat([pd.read_csv(filename) for filename in filenames])
    # Drop the 'Unnamed: 0' column when present -- presumably a saved index
    # column in the per-day files; TODO confirm against foo.py's output.
    merge = merge.drop(columns=['Unnamed: 0'], errors='ignore')
    merge.to_csv(output_name, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment