Last active
July 5, 2018 16:27
-
-
Save sc268/5fc624786057169457c5e956f694c32c to your computer and use it in GitHub Desktop.
run foo.py with different date input in parallel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run foo.py in parallel, once per date.
# Input: sys.argv[1] is the starting date; sys.argv[2] is the number of days.
import glob
import os
import subprocess
import sys
from multiprocessing import Process

import pandas as pd
# Handles of the worker processes started by this script, kept so that
# every one of them can be joined before the results are merged.
children = list()
def get_days_ranges(start, num):
    """Return ``num`` consecutive calendar days beginning at ``start``.

    Args:
        start: First day of the range, in any form accepted by
            ``pandas.date_range`` (e.g. ``"2018-07-01"``).
        num: Number of days to generate.

    Returns:
        A list of ``num`` strings formatted as ``YYYY-MM-DD``, in
        chronological order, with ``start`` as the first element.
    """
    date_range = pd.date_range(start, periods=num, freq='1D')
    # Build a real list: on Python 3 ``map`` yields an iterator that supports
    # neither ``len()`` nor indexing. Every generated day is kept; the previous
    # ``range(0, len - 1)`` silently dropped the last one.
    return [dt.strftime("%Y-%m-%d") for dt in date_range]
# Dates to process, taken from the command line:
# argv[1] is the starting date, argv[2] the number of days.
days = get_days_ranges(
    start=str(sys.argv[1]),
    num=int(sys.argv[2]),
)
def f(x):
    """Run ``foo.py`` for a single date and wait for it to finish.

    Args:
        x: Date value passed to ``foo.py`` as its ``--date`` argument.
    """
    # Pass an argument list instead of a formatted shell string so the date
    # (which originates from the command line) is never parsed by a shell.
    # As before, the exit status of foo.py is not inspected.
    subprocess.call(["python", "foo.py", "--date", str(x)])
if __name__ == "__main__":
    # Guarded so that child processes which re-import this module (the
    # "spawn" start method on Windows and macOS) do not start workers again.
    for x in days:
        p = Process(target=f, args=(x,))
        p.start()
        children.append(p)
    # Wait for every worker before reading the files they produce.
    for p in children:
        p.join()

    # merge all temp results
    # Skip a bar.csv left over from an earlier run so the previous output is
    # not merged into itself; sort for a deterministic row order.
    filenames = sorted(
        name for name in glob.glob("*.csv") if name != 'bar.csv'
    )
    if filenames:
        merge = pd.concat(
            [pd.read_csv(filename) for filename in filenames],
            ignore_index=True,
        )
        # Remove the index column that pandas names 'Unnamed: 0' when a CSV
        # was written with its index; tolerate inputs that do not have it.
        merge = merge.drop(columns=['Unnamed: 0'], errors='ignore')
        merge.to_csv('bar.csv', index=False)
    else:
        sys.stderr.write("no csv results found; bar.csv was not written\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment