Created
December 29, 2016 12:47
-
-
Save vibhanshuc/6e3ecc184aa3680eeef2abb8833dc5d3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scrapy import cmdline | |
from swishpick.pipelines import * | |
from datetime import datetime | |
import subprocess | |
import time | |
# ISO weekday number of the current local date: Monday == 1 ... Sunday == 7.
# isoweekday() yields these numbers directly and does not depend on the
# process locale, unlike a lookup table keyed on strftime("%A") day names
# (which would produce None whenever the day name is not in English).
_day_num = datetime.now().isoweekday()

# Spiders whose scheduled_start_time equals today's weekday number.
spiders = SpiderStatus.select().where(SpiderStatus.scheduled_start_time == _day_num)

Next = 0                   # index into `spiders` of the next spider to launch
MaxProcesses = 5           # upper bound on concurrently tracked crawl subprocesses
MaxUrls = spiders.count()  # how many spiders the query selected for today
Processes = []             # Popen handles of crawls that have not been reaped yet
def StartNew():
    """Launch the next scheduled spider as a ``scrapy crawl`` subprocess.

    Reads the module-level ``spiders`` query and the ``Next`` cursor. While
    ``Next`` is below ``MaxUrls``, the spider at that position is started,
    its ``Popen`` handle is appended to ``Processes`` and ``Next`` advances
    by one. When every spider has already been started the call is a no-op.
    """
    global Next
    global Processes
    if Next >= MaxUrls:
        return  # nothing left to start

    spider = spiders[Next]
    started_at = datetime.now().strftime("%Y-%m-%d.%H%M%S")
    log_file = '/var/log/swishpick/{0}_{1}.log'.format(spider.spider_name, started_at)
    # '-s' and its NAME=VALUE pair must be separate argv entries. Passed as a
    # single '-s LOG_FILE=...' token, the option parser receives the value
    # with a leading space, so the setting name no longer reads 'LOG_FILE'.
    proc = subprocess.Popen(
        ['scrapy', 'crawl', spider.spider_name, '-s', 'LOG_FILE=' + log_file]
    )
    Next += 1
    Processes.append(proc)
def CheckRunning():
    """Drop finished subprocesses from the pool, then refill the free slots.

    A process counts as finished once ``poll()`` returns something other
    than ``None``. After reaping, ``StartNew()`` is called repeatedly while
    the pool holds fewer than ``MaxProcesses`` entries and ``Next`` is still
    below ``MaxUrls``.
    """
    # Poll from the tail towards the head and keep only the processes that
    # are still running; slice assignment updates the shared list in place.
    alive = [proc for proc in reversed(Processes) if proc.poll() is None]
    Processes[:] = alive[::-1]

    # Spare capacity and spiders still waiting: start more crawls.
    while len(Processes) < MaxProcesses and Next < MaxUrls:
        StartNew()
if __name__ == "__main__":
    # First pass fills the pool with up to MaxProcesses crawls.
    CheckRunning()
    # Keep polling until no tracked subprocess remains; every pass also
    # starts waiting spiders as slots become free.
    while Processes:
        time.sleep(0.1)  # polling interval in seconds; tune as needed
        CheckRunning()
    print("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment