Last active
June 29, 2017 09:58
-
-
Save curita/5e8cdd0fba5f162fca1fabee8881bf74 to your computer and use it in GitHub Desktop.
Check unsuccessful dataloss retries in ScrapyCloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from hubstorage import HubstorageClient

# Credentials and target project, named so they are easy to spot and change.
API_KEY = '[REDACTED]'
PROJECT_ID = '1887'

# Shared client and the project whose finished jobs are inspected below.
hs = HubstorageClient(API_KEY)
project = hs.get_project(PROJECT_ID)
def examine_logs(job):
    """Print a one-line summary of DataLoss retries found in a job's logs.

    Every log entry of ``job`` is scanned and two kinds of messages are
    counted:

    * messages containing 'Retrying', 'failed 1 time' and 'DataLoss',
      i.e. the first retry notice of a request that hit a DataLoss error;
    * messages containing 'Gave up retrying' and 'DataLoss', i.e. requests
      whose retries were exhausted.

    The summary also reports whether the job's 'crawlera/request' stat is
    non-zero ("enabled") or zero/absent ("disabled").

    Args:
        job: object exposing ``metadata`` (a mapping that may hold a
            'scrapystats' mapping), ``logs.iter_values()`` (yielding dicts
            with a 'message' entry) and ``key``.

    Returns:
        None. The summary is written to standard output.
    """
    # A job without a 'scrapystats' entry is reported as "crawlera disabled"
    # instead of aborting the whole run with a KeyError.
    stats = job.metadata.get('scrapystats') or {}
    crawlera_enabled = int(stats.get('crawlera/request', 0))

    n_dataloss_requests = 0
    n_failed_dataloss_requests = 0
    for entry in job.logs.iter_values():
        msg = entry['message']
        # Both message kinds of interest mention DataLoss; skip the rest early.
        if 'DataLoss' not in msg:
            continue
        if 'Retrying' in msg and 'failed 1 time' in msg:
            # Only the first retry notice is counted so that each affected
            # request contributes once, however many retries follow.
            n_dataloss_requests += 1
        elif 'Gave up retrying' in msg:
            n_failed_dataloss_requests += 1

    # Single parenthesised argument: behaves the same as a Python 2 print
    # statement and as the Python 3 print() function.
    print('%s dataloss requests gave out retrying out of %s in job %s with crawlera %s' % (
        n_failed_dataloss_requests,
        n_dataloss_requests,
        job.key,
        'enabled' if crawlera_enabled else 'disabled',
    ))
# Stat incremented by the downloader for ResponseFailed exceptions; only jobs
# where it is non-zero have their logs examined.
errstat = 'downloader/exception_type_count/twisted.web._newclient.ResponseFailed'

for j in project.jobq.list(state='finished'):
    # Only jobs whose close reason is 'finished' are of interest.
    if j.get('close_reason') == 'finished':
        job = hs.get_job(j['key'])
        stats = job.metadata.get('scrapystats', {})
        # A missing stat counts as zero, so such jobs are skipped.
        if int(stats.get(errstat, 0)):
            examine_logs(job)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0 dataloss requests gave out retrying out of 105 in job 1887/3248/246 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55357 with crawlera enabled | |
0 dataloss requests gave out retrying out of 13 in job 1887/861/7396 with crawlera enabled | |
0 dataloss requests gave out retrying out of 17 in job 1887/861/7399 with crawlera enabled | |
0 dataloss requests gave out retrying out of 10 in job 1887/861/7402 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/2451/1010 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/2451/1011 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55346 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55350 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55345 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55341 with crawlera enabled | |
0 dataloss requests gave out retrying out of 4 in job 1887/151/2996 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55343 with crawlera enabled | |
0 dataloss requests gave out retrying out of 4 in job 1887/151/2995 with crawlera enabled | |
1 dataloss requests gave out retrying out of 25 in job 1887/648/2890 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55351 with crawlera enabled | |
38 dataloss requests gave out retrying out of 338 in job 1887/2332/470 with crawlera enabled | |
1 dataloss requests gave out retrying out of 15 in job 1887/648/2887 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55361 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55344 with crawlera enabled | |
0 dataloss requests gave out retrying out of 31 in job 1887/648/2889 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55359 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/205/3084 with crawlera enabled | |
85 dataloss requests gave out retrying out of 1788 in job 1887/1092/1215 with crawlera enabled | |
90 dataloss requests gave out retrying out of 1755 in job 1887/1092/1214 with crawlera enabled | |
84 dataloss requests gave out retrying out of 1834 in job 1887/1092/1212 with crawlera enabled | |
98 dataloss requests gave out retrying out of 1733 in job 1887/1092/1213 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55358 with crawlera enabled | |
2 dataloss requests gave out retrying out of 0 in job 1887/696/970 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55347 with crawlera enabled | |
1 dataloss requests gave out retrying out of 312 in job 1887/1526/2591 with crawlera enabled | |
0 dataloss requests gave out retrying out of 3 in job 1887/151/2998 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55349 with crawlera enabled | |
0 dataloss requests gave out retrying out of 116 in job 1887/3152/154 with crawlera enabled | |
0 dataloss requests gave out retrying out of 4 in job 1887/205/3085 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/205/3083 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2451/1012 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55362 with crawlera enabled | |
0 dataloss requests gave out retrying out of 3 in job 1887/151/2999 with crawlera enabled | |
0 dataloss requests gave out retrying out of 7 in job 1887/151/2997 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2543/2632 with crawlera enabled | |
0 dataloss requests gave out retrying out of 12 in job 1887/102/4172 with crawlera enabled | |
0 dataloss requests gave out retrying out of 229 in job 1887/1526/2590 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/3230/66 with crawlera enabled | |
4 dataloss requests gave out retrying out of 518 in job 1887/3141/427 with crawlera enabled | |
0 dataloss requests gave out retrying out of 10 in job 1887/205/3081 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/696/974 with crawlera enabled | |
0 dataloss requests gave out retrying out of 24 in job 1887/102/4173 with crawlera enabled | |
1 dataloss requests gave out retrying out of 299 in job 1887/1526/2587 with crawlera enabled | |
0 dataloss requests gave out retrying out of 0 in job 1887/91/55356 with crawlera enabled | |
0 dataloss requests gave out retrying out of 8 in job 1887/2497/405 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2543/2631 with crawlera enabled | |
1 dataloss requests gave out retrying out of 287 in job 1887/1526/2582 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/2443/459 with crawlera enabled | |
0 dataloss requests gave out retrying out of 11 in job 1887/102/4170 with crawlera enabled | |
0 dataloss requests gave out retrying out of 14 in job 1887/102/4169 with crawlera enabled | |
29 dataloss requests gave out retrying out of 102 in job 1887/981/836 with crawlera disabled | |
0 dataloss requests gave out retrying out of 229 in job 1887/1526/2584 with crawlera enabled | |
30 dataloss requests gave out retrying out of 115 in job 1887/981/835 with crawlera disabled | |
0 dataloss requests gave out retrying out of 266 in job 1887/1526/2583 with crawlera enabled | |
0 dataloss requests gave out retrying out of 275 in job 1887/1526/2586 with crawlera enabled | |
0 dataloss requests gave out retrying out of 275 in job 1887/1526/2588 with crawlera enabled | |
0 dataloss requests gave out retrying out of 306 in job 1887/1526/2585 with crawlera enabled | |
2 dataloss requests gave out retrying out of 309 in job 1887/1526/2589 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/632/1015 with crawlera enabled | |
0 dataloss requests gave out retrying out of 6 in job 1887/870/1577 with crawlera enabled | |
0 dataloss requests gave out retrying out of 51 in job 1887/798/2242 with crawlera enabled | |
0 dataloss requests gave out retrying out of 8 in job 1887/2584/183 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2543/2625 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2543/2626 with crawlera enabled | |
3 dataloss requests gave out retrying out of 435 in job 1887/1081/850 with crawlera enabled | |
0 dataloss requests gave out retrying out of 31 in job 1887/92/1908 with crawlera enabled | |
0 dataloss requests gave out retrying out of 56 in job 1887/407/1080 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/5/4396 with crawlera disabled | |
2 dataloss requests gave out retrying out of 559 in job 1887/3141/428 with crawlera enabled | |
0 dataloss requests gave out retrying out of 29 in job 1887/92/1911 with crawlera enabled | |
1 dataloss requests gave out retrying out of 237 in job 1887/1526/2581 with crawlera enabled | |
2 dataloss requests gave out retrying out of 548 in job 1887/2293/700 with crawlera enabled | |
0 dataloss requests gave out retrying out of 33 in job 1887/92/1909 with crawlera enabled | |
0 dataloss requests gave out retrying out of 362 in job 1887/1081/849 with crawlera enabled | |
0 dataloss requests gave out retrying out of 32 in job 1887/92/1910 with crawlera enabled | |
0 dataloss requests gave out retrying out of 23 in job 1887/92/1906 with crawlera enabled | |
0 dataloss requests gave out retrying out of 33 in job 1887/92/1907 with crawlera enabled | |
0 dataloss requests gave out retrying out of 26 in job 1887/102/4171 with crawlera enabled | |
0 dataloss requests gave out retrying out of 30 in job 1887/92/1912 with crawlera enabled | |
0 dataloss requests gave out retrying out of 28 in job 1887/92/1913 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/105/2340 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/105/2338 with crawlera enabled | |
2 dataloss requests gave out retrying out of 522 in job 1887/2293/699 with crawlera enabled | |
0 dataloss requests gave out retrying out of 365 in job 1887/1081/848 with crawlera enabled | |
0 dataloss requests gave out retrying out of 50 in job 1887/3305/229 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/2543/2627 with crawlera enabled | |
0 dataloss requests gave out retrying out of 3 in job 1887/2543/2623 with crawlera enabled | |
2 dataloss requests gave out retrying out of 20 in job 1887/1504/807 with crawlera enabled | |
0 dataloss requests gave out retrying out of 35 in job 1887/226/1122 with crawlera enabled | |
0 dataloss requests gave out retrying out of 2 in job 1887/287/14310 with crawlera enabled | |
0 dataloss requests gave out retrying out of 4 in job 1887/287/14309 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/654/23 with crawlera enabled | |
2 dataloss requests gave out retrying out of 72 in job 1887/798/2241 with crawlera enabled | |
0 dataloss requests gave out retrying out of 1 in job 1887/796/173 with crawlera enabled |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment