Skip to content

Instantly share code, notes, and snippets.

@0xa
Created August 24, 2017 15:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save 0xa/ebc8eeccd123aaa452027d050de26f33 to your computer and use it in GitHub Desktop.
Save 0xa/ebc8eeccd123aaa452027d050de26f33 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Get Rick & Morty episodes ("rick" as in "https://ctoon.party/rick"):
> scrapy runspider ctoonparty.py -o rick.json -a show=rick
(add -a season=2 to get only one season)
Extract only the episode URLs, then download them (resuming partial files):
> jq '.[].url' -r < rick.json > rick.txt
> wget -c -i rick.txt
"""
import scrapy
import re
def get_best_source(sources, ordered=('1080p', '720p')):
    """Pick the most preferred video source from a list of selectors.

    Args:
        sources: iterable of selector-like objects (one per ``<source>``
            element); each must support ``.css(query).extract_first()``.
        ordered: quality labels to accept, most preferred first.

    Returns:
        A ``(label, href)`` tuple for the first label in ``ordered`` that
        has a source with a non-empty ``src``, or ``None`` when no source
        qualifies.
    """
    # Read each element's attributes once, rather than once per quality.
    href_by_label = {}
    for source in sources:
        label = source.css('::attr(label)').extract_first()
        href = source.css('::attr(src)').extract_first()
        # A source without a src cannot be downloaded, so skip it. Keep the
        # first usable source per label so document order still wins.
        if href and label not in href_by_label:
            href_by_label[label] = href

    for label in ordered:
        if label in href_by_label:
            return (label, href_by_label[label])
    return None
class CToonSpider(scrapy.Spider):
    """Spider that collects direct video URLs for one show on ctoon.party.

    Spider arguments (passed with ``-a key=value``):
        show: show name as it appears in the site URL (required).
        season: optional season number; restricts the crawl to that season.
    """

    name = 'ctoon'

    def __init__(self, *args, show=None, season=None, **kwargs):
        """Validate the spider arguments and set up the start URL.

        Raises:
            ValueError: if ``show`` is missing or contains a ``/``.
        """
        # Let scrapy.Spider perform its own initialisation first.
        super().__init__(*args, **kwargs)
        if not show or '/' in show:
            raise ValueError("use -a show=<name>")
        self.show = show
        # Compare against None/'' rather than truthiness so that season 0
        # is kept as a real filter instead of meaning "all seasons".
        self.season = int(season) if season not in (None, '') else None
        self.base = 'https://ctoon.party/%s' % self.show
        self.start_urls = [self.base]
        # Escape the base URL so '.' and any metacharacters in the show
        # name are matched literally, not interpreted as regex syntax.
        self._episode_re = re.compile(re.escape(self.base) + '/[0-9a-z]+/?')

    def parse(self, response):
        """Handle both the show index page and individual episode pages.

        Episode pages (URL is the base followed by ``/<id>``) yield one item
        ``{'url': ..., 'quality': ...}``; any other page is treated as the
        index and yields a follow-up request per episode link.

        Raises:
            ValueError: if an episode page has no usable video source.
        """
        if self._episode_re.match(response.url):
            # Episode page
            best = get_best_source(response.css('video source'))
            if not best or not best[1]:
                raise ValueError(
                    'no supported video source found on %s' % response.url)
            quality, href = best
            # Resolve a relative src against the page URL so that the
            # emitted link is directly downloadable.
            yield {'url': response.urljoin(href), 'quality': quality}
            return

        # Index: optionally scope the link selector to one season's
        # element, whose id is '#collapse' + zero-padded season number.
        if self.season is not None:
            scope = '#collapse%02d' % self.season
        else:
            scope = ''
        for episode_link in response.css(scope + ' .ep-entry a'):
            yield response.follow(episode_link, self.parse)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment