Skip to content

Instantly share code, notes, and snippets.

@KokaKiwi
Forked from 0xa/ctoonparty.py
Created August 24, 2017 15:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KokaKiwi/bcb91c08f7d1a1252fc9ab4ea8bc1529 to your computer and use it in GitHub Desktop.
Save KokaKiwi/bcb91c08f7d1a1252fc9ab4ea8bc1529 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Get Rick & Morty episodes ("rick" as in "https://ctoon.party/rick"):
> scrapy runspider ctoonparty.py -o rick.json -a show=rick
(add -a season=2 to get only one season)
Extract links only:
> jq '.[].url' -r < rick.json > rick.txt
> wget -c -i rick.txt
Extract video links plus English subtitle links (when an "en" track exists):
> jq -r '.[]|.url,.tracks.en//empty' rick.json > rick.txt
"""
import scrapy
import re
def get_best_source(sources, ordered=('1080p', '720p')):
    """Pick the most preferred ``<source>`` element among *sources*.

    Args:
        sources: Iterable of selector-like objects for ``<source>`` elements.
            Each must support ``.css(query).extract_first()``. The iterable
            is walked exactly once, so one-shot iterators are fine.
        ordered: Accepted quality labels, most preferred first.

    Returns:
        A ``(label, href)`` tuple for the first label in *ordered* carried by
        some source, or ``None`` when no source has an accepted label.
    """
    # Index sources by label in a single pass instead of re-querying every
    # source's attributes once per preferred quality.
    by_label = {}
    for source in sources:
        label = source.css('::attr(label)').extract_first()
        if label not in by_label:
            # On duplicate labels the first source in document order wins.
            by_label[label] = source

    for wanted in ordered:
        if wanted in by_label:
            href = by_label[wanted].css('::attr(src)').extract_first()
            return (wanted, href)
    return None
def get_tracks(tracks):
    """Yield ``(language_code, href)`` pairs for ``<track>`` elements.

    Args:
        tracks: Iterable of selector-like objects for ``<track>`` elements.
            Each must support ``.css(query).extract_first()``.

    Yields:
        ``(srclang, src)`` tuples, one per track that has both attributes.
        Tracks missing either attribute are skipped so that a ``dict`` built
        from the pairs never gets a ``None`` key or value.
    """
    for track in tracks:
        code = track.css('::attr(srclang)').extract_first()
        href = track.css('::attr(src)').extract_first()
        if not code or not href:
            # Without a language there is no key; without a src, no file.
            continue
        yield (code, href)
class CToonSpider(scrapy.Spider):
    """Collect video and subtitle URLs for one show on ctoon.party.

    The crawl starts at the show's index page (``https://ctoon.party/<show>``),
    follows every ``.ep-entry a`` link found there (optionally limited to one
    season's ``#collapseNN`` section) and emits one item per episode page.

    Spider arguments (``-a name=value``):
        show: Show slug as used in the site URL; required, must not contain
            ``/``.
        season: Optional season number; when given, only that season's
            episodes are followed.
    """

    name = 'ctoon'

    def __init__(self, *args, show=None, season=None, **kwargs):
        """Validate the spider arguments and derive the start URL.

        Raises:
            ValueError: If *show* is missing or contains ``/``.
        """
        if not show or '/' in show:
            raise ValueError("use -a show=<name>")
        # Let scrapy.Spider perform its own initialisation.
        super().__init__(*args, **kwargs)
        self.show = show
        # Only a missing/empty value means "all seasons"; 0 is a real season.
        self.season = None if season in (None, '') else int(season)
        self.base = 'https://ctoon.party/%s' % self.show
        self.start_urls = [self.base]
        # Escape the base so '.' and any metacharacters in the show name are
        # matched literally rather than interpreted as regex syntax.
        self._episode_re = re.compile(re.escape(self.base) + '/[0-9a-z]+/?')

    def parse(self, response):
        """Handle both the show index page and individual episode pages.

        Yields:
            For an episode page: one dict with ``url``, ``quality`` and
            ``tracks`` (language code -> subtitle URL).
            For the index page: one follow-up request per episode link.

        Raises:
            ValueError: If an episode page has no acceptable video source.
        """
        if self._episode_re.match(response.url):
            # Episode page
            best = get_best_source(response.css('video source'))
            if not best:
                raise ValueError(
                    'no usable video source found on %s' % response.url)
            quality, url = best
            tracks = dict(get_tracks(response.css('video track')))
            yield {'url': url, 'quality': quality, 'tracks': tracks}
        else:
            # Index
            if self.season is not None:
                scope = '#collapse%02d' % self.season
            else:
                scope = ''
            for next_page in response.css(scope + ' .ep-entry a'):
                yield response.follow(next_page, self.parse)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment