Skip to content

Instantly share code, notes, and snippets.

@Yureien
Created January 31, 2018 13:08
Show Gist options
  • Save Yureien/2341179395e06f6c8b27e19ae79fa56d to your computer and use it in GitHub Desktop.
Scraper for - https://anime-frost.com. Usage is in file.
# User-facing help text. Printed verbatim when the script is run without
# exactly one command-line argument, so the wording/formatting below is
# runtime output, not documentation — do not reflow it.
USAGE = """
Scraper for anime-frost.com.
Author: Soham Sen (FadedCoder) - http://sohamsen.me
Usage:
python animefrost_scraper.py <link to anime>
It will output a txt file with the Google Drive URLs.
Example:
python animefrost_scraper.py "https://anime-frost.com/anime/sword-art-online/"
Outputs -
A file called "sword-art-online.txt" which is like this -
-----------------------------------------------------------------------------------
#1 - The World of Swords. Link: https://anime-frost.com/anime/sword-art-online/0/1
https://drive.google.com/file/d/0BwBKVy9cKcUcbW5xT0xQUHBQR2c/preview
#2 - Beater. Link: https://anime-frost.com/anime/sword-art-online/0/2
https://drive.google.com/file/d/0BwBKVy9cKcUcUE92UURCVWs0dkE/preview
...
-----------------------------------------------------------------------------------
"""
import sys
import requests
import re
from bs4 import BeautifulSoup
BASE_URL = "https://anime-frost.com"

# Seconds before a hung HTTP request is abandoned (original code had no
# timeout, so a stalled server would hang the script forever).
REQUEST_TIMEOUT = 30

# Patterns compiled once, as raw strings ("\S"/"\d" in plain strings are
# invalid escape sequences on modern Python). The domain dot is escaped.
_ANIME_NAME_RE = re.compile(r"\S+anime-frost\.com/anime/(\S+)/")
_EP_NUM_RE = re.compile(r"(\d+)")
_VIDEO_ID_RE = re.compile(r"/player\?url=(\S+)&\S+")


def _fetch_soup(url):
    """GET *url* and return its body parsed with html.parser.

    Raises requests.RequestException on network failure/timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, "html.parser")


def _scrape_episode_list(anime_url):
    """Return a list of {"ep_num", "ep_title", "ep_link"} dicts for the anime page."""
    soup = _fetch_soup(anime_url)
    episodes = []
    for row in soup.find_all(attrs="episode-row"):
        # Episode number is the first run of digits in the number cell.
        num = int(_EP_NUM_RE.findall(row.find(attrs={'class': "episode-number"}).text)[0])
        title = row.find(attrs={'class': "episode-title"}).text
        link = row.find("a").get("href")
        episodes.append({"ep_num": num, "ep_title": title, "ep_link": link})
    return episodes


def _resolve_gdrive_url(episode):
    """Follow an episode page's embedded player to its Google Drive URL.

    Returns the final URL after redirects, or None if the player iframe's
    src does not contain a recognizable video id.
    """
    soup = _fetch_soup(episode["ep_link"])
    player_src = soup.find("iframe").get("src")
    ids = _VIDEO_ID_RE.findall(player_src)
    if not ids:  # guard: original code raised a bare IndexError here
        return None
    # The referer header mirrors what a browser would send from the player
    # page; presumably the site requires it — kept from the original code.
    response = requests.get(
        BASE_URL + "/getplayercontents.php?id=" + ids[0],
        headers={"referer": BASE_URL + player_src},
        timeout=REQUEST_TIMEOUT,
    )
    return response.url


def _write_output(anime_name, episodes):
    """Write '<anime_name>.txt' with one 'header line + URL + blank line' per episode."""
    with open(anime_name + ".txt", "w") as f:
        for ep in episodes:
            f.write("#{0} - {1}. Link: {2}\n".format(ep['ep_num'], ep['ep_title'], ep['ep_link']))
            f.write(ep['gdrive_video_url'] + "\n\n")
        # No per-line flush needed; the context manager flushes and closes.


def main():
    """Entry point: validate argv, scrape, resolve URLs, write the output file."""
    if len(sys.argv) != 2:
        print(USAGE)
        sys.exit(1)  # usage error -> nonzero status (was exit(0))
    name_match = _ANIME_NAME_RE.findall(sys.argv[1])
    if not name_match:  # guard: original code raised a bare IndexError here
        print("Unrecognized anime URL: " + sys.argv[1])
        print(USAGE)
        sys.exit(1)
    anime_name = name_match[0]
    episodes = _scrape_episode_list(sys.argv[1])
    for ep in episodes:
        ep["gdrive_video_url"] = _resolve_gdrive_url(ep)
        print("Got episode #{0} - {1}".format(ep['ep_num'], ep['ep_title']))
    _write_output(anime_name, episodes)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment