Skip to content

Instantly share code, notes, and snippets.

@Yureien
Created January 31, 2018 13:08
Show Gist options
  • Save Yureien/2341179395e06f6c8b27e19ae79fa56d to your computer and use it in GitHub Desktop.
Scraper for - https://anime-frost.com. Usage is in file.
# User-facing help text. Printed verbatim when the script is run without
# exactly one command-line argument, so the wording/formatting below is
# runtime output, not documentation — do not reflow it.
USAGE = """
Scraper for anime-frost.com.
Author: Soham Sen (FadedCoder) - http://sohamsen.me
Usage:
python animefrost_scraper.py <link to anime>
It will output a txt file with the Google Drive URLs.
Example:
python animefrost_scraper.py "https://anime-frost.com/anime/sword-art-online/"
Outputs -
A file called "sword-art-online.txt" which is like this -
-----------------------------------------------------------------------------------
#1 - The World of Swords. Link: https://anime-frost.com/anime/sword-art-online/0/1
https://drive.google.com/file/d/0BwBKVy9cKcUcbW5xT0xQUHBQR2c/preview
#2 - Beater. Link: https://anime-frost.com/anime/sword-art-online/0/2
https://drive.google.com/file/d/0BwBKVy9cKcUcUE92UURCVWs0dkE/preview
...
-----------------------------------------------------------------------------------
"""
import sys
import requests
import re
from bs4 import BeautifulSoup
BASE_URL = "https://anime-frost.com"

# Seconds before a hung HTTP request is abandoned (original code had no
# timeout, so a stalled server would hang the script forever).
REQUEST_TIMEOUT = 30

# Patterns compiled once, as raw strings ("\S"/"\d" in plain strings are
# invalid escape sequences on modern Python). The domain dot is escaped.
_ANIME_NAME_RE = re.compile(r"\S+anime-frost\.com/anime/(\S+)/")
_EP_NUM_RE = re.compile(r"(\d+)")
_VIDEO_ID_RE = re.compile(r"/player\?url=(\S+)&\S+")


def _fetch_soup(url):
    """GET *url* and return its body parsed with html.parser.

    Raises requests.RequestException on network failure/timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, "html.parser")


def _scrape_episode_list(anime_url):
    """Return a list of {"ep_num", "ep_title", "ep_link"} dicts for the anime page."""
    soup = _fetch_soup(anime_url)
    episodes = []
    for row in soup.find_all(attrs="episode-row"):
        # Episode number is the first run of digits in the number cell.
        num = int(_EP_NUM_RE.findall(row.find(attrs={'class': "episode-number"}).text)[0])
        title = row.find(attrs={'class': "episode-title"}).text
        link = row.find("a").get("href")
        episodes.append({"ep_num": num, "ep_title": title, "ep_link": link})
    return episodes


def _resolve_gdrive_url(episode):
    """Follow an episode page's embedded player to its Google Drive URL.

    Returns the final URL after redirects, or None if the player iframe's
    src does not contain a recognizable video id.
    """
    soup = _fetch_soup(episode["ep_link"])
    player_src = soup.find("iframe").get("src")
    ids = _VIDEO_ID_RE.findall(player_src)
    if not ids:  # guard: original code raised a bare IndexError here
        return None
    # The referer header mirrors what a browser would send from the player
    # page; presumably the site requires it — kept from the original code.
    response = requests.get(
        BASE_URL + "/getplayercontents.php?id=" + ids[0],
        headers={"referer": BASE_URL + player_src},
        timeout=REQUEST_TIMEOUT,
    )
    return response.url


def _write_output(anime_name, episodes):
    """Write '<anime_name>.txt' with one 'header line + URL + blank line' per episode."""
    with open(anime_name + ".txt", "w") as f:
        for ep in episodes:
            f.write("#{0} - {1}. Link: {2}\n".format(ep['ep_num'], ep['ep_title'], ep['ep_link']))
            f.write(ep['gdrive_video_url'] + "\n\n")
        # No per-line flush needed; the context manager flushes and closes.


def main():
    """Entry point: validate argv, scrape, resolve URLs, write the output file."""
    if len(sys.argv) != 2:
        print(USAGE)
        sys.exit(1)  # usage error -> nonzero status (was exit(0))
    name_match = _ANIME_NAME_RE.findall(sys.argv[1])
    if not name_match:  # guard: original code raised a bare IndexError here
        print("Unrecognized anime URL: " + sys.argv[1])
        print(USAGE)
        sys.exit(1)
    anime_name = name_match[0]
    episodes = _scrape_episode_list(sys.argv[1])
    for ep in episodes:
        ep["gdrive_video_url"] = _resolve_gdrive_url(ep)
        print("Got episode #{0} - {1}".format(ep['ep_num'], ep['ep_title']))
    _write_output(anime_name, episodes)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment