Created
October 8, 2021 05:21
-
-
Save hexod0t/8c25ddcd4f439e95f2f832dd652ce9ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
*********** TED TALKS VIDEO DOWNLOADER V1.0 *********** | |
*********** Author: hexod0t | |
**************************************** | |
""" | |
import requests, re, sys | |
from bs4 import BeautifulSoup | |
def extract_filename(url):
    """Return the file name component of *url*.

    Everything from the first double quote onwards is discarded, then the
    text after the last ``/`` is taken and any ``?query`` suffix is removed.

    Args:
        url: URL text, possibly followed by a ``"`` and trailing data.

    Returns:
        The last path segment without its query string (may be empty).
    """
    before_quote = url.partition('"')[0]
    last_segment = before_quote.rpartition("/")[2]
    return last_segment.partition("?")[0]
def parse_url(response):
    """Extract the video URL text embedded in a talk page.

    Every ``<script>`` element of the page is scanned for one that mentions
    ``talkPage.init``; the URL regex is then run over that script's markup.

    Args:
        response: Object whose ``content`` attribute holds the page HTML
            (``main`` passes the result of ``requests.get``).

    Returns:
        The text captured by the regex's ``url`` group.

    Raises:
        ValueError: If no script mentions ``talkPage.init``, or if that
            script contains no text matching the URL pattern.
    """
    soup = BeautifulSoup(response.content, features="lxml")

    script_markup = None
    for script in soup.find_all("script"):
        markup = str(script)
        if re.search("talkPage.init", markup) is not None:
            # No early exit: if several scripts match, the last one is used.
            script_markup = markup

    if script_markup is None:
        raise ValueError("No script containing talkPage.init was found in the page")

    # Raw string so that "\s" reaches the regex engine untouched instead of
    # being an invalid string escape.
    # NOTE(review): the named group ends right before the literal "mp4", and
    # "[^\s]+" is greedy, so the returned text excludes that final "mp4" and
    # may span more than one URL. Presumably extract_filename's split on '"'
    # copes with this shape -- confirm before tightening the pattern.
    match = re.search(r"(?P<url>https:?//[^\s]+)(mp4)", script_markup)
    if match is None:
        raise ValueError("No mp4 URL was found in the talkPage.init script")
    return match.group("url")
def read_file(file_name):
    """Read the list of URLs stored one per line in *file_name*.

    Surrounding whitespace (including the line terminator) is stripped from
    every line and blank lines are skipped, so each returned entry can be
    passed to an HTTP client as-is.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_name, "r") as file:
        stripped_lines = (line.strip() for line in file)
        # Drop blank lines: an empty string is not a usable URL.
        return [line for line in stripped_lines if line]
def download_video(url):
    """Fetch *url* and save the response body to a local file.

    The file name is derived from the URL by ``extract_filename`` and the
    body is written by ``save_file``.

    Args:
        url: Address of the video to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    filename = extract_filename(url)
    response = requests.get(url)
    # Fail loudly on an HTTP error instead of silently saving the error page
    # under a video file name.
    response.raise_for_status()
    save_file(filename, response)
def save_file(filename, response):
    """Write the body of *response* to *filename* as raw bytes.

    Args:
        filename: Path of the file to create or overwrite.
        response: Object exposing the payload through a ``content``
            attribute holding bytes.
    """
    with open(filename, mode="wb") as sink:
        payload = response.content
        sink.write(payload)
def main(file):
    """Download the video behind every page URL listed in *file*.

    Each entry returned by ``read_file`` is fetched, the video URL is pulled
    out of the page with ``parse_url`` and handed to ``download_video``.
    A completion message is printed once all entries have been processed.

    Args:
        file: Path of the text file listing the page URLs.
    """
    for page_url in read_file(file):
        page = requests.get(page_url)
        download_video(parse_url(page))
    print("Download Process Complete")
if __name__ == "__main__":
    # Guard only the argument lookup: an IndexError raised while processing
    # the downloads must not be misreported as a missing-argument error.
    try:
        url_list_path = sys.argv[1]
    except IndexError:
        raise SystemExit(f"Usage: {sys.argv[0]} Please specify a file")
    main(url_list_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment