Created
October 21, 2020 17:21
-
-
Save stnwtr/1c553765aed057f4ed613d43ff7ba9f3 to your computer and use it in GitHub Desktop.
Scrape a TED talk's subtitles and pretty-print them
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import system | |
from colorama import Fore | |
from requests import get | |
class Part:
    """One subtitle cue: a leading timing line plus its caption text.

    The first line of the raw cue is kept as ``time``; every following
    line is joined with single spaces into ``text``.
    (Presumably the input is a WebVTT-style cue block -- confirm against
    the subtitle source.)
    """

    def __init__(self, part):
        """Parse one raw cue block.

        Args:
            part: Raw text of a single cue. Its first line becomes
                ``self.time`` and the remaining lines become ``self.text``.
        """
        # splitlines() treats "\n", "\r\n" and "\r" alike and does not yield
        # a trailing empty item, so CRLF input leaves no stray "\r" and a
        # trailing newline adds no trailing space to the text.
        lines = part.strip().splitlines()
        # An empty/blank cue yields empty fields instead of an IndexError.
        self.time = lines[0].strip() if lines else ""
        self.text = " ".join(
            stripped for stripped in (line.strip() for line in lines[1:]) if stripped
        )

    def __str__(self):
        """Return the cue as a colorized ``[time]: text`` line."""
        return f"{Fore.GREEN}[{Fore.YELLOW}{self.time}{Fore.GREEN}]:{Fore.WHITE} {self.text}"
if __name__ == '__main__':
    # Running an empty shell command presumably switches the Windows console
    # into ANSI-escape mode so the colorama color codes render -- confirm.
    system("")

    site = input("enter subtitle page: ")

    # Bound the request so an unresponsive server cannot hang the script,
    # and fail loudly on HTTP errors instead of parsing an error page.
    response = get(site, timeout=30)
    response.raise_for_status()

    # Normalize line endings so the blank-line split below also works for
    # CRLF-delimited subtitle files.
    raw_text = response.text.replace("\r\n", "\n").replace("\r", "\n")

    # Cues are separated by blank lines; the first block is skipped
    # (presumably the file header -- confirm against the subtitle format).
    blocks = raw_text.split("\n" * 2)[1:]

    # Drop whitespace-only blocks (e.g. from trailing newlines) and trim
    # leftover newlines so each cue starts on its timing line.
    part_list = [Part(block.strip("\n")) for block in blocks if block.strip()]

    for part in part_list:
        print(part)

    # Restore the terminal's default foreground color.
    print(Fore.RESET, end="")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment