Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save imgVOID/05b3607fc7fa0e44c7466934bea46988 to your computer and use it in GitHub Desktop.
Save imgVOID/05b3607fc7fa0e44c7466934bea46988 to your computer and use it in GitHub Desktop.
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
root = 'https://subslikescript.com'
website = f'{root}/movies'

# Seconds to wait on a request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30


def safe_filename(title):
    """Return *title* reduced to a name usable as a file name.

    Path separators, characters reserved on Windows (``\\ / : * ? " < > |``)
    and ASCII control characters are replaced by a space. Runs of whitespace
    are then collapsed, and leading/trailing spaces and dots are removed.
    Returns ``'untitled'`` when nothing is left.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', ' ', title)
    cleaned = ' '.join(cleaned.split()).strip('. ')
    return cleaned or 'untitled'


def fetch_article(url):
    """Download *url* and return its ``<article class="main-article">`` tag.

    Returns ``None`` when the page contains no such element.

    Raises:
        requests.RequestException: on a connection failure, a timeout, or
            an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return soup.find('article', class_='main-article')


def save_transcript(url):
    """Fetch one page and write its transcript to ``<title>.txt``.

    The file is created in the current working directory and named after
    the page's ``<h1>`` text (passed through ``safe_filename``). Pages that
    cannot be fetched, or that lack the article, heading or
    ``full-script`` element, are reported and skipped.

    Returns:
        ``True`` if a file was written, ``False`` if the page was skipped.
    """
    try:
        article = fetch_article(url)
    except requests.RequestException as exc:
        print(f'Skipping {url}: {exc}')
        return False

    heading = article.find('h1') if article is not None else None
    script = (
        article.find('div', class_='full-script')
        if article is not None else None
    )
    # Not every link collected from the listing has to be a transcript
    # page; skip anything without the expected elements instead of
    # crashing on a None lookup.
    if heading is None or script is None:
        print(f'Skipping {url}: no transcript found')
        return False

    title = heading.get_text()
    transcript = script.get_text(strip=True, separator=' ')
    # Explicit UTF-8 so the output does not depend on the platform's
    # default encoding.
    with open(f'{safe_filename(title)}.txt', 'w', encoding='utf-8') as file:
        file.write(transcript)
    return True


def main():
    """Save a transcript file for every link found on the listing page."""
    listing = fetch_article(website)
    if listing is None:
        raise SystemExit(f'No main article found at {website}')

    # The hrefs do not include the site root.
    links = [link['href'] for link in listing.find_all('a', href=True)]
    for link in links:
        # urljoin resolves the href against the listing URL, so a leading
        # "/" does not produce a doubled slash and an absolute URL is
        # left intact.
        save_transcript(urljoin(website, link))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment