Skip to content

Instantly share code, notes, and snippets.

@masterbpro
Created December 31, 2020 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save masterbpro/0dde60d65cad0af0e8ac7fc540f6cea0 to your computer and use it in GitHub Desktop.
Save masterbpro/0dde60d65cad0af0e8ac7fc540f6cea0 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
from loguru import logger
# URL template for one page of the movie listing; the {page_num} placeholder
# is filled in with str.format by FilmZone.get_movies_in_page.
PAGE_URL = "https://filmix.zone/filmy/page/{page_num}"
class FilmZone:
    """Scraper for movie listing pages (see PAGE_URL) and the movie pages
    they link to.

    All requests made through one instance share a single
    ``requests.Session``.
    """

    # Seconds to wait for the server before giving up, so that a stalled
    # connection cannot hang the scraper indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.session = requests.Session()

    def get_html(self, url):
        """Fetch *url* and return the response body as text.

        Args:
            url: Address of the page to download.

        Returns:
            The page text, decoded as windows-1251, when the server answers
            with HTTP 200; otherwise ``None`` (the raw response body is
            logged as a warning).

        Raises:
            requests.RequestException: On network errors or timeout.
        """
        res = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        # The encoding is forced here instead of trusting the response headers.
        res.encoding = 'windows-1251'
        if res.status_code == 200:
            return res.text
        logger.warning(res.content)
        return None

    @staticmethod
    def _text_of(tag):
        """Return the stripped text of a parsed tag, or None if it is missing."""
        return tag.get_text(strip=True) if tag is not None else None

    def get_movie_info(self, movie_url):
        """Download a movie page and extract its metadata.

        Args:
            movie_url: Address of the movie page.

        Returns:
            A dict with the keys ``name``, ``description``, ``country``,
            ``genre``, ``year`` and ``video_url``. A value is ``None`` when
            the corresponding element is not present in the page.
            Returns ``None`` when the page could not be fetched.
        """
        html = self.get_html(movie_url)
        if html is None:
            # get_html already logged the failure; there is nothing to parse.
            return None

        soup = BeautifulSoup(html, 'lxml')

        # 'contry' is the class name exactly as the original code queried it.
        # NOTE(review): presumably this matches the site's own markup -- confirm.
        country_block = soup.find('div', {'class': 'item contry'})
        country_link = country_block.find('a') if country_block is not None else None

        # The original author flagged this lookup as the problematic one.
        # NOTE(review): presumably the <video> element is not part of the
        # static HTML returned by the server -- confirm against the live page.
        video_tag = soup.find('video')

        return {
            'name': self._text_of(soup.find('h1', {'class': 'name'})),
            'description': self._text_of(soup.find('div', {'class': 'full-story'})),
            'country': self._text_of(country_link),
            'genre': self._text_of(soup.find('a', {'itemprop': 'genre'})),
            'year': self._text_of(soup.find('a', {'itemprop': 'copyrightYear'})),
            'video_url': video_tag.get('src') if video_tag is not None else None,
        }

    def get_movies_in_page(self, page_num=0, limit=1):
        """Collect metadata for the movies linked from one listing page.

        Args:
            page_num: Number substituted into PAGE_URL.
            limit: Maximum number of movies to process. Defaults to 1, which
                keeps the original "parse a single movie for testing"
                behaviour; pass ``None`` to process every link on the page.

        Returns:
            A list of dicts as produced by ``get_movie_info``. Movies whose
            page could not be fetched are skipped. The list is empty when
            the listing page itself could not be fetched.
        """
        html = self.get_html(PAGE_URL.format(page_num=page_num))
        if html is None:
            return []

        soup = BeautifulSoup(html, 'lxml')
        anchors = soup.find_all('a', {'itemprop': 'url'})
        # Skip anchors without an href instead of failing with KeyError.
        hrefs = [href for href in (a.get('href') for a in anchors) if href]

        movies = []
        for href in hrefs[:limit]:
            info = self.get_movie_info(href)
            if info is not None:
                movies.append(info)
        return movies
if __name__ == '__main__':
    # Script entry point: create a scraper and process the default
    # listing page (page_num=0).
    scraper = FilmZone()
    scraper.get_movies_in_page()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment