Skip to content

Instantly share code, notes, and snippets.

@dr1nk0rdi3
Created May 23, 2017 20:48
Show Gist options
  • Save dr1nk0rdi3/73b1d587a8e451337d459e6165c1f7bb to your computer and use it in GitHub Desktop.
Save dr1nk0rdi3/73b1d587a8e451337d459e6165c1f7bb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests, sys, re
from bs4 import BeautifulSoup as bs
# Listing URL template; "{}" is replaced by the 1-based page number.
url = "https://www.cinemark.com.br/sao-paulo/filmes/em-cartaz?pagina={}"
# Host prepended to each movie's href when printing its link.
prefix = "www.cinemark.com.br"

# Seconds to wait on the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 30

# Text whose presence on a page marks the end of the listing.
# NOTE: this is a regex, not a glob -- the trailing "*" only quantifies the
# final "o", so any text containing "Não foi encontrad" matches. The pattern
# is kept as originally written; it is compiled once instead of per page.
NOT_FOUND = re.compile('Não foi encontrado*')


def print_page(page):
    """Fetch one listing page and print its movies, one per line.

    Each line is the link's ``title`` attribute with its first six
    characters dropped (presumably a fixed label prefix -- confirm against
    the live markup), a space, then ``prefix`` followed by the link's
    ``href``.

    Args:
        page: 1-based page number substituted into ``url``.

    Returns:
        True if the next page should be fetched; False once the page
        contains the "not found" text or has no movie entries at all.
    """
    try:
        response = requests.get(url.format(page), timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # Report network failures briefly on stderr and exit non-zero.
        sys.exit("Failed to fetch page {}: {}".format(page, err))

    soup = bs(response.content, "lxml")
    movies = soup.find_all('article', attrs={'class': 'movie'})
    for movie in movies:
        link = movie.find('a')
        # Skip entries without a usable link instead of crashing on them.
        if link is None or link.get('title') is None or link.get('href') is None:
            continue
        print(link['title'][6:] + ' ' + prefix + link['href'])

    if soup.find(string=NOT_FOUND) is not None:
        return False
    # An empty page also ends the run, so a change to the site's "not found"
    # wording cannot turn this into an endless stream of requests.
    return bool(movies)


def main():
    """Print every movie from the paginated listing, starting at page 1."""
    page = 1
    while print_page(page):
        page += 1


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment