Skip to content

Instantly share code, notes, and snippets.

@flavioamieiro
Created September 21, 2012 05:20
Show Gist options
  • Save flavioamieiro/3759852 to your computer and use it in GitHub Desktop.
Save flavioamieiro/3759852 to your computer and use it in GitHub Desktop.
Pega nome do filme, do diretor, e a sinopse de uma página do imdb.
#!/usr/bin/python3
#-*- coding: utf-8 -*-
# Pega nome do filme, do diretor, e a sinopse de uma página do imdb.
# Não tem tratamento de erros decente, e provavelmente não vai ter.
import html
import sys
import urllib.request

from bs4 import BeautifulSoup
def download_description(url):
    """Fetch a movie page and extract its title, director and summary.

    Args:
        url: Address of the page to download (the script is meant to be
            pointed at IMDb movie pages).

    Returns:
        A ``(movie_title, director_name, url, summary_text)`` tuple. Any
        field that cannot be found in the page is returned as an empty
        string; ``url`` is passed through unchanged.

    Raises:
        urllib.error.URLError: If the page cannot be downloaded.
        ValueError: If ``url`` is not a valid URL.
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        content = response.read()

    # Name the parser explicitly: otherwise bs4 picks whichever one is
    # installed (and warns), so results could vary between machines.
    soup = BeautifulSoup(content, 'html.parser')

    movie_title = ''
    heading = soup.h1
    if heading is not None:
        # Empty the nested <span> (when there is one) so that its text is
        # not picked up as part of the title.
        if heading.span is not None:
            heading.span.clear()
        movie_title = next(heading.stripped_strings, '')

    director_tag = soup.find(itemprop='director')
    director_name = director_tag.text if director_tag is not None else ''

    summary_text = ''
    summary_tag = soup.find(itemprop='description')
    if summary_tag is not None:
        # Only the first non-blank text fragment is used as the summary.
        summary_text = next(summary_tag.stripped_strings, '')

    return movie_title, director_name, url, summary_text
def print_description(title, director, url, summary):
    """Write a plain-text description of one movie to standard output.

    The output is a blank line, a ``title | director`` line, the URL and
    then the summary, each on its own line. The summary is padded with
    trailing spaces to a minimum width of 30 characters; longer
    summaries are written in full.

    Args:
        title: Movie title.
        director: Director name (may be empty).
        url: Address of the page the data came from.
        summary: Short synopsis (may be empty).
    """
    template = """
{title} | {director}
{url}
{summary:30}
"""
    # Pass the fields explicitly instead of ``**locals()`` so that the
    # template only sees the values it is meant to format.
    sys.stdout.write(template.format(
        title=title,
        director=director,
        url=url,
        summary=summary,
    ))
def print_html_description(title, director, url, summary):
    """Write an HTML fragment describing one movie to standard output.

    The fragment is an ``<h4>`` with ``title | director``, a link to
    ``url`` and a ``<p>`` holding the summary. All four values are
    HTML-escaped before being inserted, because they come from a
    downloaded page or from user input and may contain ``&``, ``<``,
    ``>`` or quotes.

    Args:
        title: Movie title.
        director: Director name (may be empty).
        url: Address of the page the data came from; used both as the
            link target and as the link text.
        summary: Short synopsis (may be empty).
    """
    template = """
<h4>{title} | {director}</h4>
<a href="{url}">{url}</a>
<p>{summary}</p>
"""
    # html.escape() also escapes quotes by default, which keeps the URL
    # safe inside the double-quoted href attribute.
    sys.stdout.write(template.format(
        title=html.escape(title),
        director=html.escape(director),
        url=html.escape(url),
        summary=html.escape(summary),
    ))
# I know, I know: when no arguments are given the script just sits there
# waiting for URLs on stdin (one per line). Ugly.
urls = sys.argv[1:]
if not urls:
    # Strip each line and drop the blank ones; an empty string is not a
    # valid URL and would make the download step raise.
    stripped_lines = (line.strip() for line in sys.stdin)
    urls = [line for line in stripped_lines if line]
for url in urls:
    # This is even uglier -- it is not what stderr is meant for -- but it
    # shows which URL is being processed while stdout receives only the
    # generated HTML.
    sys.stderr.write('{}\n'.format(url))
    print_html_description(*download_description(url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment