Skip to content

Instantly share code, notes, and snippets.

@Ziggoto
Created April 29, 2016 16:18
Show Gist options
  • Save Ziggoto/142162b10a962601ca8e09b8b0dbd7e7 to your computer and use it in GitHub Desktop.
Save Ziggoto/142162b10a962601ca8e09b8b0dbd7e7 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import re
import urllib2
import PyRSS2Gen
from bs4 import BeautifulSoup
# Download the front page and collect the target URL of every
# <li class="destaque"> entry ("destaque" is Portuguese for "highlight").
url = 'https://one-piecex.com.br/'
# Browser-like User-Agent header sent along with the request.
user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0'
req = urllib2.Request(url, headers={'User-Agent': user_agent})
con = urllib2.urlopen(req)
try:
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers (lxml, html5lib) happen to be installed.
    soup = BeautifulSoup(con.read(), "html.parser")
finally:
    # Release the HTTP connection even if reading or parsing fails.
    con.close()
mylist = soup.findAll("li", {"class": "destaque"})
# Take the first anchor that actually carries an href from each entry; entries
# without one are skipped instead of aborting with IndexError/KeyError.
selected = []
for element in mylist:
    anchor = element.find("a", href=True)
    if anchor is not None:
        selected.append(anchor['href'])
def format_title(link):
    """Build a human-readable title from the last path segment of *link*.

    Hyphens in the segment become spaces and every word is capitalised, so
    ``"https://example.com/some-news-item/"`` yields ``"Some News Item"``.

    Links that end in ``/`` are handled by the regular expression. Any other
    link (no trailing slash, or no slash at all) falls back to plain string
    splitting instead of failing on a missing regex match.
    """
    match = re.search(r".*\/(.+?)\/$", link)
    if match is not None:
        slug = match.group(1)
    else:
        # No "<segment>/" at the end: use whatever follows the last slash.
        slug = link.rstrip("/").rsplit("/", 1)[-1]
    return " ".join(slug.split("-")).title()
# Turn every collected link into an RSS item whose title is derived from the
# link itself, then wrap the items in a feed.
headlines = [PyRSS2Gen.RSSItem(link=link, title=format_title(link)) for link in selected]
rss = PyRSS2Gen.RSS2(
    title="One Piece Hot News",
    link="localhost",
    description="After..",
    items=headlines,
)
# Write the feed inside a with-block so the file is closed (and its contents
# flushed to disk) even if serialisation raises.
with open("one_piece.xml", "w") as feed_file:
    rss.write_xml(feed_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment