Skip to content

Instantly share code, notes, and snippets.

@davidpgero
Created April 25, 2011 20:43
ja_paragrafusokEloszor
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup
class Tisztitas(object):
    """Download an HTML page and print its paragraphs labelled by kind.

    The page is fetched with ``urlopen``, decoded to text and parsed with
    ``BeautifulSoup``. Every public method that needs the document
    downloads and parses it afresh; nothing is cached on the instance.
    """

    def __init__(self, url, kodolas='latin-1'):
        """Store the page address and the encoding of its bytes.

        Args:
            url: Address of the HTML page to process.
            kodolas: Character encoding used to decode the downloaded
                bytes. Defaults to 'latin-1', the value that was
                previously hard-coded.
        """
        self.url = url
        self.kodolas = kodolas

    def megnyit(self):
        """Open ``self.url`` and return the object ``urlopen`` gives back.

        The caller is responsible for closing the returned object.
        """
        return urlopen(self.url)

    def _letolt(self):
        """Return the page body decoded to text, closing the connection."""
        valasz = self.megnyit()
        try:
            nyers = valasz.read()
        finally:
            # Release the connection even if read() raises; the response
            # used to be dropped without ever being closed.
            valasz.close()
        return nyers.decode(self.kodolas)

    def beolvasKodolva(self):
        """Download the page and return it as a ``BeautifulSoup`` tree."""
        return BeautifulSoup(self._letolt())

    def cimekListaja(self):
        """Return all ``<a>`` elements that carry a ``name`` attribute."""
        dokumentum = self.beolvasKodolva()
        return dokumentum.findAll('a', attrs={'name': True})

    def cimekEsSzoveg(self):
        """Print every ``<p>`` element prefixed with 'cim' or 'szoveg'.

        A paragraph that contains an ``<a>`` element is printed with the
        'cim' prefix, any other paragraph with the 'szoveg' prefix.
        """
        # The document is fetched exactly once. An unused call to
        # cimekListaja() used to trigger a second download and parse.
        for bekezdes in self.beolvasKodolva().findAll('p'):
            if bekezdes.find('a') is not None:
                cimke = 'cim'
            else:
                cimke = 'szoveg'
            # A single parenthesised argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (cimke, bekezdes.text))
if __name__ == '__main__':
    # Script entry point: process the fixed source page and print its
    # paragraphs labelled as title or text.
    forras = 'http://mek.niif.hu/00700/00707/html/vs192601.htm'
    Tisztitas(forras).cimekEsSzoveg()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment