Skip to content

Instantly share code, notes, and snippets.

@bernardobarreto
Created August 29, 2012 19:21
Show Gist options
  • Save bernardobarreto/3517519 to your computer and use it in GitHub Desktop.
Save bernardobarreto/3517519 to your computer and use it in GitHub Desktop.
a crawler to help abed
from splinter import Browser
class Abed(object):
    """Crawler that saves the names listed on dicionariodenomesproprios.com.br.

    Call ``extract()`` with ``'masculinos'`` or ``'femininos'``; every name
    found on the listing pages is written, one per line and UTF-8 encoded, to
    ``nomes_<sex>.txt`` in the current working directory.
    """

    def save_name(self, name):
        """Append *name* followed by a newline to the open output file.

        *name* may be text or UTF-8 encoded bytes. The file object is a text
        stream, so bytes are decoded first instead of being concatenated
        with a text newline (which raises ``TypeError`` on Python 3).
        """
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        self.open_file.write(name + u'\n')

    def prepare(self):
        """Start the splinter browser used to visit the listing pages."""
        self.browser = Browser()

    def kill_browser(self):
        """Quit the browser started by ``prepare()``."""
        self.browser.quit()

    def open(self):
        """Open ``nomes_<sex>.txt`` for writing, truncating existing content.

        Requires ``self.sex`` to be set (``extract()`` does this).
        """
        # Local import keeps this class self-contained; io.open gives an
        # explicit UTF-8 text stream on both Python 2 and Python 3.
        import io
        self.open_file = io.open(
            'nomes_%s.txt' % self.sex, 'w', encoding='utf-8')

    def close(self):
        """Close the output file opened by ``open()``."""
        self.open_file.close()

    def how_many_pages(self):
        """Return the hard-coded page bound for the current ``self.sex``.

        Returns 129 for ``'femininos'`` and 178 for any other value.
        """
        if self.sex == 'femininos':
            return 129
        return 178

    def extract(self, sex='masculinos'):
        """Crawl every listing page for *sex* and save the names found.

        The output file and the browser are released even when a page visit
        or a write fails part-way through the crawl.

        :param sex: URL/file suffix, ``'masculinos'`` (default) or
            ``'femininos'``.
        """
        self.sex = sex
        url = ('http://www.dicionariodenomesproprios.com.br/nomes-%s/'
               % self.sex)
        self.open()
        try:
            self.prepare()
            try:
                # NOTE(review): range() excludes its stop value, so page
                # number how_many_pages() itself is never visited. Confirm
                # whether the hard-coded counts already account for that.
                for page in range(1, self.how_many_pages()):
                    self.browser.visit(url + str(page))
                    for elem in self.browser.find_by_css('.box-list dt'):
                        self.save_name(elem.text)
            finally:
                self.kill_browser()
        finally:
            self.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment