@bltavares
Created August 22, 2011 17:45
Twitter crawler to fetch all of a user's tweets (requested by @elvisoliveira)
import urllib
from xml.dom import minidom

class crawlXML:
    def __init__(self, user):
        self.user = user
        print "Crawler set for user " + self.user

    def getProfile(self):
        return urllib.urlopen("http://api.twitter.com/1/users/show.xml?screen_name=%s" % self.user).read()

    def getFollowersId(self):
        #lista = []  # returning a list only after fetching every follower would add a delay, so yield instead
        cursor = -1
        while cursor != 0:
            get = urllib.urlopen("http://api.twitter.com/1/followers/ids.xml?screen_name=%s&cursor=%s" % (self.user, cursor)).read()
            yield get
            #lista.append(get)
            get = minidom.parseString(get)  # parse the page we just yielded
            cursor = int(get.getElementsByTagName('next_cursor')[0].firstChild.data)  # value of the first <next_cursor> found
        #return lista

    def getAllTweets(self):
        cursor = 1
        running = True
        while running:
            get = urllib.urlopen("http://api.twitter.com/1/statuses/user_timeline.xml?screen_name=%s&page=%s" % (self.user, cursor)).read()
            yield get
            get = minidom.parseString(get)  # parse the page we just yielded
            cursor = cursor + 1
            running = len(get.getElementsByTagName('statuses')[0].childNodes) > 1  # stop once a page has no <status> children

crawler = crawlXML("elvisoliveira")
#print crawler.getProfile()
#for x in crawler.getFollowersId(): print x
for x in crawler.getAllTweets(): print x
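
A minimal usage sketch (not part of the original gist) for consuming the pages yielded by getAllTweets(). The helper print_tweet_texts is hypothetical, and it assumes the API v1 XML schema where each page is a <statuses> element wrapping <status> entries that contain a <text> child.

# Hypothetical helper: extract and print the text of each tweet from every page.
# Assumes each yielded page contains <status> elements with a <text> child node.
from xml.dom import minidom

def print_tweet_texts(crawler):
    for page in crawler.getAllTweets():
        dom = minidom.parseString(page)
        for status in dom.getElementsByTagName('status'):
            text_node = status.getElementsByTagName('text')[0]
            if text_node.firstChild is not None:
                print text_node.firstChild.data.encode('utf-8')

#print_tweet_texts(crawlXML("elvisoliveira"))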