@bltavares
Created August 22, 2011 17:45
Twitter crawler to fetch all of a user's tweets (requested by @elvisoliveira)
import urllib
from xml.dom import minidom

class crawlXML:
    def __init__(self, user):
        self.user = user
        print "Crawler set for user " + self.user

    def getProfile(self):
        return urllib.urlopen("http://api.twitter.com/1/users/show.xml?screen_name=%s" % self.user).read()

    def getFollowersId(self):
        #lista = []  # returning a list only after fetching every follower would add a delay, so yield instead
        cursor = -1
        while cursor != 0:
            get = urllib.urlopen("http://api.twitter.com/1/followers/ids.xml?screen_name=%s&cursor=%s" % (self.user, cursor)).read()
            yield get
            #lista.append(get)
            get = minidom.parseString(get)  # parse the page we just yielded
            cursor = int(get.getElementsByTagName('next_cursor')[0].firstChild.data)  # value of the first <next_cursor> found
        #return lista

    def getAllTweets(self):
        cursor = 1
        running = True
        while running:
            get = urllib.urlopen("http://api.twitter.com/1/statuses/user_timeline.xml?screen_name=%s&page=%s" % (self.user, cursor)).read()
            yield get
            get = minidom.parseString(get)  # parse the page we just yielded
            cursor = cursor + 1
            running = len(get.getElementsByTagName('statuses')[0].childNodes) > 1  # stop once a page has no <status> children

crawler = crawlXML("elvisoliveira")
#print crawler.getProfile()
#for x in crawler.getFollowersId(): print x
for x in crawler.getAllTweets(): print x
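
A minimal usage sketch (not part of the original gist) for consuming the pages yielded by getAllTweets(). The helper print_tweet_texts is hypothetical, and it assumes the API v1 XML schema where each page is a <statuses> element wrapping <status> entries that contain a <text> child.

# Hypothetical helper: extract and print the text of each tweet from every page.
# Assumes each yielded page contains <status> elements with a <text> child node.
from xml.dom import minidom

def print_tweet_texts(crawler):
    for page in crawler.getAllTweets():
        dom = minidom.parseString(page)
        for status in dom.getElementsByTagName('status'):
            text_node = status.getElementsByTagName('text')[0]
            if text_node.firstChild is not None:
                print text_node.firstChild.data.encode('utf-8')

#print_tweet_texts(crawlXML("elvisoliveira"))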