Skip to content

Instantly share code, notes, and snippets.

@nst
Created August 17, 2012 03:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nst/3375805 to your computer and use it in GitHub Desktop.
Save nst/3375805 to your computer and use it in GitHub Desktop.
Twitter web scraping
#!/usr/bin/env python
"""Twitter webscrapping, works on 2012-08-17
$ python twitter.py
@RonaldHayden | Firing up Uncharted 2 thanks to @chuckdude -- y'all have raised my expectations!
@CocoaSamurai | My list of Go features i'd love to see in Objective-C goo.gl/VB1Zk forgot to tweet this earlier
@slashdot | Dremel-Based Project Accepted As Apache Incubator bit.ly/N7np9P
"""
import re
import requests # pip install requests
from BeautifulSoup import BeautifulSoup # pip install beautifulsoup
def tweets_from_html(html):
    """Extract tweets from the HTML of a timeline page.

    Every ``<span class="username">`` is paired, in document order, with
    the ``<div class="tweet-text">`` at the same position; pairing stops
    at the shorter of the two sequences.

    Args:
        html: the page markup to parse.

    Returns:
        A list of dicts, each with the keys ``'username'`` and ``'text'``.
    """
    soup = BeautifulSoup(html)
    name_spans = soup.findAll('span', attrs={'class': 'username'})
    text_divs = soup.findAll('div', attrs={'class': 'tweet-text'})

    tweets = []
    for span, div in zip(name_spans, text_divs):
        # The first 15 characters of the rendered span are dropped --
        # presumably markup preceding the handle; TODO confirm against
        # the current page layout.
        handle = span.renderContents()[15:].rstrip()
        # Concatenate every text node in the div, ignoring nested tags.
        body = ''.join(div.findAll(text=True))
        tweets.append({'username': handle, 'text': body})
    return tweets
def get_authentication_token():
    """Fetch the mobile login page and extract its authenticity token.

    Returns:
        A ``(token, cookies)`` tuple: the value of the first hidden
        ``authenticity_token`` input found in the page, and the cookies
        of the response that served the page.

    Raises:
        IndexError: if the page contains no matching
            ``authenticity_token`` input.
    """
    r = requests.get("https://mobile.twitter.com/session/new")
    # Only the first occurrence is needed, so search instead of findall.
    match = re.search(
        r'<input name="authenticity_token" type="hidden" value="(\S*)"',
        r.text)
    if match is None:
        # Keep the exception type that indexing an empty result list
        # raised, but say what actually went wrong.
        raise IndexError(
            "authenticity_token input not found in the login page")
    return (match.group(1), r.cookies)
def get_login_cookies(username, password, authentication_token, cookies):
    """Submit the sign-in form and return the cookies of the response.

    Args:
        username: account name sent in the form.
        password: account password sent in the form.
        authentication_token: value sent as the ``authenticity_token``
            form field.
        cookies: cookies sent along with the POST request.

    Returns:
        The ``cookies`` attribute of the POST response.
    """
    form = {
        'authenticity_token': authentication_token,
        'username': username,
        'password': password,
        'commit': 'Sign in',
    }
    response = requests.post(
        "https://mobile.twitter.com/session",
        data=form,
        cookies=cookies)
    return response.cookies
def get_html_timeline(username, password):
    """Sign in with the given credentials and return the home page HTML.

    Args:
        username: account name passed to the sign-in step.
        password: account password passed to the sign-in step.

    Returns:
        The text of the response for ``https://mobile.twitter.com/``,
        requested with the cookies returned by the sign-in step.
    """
    # Step 1: obtain the form token and the cookies issued with it.
    token, anonymous_cookies = get_authentication_token()
    # Step 2: sign in; the resulting cookies are used for the next request.
    session_cookies = get_login_cookies(
        username, password, token, cookies=anonymous_cookies)
    # Step 3: fetch the home page with those cookies.
    page = requests.get('https://mobile.twitter.com/', cookies=session_cookies)
    return page.text
def main():
    """Fetch the timeline and print one ``@username | text`` line per tweet.

    The credentials passed to ``get_html_timeline`` are empty strings --
    presumably placeholders to be filled in before running.
    """
    html = get_html_timeline(username='', password='')
    tweets = tweets_from_html(html)
    for tweet in tweets:
        # The username is left-justified in a 14-character column.
        line = "@%(username)-14s | %(text)s" % tweet
        # A single parenthesised argument prints the same under the
        # Python 2 print statement.
        print(line)


# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment