Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Twitter Stream Downloader
# Twitter API credentials. The download script reads these four names as
# attributes of the `config` module and hands them to tweepy's OAuthHandler,
# so replace every placeholder with the value issued for your own Twitter app.
# NOTE(review): keep real credentials out of version control.
consumer_key = 'your-consumer-key'
consumer_secret = 'your-consumer-secret'
access_token = 'your-access-token'
access_secret = 'your-access-secret'
# To run this code, first edit config.py with your configuration, then:
#
# mkdir data
# python twitter_stream_download.py -q apple -d data
#
# It will produce the list of tweets for the query "apple"
# in the file data/stream_apple.json
import tweepy
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import time
import argparse
import string
import config
import json
def get_parser():
    """Build the command-line argument parser for the downloader.

    Options:
        -q / --query     Term to filter the stream on; stored as ``query``.
                         Defaults to '-'.
        -d / --data-dir  Directory the output file is written to; stored as
                         ``data_dir``. Defaults to the current directory.

    Return:
        argparse.ArgumentParser -- the configured parser
    """
    parser = argparse.ArgumentParser(description="Twitter Downloader")
    parser.add_argument("-q",
                        "--query",
                        dest="query",
                        help="Query/Filter",
                        default='-')
    # Without a default, omitting -d left data_dir as None and the output
    # path became the unusable 'None/stream_<query>.json'.
    parser.add_argument("-d",
                        "--data-dir",
                        dest="data_dir",
                        help="Output/Data Directory (default: current directory)",
                        default='.')
    return parser
class MyListener(StreamListener):
    """Custom StreamListener that appends every raw message to a file.

    The output file is ``<data_dir>/stream_<query>.json``, where the query is
    first passed through format_filename() so it is safe to use in a name.
    """

    def __init__(self, data_dir, query):
        """Remember where the stream data should be written.

        Arguments:
        data_dir -- directory that will hold the output file
        query -- the tracked query, used to build the file name
        """
        # Run the base-class initializer so whatever state StreamListener
        # sets up for itself exists on this instance too.
        super(MyListener, self).__init__()
        query_fname = format_filename(query)
        self.outfile = "%s/stream_%s.json" % (data_dir, query_fname)

    def on_data(self, data):
        """Append one raw stream message to the output file and echo it.

        Arguments:
        data -- the raw message as delivered by the stream
        Return:
        True, both on success and after a handled error
        """
        try:
            with open(self.outfile, 'a') as f:
                f.write(data)
                print(data)
                return True
        # Exception rather than BaseException: Ctrl-C (KeyboardInterrupt) and
        # SystemExit must propagate instead of being reported as a write
        # error followed by a 5-second sleep.
        except Exception as e:
            print("Error on_data: %s" % str(e))
            time.sleep(5)
        return True

    def on_error(self, status):
        """Print the error status reported by the stream and return True."""
        print(status)
        return True
def format_filename(fname):
    """Build a file-name-safe version of a string.

    Every character is passed through convert_valid() and the results are
    joined back together in the original order.

    Arguments:
    fname -- the file name to convert
    Return:
    String -- converted file name
    """
    return ''.join(map(convert_valid, fname))
def convert_valid(one_char):
    """Map a character to itself if it is allowed, otherwise to '_'.

    Allowed characters are '-', '_', '.', the ASCII letters and the digits.

    Arguments:
    one_char -- the char to convert
    Return:
    Character -- converted char
    """
    allowed = "-_.%s%s" % (string.ascii_letters, string.digits)
    return one_char if one_char in allowed else '_'
# NOTE(review): this classmethod is defined at module level, outside any
# class, and nothing in the visible script references it. It looks like a
# leftover monkey patch and is a candidate for removal.
@classmethod
def parse(cls, api, raw):
    """Parse ``raw`` via ``cls.first_parse`` and attach its JSON dump.

    Arguments:
    api -- passed through unchanged to ``cls.first_parse``
    raw -- the raw object to parse; also serialized with json.dumps
    Return:
    The object returned by ``cls.first_parse``, with an added ``json``
    attribute holding the serialized ``raw``
    """
    parsed = cls.first_parse(api, raw)
    parsed.json = json.dumps(raw)
    return parsed
if __name__ == '__main__':
    # Local import: only the script entry point needs it.
    import os

    parser = get_parser()
    args = parser.parse_args()

    # Fail fast when the output directory is unset or missing. Otherwise every
    # incoming tweet fails with "No such file or directory" and the listener
    # sleeps 5 seconds per message without ever saving anything.
    if args.data_dir is None or not os.path.isdir(args.data_dir):
        parser.error("output directory %r does not exist; create it and "
                     "pass it with -d/--data-dir" % (args.data_dir,))

    # Authenticate with the credentials defined in config.py.
    auth = OAuthHandler(config.consumer_key, config.consumer_secret)
    auth.set_access_token(config.access_token, config.access_secret)
    api = tweepy.API(auth)

    # Stream tweets matching the query into <data_dir>/stream_<query>.json.
    twitter_stream = Stream(auth, MyListener(args.data_dir, args.query))
    twitter_stream.filter(track=[args.query])
@Sultarazi

This comment has been minimized.

Copy link

commented Sep 14, 2015

keeps giving me 401 in the output?

@Neon22

This comment has been minimized.

Copy link

commented Sep 26, 2015

Has to be called with a query. see here for filter example.
Confusing :(
http://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/

Actually 401 errors are (apparently) generally about your credentials...

@mcasspj

This comment has been minimized.

Copy link

commented Oct 18, 2015

I'm getting a type conversion error as I do with the example on the blog, any thoughts output below:

C:\work\python\twitter>python streaming2.py -q apple -d data
Traceback (most recent call last):
File "streaming2.py", line 94, in
twitter_stream.filter(track=[args.query])
File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 430, in filter

self._start(async)

File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 346, in _start

self._run()

File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 286, in _run
raise exception
File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 255, in _run
self._read_loop(resp)
File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 298, in _read_loop
line = buf.read_line().strip()
File "C:\Anaconda3\lib\site-packages\tweepy\streaming.py", line 171, in read_line
self._buffer += self._stream.read(self._chunk_size)
TypeError: Can't convert 'bytes' object to str implicitly

Below is what I'm using
C:\work\python\twitter>python --version
Python 3.4.3 :: Anaconda 2.3.0 (64-bit)

On the example from the blog looking at my own timeline I was getting encoding issues so I used
print(status.text.encode("utf-8"))
as I'm looking at tweets which are from Central Asia

@mcasspj

This comment has been minimized.

Copy link

commented Oct 18, 2015

Just tried with
C:\work\python\twitter>c:\anaconda\python --version
Python 2.7.10 :: Anaconda 2.3.0 (64-bit)
And works fine!

@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented Oct 21, 2015

@mcasspj there's currently still an issue with Tweepy 3.4 / Python 3, so if you prefer to stay on Python 3 you can also downgrade to Tweepy 3.3 (the version I used when I first wrote this)

@pwcahyo

This comment has been minimized.

Copy link

commented Oct 29, 2015

i want 2 words in my search, for example "dengue fever" any ideas ?

@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented Nov 9, 2015

@pwcahyo you can pass the two words to the track argument, e.g.

This is equivalent to "dengue OR fever":
twitter_stream.filter(track=["dengue", "fever"])

This is equivalent to "dengue AND fever":
twitter_stream.filter(track=["dengue fever"])

Exact phrase matching is not supported by the streaming API, it is instead supported by search

@datomnurdin

This comment has been minimized.

Copy link

commented Nov 16, 2015

I got this error message, why?

Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented Nov 19, 2015

@datomnurdin did you pass the arguments for the data dir and the query?

@Rasha-Mosaed

This comment has been minimized.

Copy link

commented Feb 1, 2016

Dear bonzanini,

I try your script, but I have some questions.

1] When this code stop, or how can I stop it?

2] This code harvest a lot of information. How can I specify entities like " content of tweet, who retweet it (ids), time, Id of author's tweet, Ids of author's followers and Ids of author's following".

I thankful you for this code and help us.

Best Regards,

@HoweAC

This comment has been minimized.

Copy link

commented Feb 19, 2016

I'm also getting the same error as @datomnurdin:

Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'

@luto65

This comment has been minimized.

Copy link

commented Mar 6, 2016

On windows, with Python 3.5 and tweepy 3.5.0 it works perfectly well ! Bravo !

@yihuangwilliam

This comment has been minimized.

Copy link

commented Mar 23, 2016

Can anyone help me? Why do I get this error:
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'

@yihuangwilliam

This comment has been minimized.

Copy link

commented Mar 23, 2016

@datomnurdin hi, may i ask how to tackle the issue {Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'},thank you so much

@yihuangwilliam

This comment has been minimized.

Copy link

commented Mar 23, 2016

@bonzanini hi my friend, thank you for your sharing. I also have the problem Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json', may i ask why it happened and how to tackle it?

@yihuangwilliam

This comment has been minimized.

Copy link

commented Mar 23, 2016

@bonzanini I have seen your previous reply, unfortunately, I'm sorry I'm a rookie in programming, so I can't fully understand your code. May i ask how to pass arguments to data_dir and query? Thank you so much!

@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented Mar 31, 2016

@yihuangwilliam the get_parser() function defines the arguments that you can pass via command line:
python twitter_stream_download.py --query <query> --data-dir <data-dir>
(the shorter -q and -d are also valid)

@parthgdp

This comment has been minimized.

Copy link

commented Apr 24, 2016

Can anyone give me code for convert_file and format_file ??
What should I change over there and how?
Please explain with example.

@cosmozhang

This comment has been minimized.

Copy link

commented May 24, 2016

What is the @classmethod used for? And it is not written inside a class..

@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented May 27, 2016

@cosmozhang it's a monkey patch from previous experiments that can be safely ignored, in fact it should be removed

@mg3146

This comment has been minimized.

Copy link

commented Jun 5, 2016

@bonzanini. This is a great piece of code, and very helpful to learn from. Thanks a lot.

Quick question - has there been any updates to the API that allow for tracking multiple words? Ie, "game tomorrow", vs "game" and "tomorrow" (which would result in a ton more data and postprocessing...)

@bonzanini

This comment has been minimized.

Copy link
Owner Author

commented Jun 8, 2016

@markgillis0 unfortunately exact phrase matching is not supported by the twitter streaming API yet: https://dev.twitter.com/streaming/overview/request-parameters#track
on the other side, it is supported by the search API

@shannonwho

This comment has been minimized.

Copy link

commented Jul 1, 2016

Hi! Thank you very much for sharing.

The code works fine when I input the query for apple, but no other keyword can be input in. Do you happen to know why is that?

Any suggestions will be really helpful!

@ajax-jones

This comment has been minimized.

Copy link

commented Jul 3, 2016

I find that the 401 is what you get before you set up your config.py with the Twitter app credentials. I get the "None" error if -d is not specified, so I create a subdirectory and pass it with -d, and then it works fine:
sudo mkdir mydir
sudo python tweet.py -q apple -d mydir

@Parth-Vader

This comment has been minimized.

Copy link

commented Jul 8, 2016

If I want to store just the "text" portion , how can I do it?

@kmrsatish17

This comment has been minimized.

Copy link

commented Oct 11, 2016

I'm getting this error. Please help!!
Error on_data: [Errno 2] No such file or directory: 'data/stream_apple.json'

@kjoth

This comment has been minimized.

Copy link

commented Dec 4, 2016

How do I get to list of my followers?

for friends in tweepy.Cursor(api.followers).items():
fw.write('Friends: ' + str(follower_ids) + "\n")

follower_ids is not found

@rsathishr

This comment has been minimized.

Copy link

commented Feb 15, 2017

Am getting an error!! pls help me out

Failed on data: %s '_io.TextIOWrapper' object has no attribute 'Write'
ERROR: execution aborted

@GabrielYe

This comment has been minimized.

Copy link

commented Mar 14, 2017

@bonzanini Thanks for your code. How can I get all of the tweets of a specific user ? For example, I wanna get tweets of Kobe.
Thank you.

@yuchenQ

This comment has been minimized.

Copy link

commented Mar 19, 2017

@bonzanini Hi thanks for you great example, may I ask what use of
@classmethod
def parse(cls, api, raw):

thks

@baoyanpeng

This comment has been minimized.

Copy link

commented Apr 19, 2017

Thanks a lot,and i have a question. Whether can i obtain the data about some keywords before today?

@Dixith-Reddy-Nayeni

This comment has been minimized.

Copy link

commented Jul 16, 2017

Thank u very much...it worked for me..:)

@vibhuti1990

This comment has been minimized.

Copy link

commented Aug 30, 2017

Hi Could you please help me with the below error.

Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'

@zeanong

This comment has been minimized.

Copy link

commented Sep 12, 2017

Works well. Thank you!

@L-Kov

This comment has been minimized.

Copy link

commented Sep 22, 2017

I get the error:
line 96, in
auth = OAuthHandler(config.consumer_key, config.consumer_secret)
AttributeError: 'module' object has no attribute 'consumer_key'

what config module do you use?

@Kanishk-Anand

This comment has been minimized.

Copy link

commented Oct 31, 2017

I keep getting 401 as output. I have set up the config.py file with my credentials, still it gives 401. Any help?

@abubakarsaddique

This comment has been minimized.

Copy link

commented Jan 3, 2018

How to limit the tweets?

@salsaeede

This comment has been minimized.

Copy link

commented Jan 5, 2018

i keep getting the below , can someone help me to successfully import config

import config
Traceback (most recent call last):

File "C:\Users\salman\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)

File "", line 1, in
import config

File "C:\Users\salman\Anaconda3\lib\site-packages\config.py", line 733
except Exception, e:
^
SyntaxError: invalid syntax

@SjorsG

This comment has been minimized.

Copy link

commented Jan 15, 2018

I'm so sorry bothering you after you already have written this beautiful piece of code.
I think the answer lies in the comment section in your code, but it seems like i just can't get it right.
How do i edit config.py with your configuration, then:
mkdir data
python twitter_stream_download.py -q apple -d data

I get this error:

line 96, in
auth = OAuthHandler(config.consumer_key, config.consumer_secret)
AttributeError: 'module' object has no attribute 'consumer_key'

Thank you for your time

@ericdorsey

This comment has been minimized.

Copy link

commented Mar 15, 2018

@SjorsG
Are you sure you have a file called "config.py" in the same folder, that has a variable in it that's called "consumer_key", that has your key assigned to it?

consumer_key = 'YOURCONSUMERKEYHERE'

@pbajpai2

This comment has been minimized.

Copy link

commented Jul 10, 2018

I'm using Python 3.7.0 and downloaded Tweepy 3.6.0

And after running config.py (which ends successfully) and doing the mkdir data step. I get the following error when running the twitter_stream_download.py

**C:\Users\pbajp\Git\datasci_course_materials\assignment1\alternate>python twitter_stream_download.py -q apple -d data
Traceback (most recent call last):
File "twitter_stream_download.py", line 9, in
import tweepy
File "C:\Users\pbajp\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\__init__.py", line 17, in <module>
from tweepy.streaming import Stream, StreamListener
File "C:\Users\pbajp\AppData\Local\Programs\Python\Python37\lib\site-packages\tweepy\streaming.py", line 358
def start(self, async):
^
SyntaxError: invalid syntax**

Can anyone guide me on next steps to debug?

@agcala

This comment has been minimized.

Copy link

commented Jul 12, 2018

@rsathishr
It is "write" not "Write"

@Germain94

This comment has been minimized.

Copy link

commented Aug 8, 2018

Hello everyone.
First of all, thank you for your work @bonzanini !
I'm trying to search for tweets from two weeks ago until now. Can I transform your code to do that ?

@AreRex14

This comment has been minimized.

Copy link

commented Sep 25, 2018

Work fine. Thank you for your work @bonzanini

@arnabghose997

This comment has been minimized.

Copy link

commented Jan 3, 2019

For those who are facing the following error:

Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'

You have to create a folder named "data" in the same directory, for the code to work. Hope this helps.

@Carpintonto

This comment has been minimized.

Copy link

commented Jan 8, 2019

maybe I am totally missing something, but it sure seems to me that the script is totally functional without import json or the @classmethod

@Benasir1

This comment has been minimized.

Copy link

commented Jan 31, 2019

For those who are facing the following error:

Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'

You have to create a folder named "data" in the same directory, for the code to work. Hope this helps.

@arnabghose997. I still face the same problem after creating folder 'data' in the same directory

@PranjalShekhawat

This comment has been minimized.

Copy link

commented Jun 13, 2019

Any idea how to resolve this error please

runfile('C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py', wdir='C:/Users/chhaj/OneDrive/Desktop')
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Error on_data: [Errno 2] No such file or directory: 'None/stream_-.json'
Traceback (most recent call last):

File "", line 1, in
runfile('C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py', wdir='C:/Users/chhaj/OneDrive/Desktop')

File "C:\Users\chhaj\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)

File "C:\Users\chhaj\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)

File "C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py", line 95, in
twitter_stream.filter(track=[args.query])

File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 453, in filter
self._start(is_async)

File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 368, in _start
self._run()

File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 269, in _run
self._read_loop(resp)

File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 331, in _read_loop
self._data(next_status_obj)

File "C:\Users\chhaj\Anaconda3\lib\site-packages\tweepy\streaming.py", line 303, in _data
if self.listener.on_data(data) is False:

File "C:/Users/chhaj/OneDrive/Desktop/test4 tweet search.py", line 50, in on_data
time.sleep(5)

KeyboardInterrupt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.