#!/usr/bin/env python
"""
Twitter's API doesn't allow you to get replies to a particular tweet. Strange
but true. But you can use Twitter's Search API to search for tweets that are
directed at a particular user, and then search through the results to see if
any are replies to a given tweet. You probably are also interested in the
replies to any replies as well, so the process is recursive. The big caveat
here is that the search API only returns results for the last 7 days. So
you'll want to run this sooner rather than later.

replies.py will read a line oriented JSON file of tweets and look for replies
using the above heuristic. Any replies that are discovered will be written as
line oriented JSON to stdout:

    ./replies.py tweets.json > replies.json

It also writes a log to replies.log if you are curious what it is doing...which
can be handy since it will sleep for periods of time to work within the
Twitter API quotas.

PS. you'll need to:

    pip install python-twitter

and then set the following environment variables for it to work:

  - CONSUMER_KEY
  - CONSUMER_SECRET
  - ACCESS_TOKEN
  - ACCESS_TOKEN_SECRET
"""
import sys | |
import json | |
import time | |
import logging | |
import twitter | |
import urllib.parse | |
from os import environ as e | |
# Shared python-twitter client used for every search request in this script.
# The four OAuth credentials are read from the environment at import time, so
# a missing variable raises KeyError before any work starts.
_api_credentials = {
    "consumer_key": e["CONSUMER_KEY"],
    "consumer_secret": e["CONSUMER_SECRET"],
    "access_token_key": e["ACCESS_TOKEN"],
    "access_token_secret": e["ACCESS_TOKEN_SECRET"],
}
t = twitter.Api(sleep_on_rate_limit=True, **_api_credentials)
def tweet_url(t):
    """Return the twitter.com permalink for the status ``t``.

    The link is assembled from ``t.user.screen_name`` and ``t.id``. Note that
    the parameter name shadows the module-level API client inside this
    function; only the status passed in is used here.
    """
    screen_name = t.user.screen_name
    status_id = t.id
    return "https://twitter.com/%s/status/%s" % (screen_name, status_id)
def get_tweets(filename):
    """Yield a ``twitter.Status`` for every tweet in a line-oriented JSON file.

    Each non-blank line of ``filename`` is parsed as one JSON object and
    converted with ``twitter.Status.NewFromJsonDict``.

    The file is opened in a ``with`` block so the handle is closed when the
    generator is exhausted or closed, instead of being left to the garbage
    collector. Blank lines are skipped rather than crashing ``json.loads``.
    The file is read as UTF-8, the encoding JSON text is expected to use.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(filename, encoding="utf-8") as tweets:
        for line in tweets:
            if not line.strip():
                continue
            yield twitter.Status.NewFromJsonDict(json.loads(line))
def get_replies(tweet):
    """Yield every reply to ``tweet``, and recursively the replies to those.

    Twitter's API has no "replies to this tweet" call, so this searches for
    tweets directed at the author (``to:<screen_name>``) that are newer than
    ``tweet`` and keeps the ones whose ``in_reply_to_status_id`` matches.
    Each reply found is yielded and then searched for its own replies.

    Results are paged from newest to oldest by lowering ``max_id`` after each
    page. Paging stops on an empty page, because a page shorter than the
    requested count is not a reliable sign that it is the last one.

    API errors are logged and the same request is retried after 60 seconds,
    indefinitely; this is deliberate best-effort behaviour for long runs.
    """
    user = tweet.user.screen_name
    tweet_id = tweet.id
    max_id = None
    logging.info("looking for replies to: %s", tweet_url(tweet))
    while True:
        try:
            # Use term= rather than raw_query=: when raw_query is given,
            # python-twitter ignores since_id, max_id and count, so paging
            # would never advance past the first page.
            replies = t.GetSearch(
                term="to:%s" % user,
                since_id=tweet_id,
                max_id=max_id,
                count=100,
            )
        except twitter.error.TwitterError as err:
            logging.error("caught twitter api error: %s", err)
            time.sleep(60)
            continue
        if not replies:
            break
        for reply in replies:
            logging.info("examining: %s", tweet_url(reply))
            if reply.in_reply_to_status_id == tweet_id:
                logging.info("found reply: %s", tweet_url(reply))
                yield reply
                # recursive magic to also get the replies to this reply
                yield from get_replies(reply)
        # max_id is inclusive, so step one below the oldest tweet seen;
        # otherwise that tweet comes back at the top of the next page and a
        # matching reply would be yielded twice.
        max_id = min(reply.id for reply in replies) - 1
        if max_id <= tweet_id:
            # Nothing can lie between since_id (exclusive) and max_id.
            break
if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError traceback when
    # the tweets file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: replies.py tweets.json > replies.json")
    tweets_file = sys.argv[1]
    # Progress and API errors go to replies.log so stdout carries only JSON.
    logging.basicConfig(filename="replies.log", level=logging.INFO)
    for tweet in get_tweets(tweets_file):
        for reply in get_replies(tweet):
            # One JSON document per line (line-oriented JSON).
            print(reply.AsJsonString())
This comment has been minimized.
This comment has been minimized.
Could you give an example for tweets_file?
This comment has been minimized.
This comment has been minimized.
Same deal, how do you prepare the tweets file in the first place? |
This comment has been minimized.
This comment has been minimized.
This line in tweets.json worked for me:
|
This comment has been minimized.
This comment has been minimized.
How would this be done in reverse? - as in, you have a certain reply & want to find the ID of the original tweet it was in reply to |
This comment has been minimized.
This comment has been minimized.
I keep getting a key error when i put my consumer_key any workarounds? |
This comment has been minimized.
This comment has been minimized.
I am using Twitter4J implementation... Can you tell me how can we get tweets replies there ? |
This comment has been minimized.
This comment has been minimized.
it doesn't work ??? |
This comment has been minimized.
This comment has been minimized.
hi! |
This comment has been minimized.
This comment has been minimized.
How can I get a users replies to another tweets and get the original tweet? |
This comment has been minimized.
This comment has been minimized.
thanks for this, was going to start from scratch, but I always appreciate a template! |
This comment has been minimized.
This comment has been minimized.
Thanks for this. While it seems the replies.log will provide many replies not on this particular tweet ID but for this user's other tweets. |
This comment has been minimized.
This comment has been minimized.
Thank you for your code. but I can't get 100 replies per tweet with the following command, although the count is set to 100.
I can only get 15 recent replies of the candidate.. Is there any way for me to scrap 100 replies by tweet id? |
This comment has been minimized.
This comment has been minimized.
The replies.log came out empty :/ |
This comment has been minimized.
This comment has been minimized.
The standard search API searches against a sampling of recent Tweets published in the past 7 days. |
This comment has been minimized.
This comment has been minimized.
I don't know what is "tweets_file"... |
This comment has been minimized.
This comment has been minimized.
I used this method, but the resulting reply count does not match what twitter.com shows: the count is short by 1. Is max_id or since_id causing the error?
This comment has been minimized.
This comment has been minimized.
Problem is, when you specify the raw_query parameter, the GetSearch function discards all the other parameters as specified here https://python-twitter.readthedocs.io/en/latest/_modules/twitter/api.html#Api.GetSearch |
This comment has been minimized.
This comment has been minimized.
pls i would love to know if i will fill in the tweet link url in the def tweet url(t) function |
This comment has been minimized.
This comment has been minimized.
I can confirm the point made by @serdec - using raw meant the other fields like max_id were ignored so I was stuck on the first page. I took out 'raw' key and replaced with term key and value. This works great.
|
This comment has been minimized.
This comment has been minimized.
There's a problem with breaking out of the while loop - it happens too soon and will miss the last page of results, which will probably have fewer than 100 tweets. Also bear in mind that the API's max ID filter is inclusive, which means that the last tweet of page N will be at the start of page N+1, so you double count and it's hard to know when you have the last page. So my implementation uses one less than the last ID as the max ID, so that reply will be excluded from the next page. Then I check for zero tweets on a page and break from the while loop.
My suggestion is also that the recursive reply magic can be commented out if it's not needed. And to save getting rate limited too easily from frequent requests. |
This comment has been minimized.
This comment has been minimized.
I have a problem that, I have a file which looks like this: ['972651', '80080680482123777', '0.0']->['189397006', '80080680482123777', '1.8'] First value represent user id and second value tweet id then after "->" symbol, first value represent response user id corresponding to same tweet id. I want to retrieve corresponding responses of the source tweet from particular users. Can anyone help me. Thanks in advance..!! |
This comment has been minimized.
This comment has been minimized.
@PAVITHRA-CP Looks like you want to get users conversation belong to the pointed tweet. You can search the target tweet. or search the user you want to get(use the endpoint search/tweets). just set the since_id to tweet id. Maybe you can get what you want. |
This comment has been minimized.
This comment has been minimized.
Hi i have a small task based on this, i am paying for this assistance. Please Reach me at fred.haule@gmail.com. Thanks for sharing Great work! |
This comment has been minimized.
This comment has been minimized.
|
This comment has been minimized.
This comment has been minimized.
Is the GetSearch API equivalent of /timeline/home on the twitter Web ? |
This comment has been minimized.
This comment has been minimized.
in_reply_to_status_id is the attribute that gives the tweet id of original tweet |
This comment has been minimized.
This comment has been minimized.
Yes, that's the easy part, assuming that tweet hasn't been deleted. But finding out what tweets reply to a given tweet is currently not possible with Twitter's public API. |
This comment has been minimized.
This comment has been minimized.
FYI, this replies functionality is now part of the twarc utility. |
This comment has been minimized.
This comment has been minimized.
Please write out some steps with hashtags, from which we can read and understand.
This comment has been minimized.
This comment has been minimized.
It only gives replies for some tweets; for other tweets it gives 0 replies. How can we get more than 15 replies through this code?
This comment has been minimized.
This comment has been minimized.
Because it relies on the search API, it only works for tweet threads that were alive in the last week. I'm assuming you have been trying to use it with some old threads? You may want to take a look at twint for scraping Twitter instead of using twarc, which relies on the API.
This comment has been minimized.
This comment has been minimized.
Ok.
Thanks for letting me know.
…On Fri, Dec 27, 2019, 9:11 PM Ed Summers ***@***.***> wrote:
Because it relies on the search API It only works for tweet threads that
were alive in the last week. I'm assuming you have been trying to use it
with some old threads? You may want to take a look at twint for scraping
Twitter instead of using the twarc which relies on the API.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/54e6f7d63df3866a87a15aed17b51eaf?email_source=notifications&email_token=AMDJVYOSBNPBMOMHQFGPQPTQ2YO2FA5CNFSM4HNBBVI2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAF6RDI#gistcomment-3121716>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AMDJVYM4ZQJPGVZCDXZ23Y3Q2YO2FANCNFSM4HNBBVIQ>
.
|
This comment has been minimized.
This comment has been minimized.
thanks
…On Fri, Dec 27, 2019 at 9:02 PM Ms-Seeker ***@***.***> wrote:
Ok.
Thanks for letting me know.
On Fri, Dec 27, 2019, 9:11 PM Ed Summers ***@***.***> wrote:
> Because it relies on the search API It only works for tweet threads that
> were alive in the last week. I'm assuming you have been trying to use it
> with some old threads? You may want to take a look at twint for scraping
> Twitter instead of using the twarc which relies on the API.
>
> —
> You are receiving this because you commented.
> Reply to this email directly, view it on GitHub
> <
https://gist.github.com/54e6f7d63df3866a87a15aed17b51eaf?email_source=notifications&email_token=AMDJVYOSBNPBMOMHQFGPQPTQ2YO2FA5CNFSM4HNBBVI2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAF6RDI#gistcomment-3121716
>,
> or unsubscribe
> <
https://github.com/notifications/unsubscribe-auth/AMDJVYM4ZQJPGVZCDXZ23Y3Q2YO2FANCNFSM4HNBBVIQ
>
> .
>
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub
<https://gist.github.com/54e6f7d63df3866a87a15aed17b51eaf?email_source=notifications&email_token=AKQG67E72F6CKLBTOLPKXA3Q2YRJ5A5CNFSM4HNBBVI2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAF6RD2#gistcomment-3121725>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AKQG67EBS7HMA724J246FELQ2YRJ5ANCNFSM4HNBBVIQ>
.
|
This comment has been minimized.
This comment has been minimized.
Hey, |
This comment has been minimized.
This comment has been minimized.
@E123omega - this little script is for collecting JSON. What are you trying to do with the JSON you have? |
This comment has been minimized.
This comment has been minimized.
@edsu The aim of the project is to make a report about the functioning of a particular Twitter helpdesk, so I am trying to organise the tweets in such a way that I can easily look at a thread.
This comment has been minimized.
This comment has been minimized.
All the script does is collect the tweets as JSON. But if you want to construct threads out of the messages, there is a utility here that helps with this if you are curious: https://github.com/DocNow/twarc/blob/master/utils/network.py
This comment has been minimized.
This comment has been minimized.
@Allen-Qiu If anyone is wondering, the file should be in JSONL format (as the code describes). You can get that file from twarc or tweepy (that is the format output by those libraries).
This comment has been minimized.
Thanks!