-
-
Save divyajyotiuk/9fb29c046e1dfcc8d5683684d7068efe to your computer and use it in GitHub Desktop.
import json | |
import glob | |
all_bookmarks = [] | |
md_file = open("bookmarks.md", "w+") # saving in markdown file, if no file exists using '+' creates one | |
files = [file for file in glob.glob("JSONBookmarks/*")] # using glob to read all files from the folder | |
for file_name in files: | |
print(file_name) | |
with open(file_name) as bk: | |
data = json.load(bk) # reads json data | |
all_bookmarks.append(data) | |
#print(json.dumps(response, indent = 1)) # pretty prints JSON data | |
# Function to get profile pic if you need | |
def getAuthorProfilePic(id): | |
return response["users"][id]["profile_image_url_https"] | |
# Function to get username | |
def getUserName(id): | |
return response["users"][id]["screen_name"] | |
# Function to construct bookmarked tweet url | |
def constructUrl(tweet_id, username): | |
return "https://twitter.com/" + username + "/status/" + tweet_id | |
# Function to format the text to write in file | |
def formatText(text): | |
text = text.replace("\n-", " ") | |
text = text.replace("\n", " ") | |
text = text[:100] + "..." | |
return text | |
# Run a loop through all_bookmarks | |
for data in all_bookmarks: | |
response = data["globalObjects"] | |
for tweet_id in response["tweets"]: | |
tweet = response["tweets"][tweet_id] | |
text = tweet["full_text"] | |
text = formatText(text) | |
url = constructUrl(tweet_id, getUserName(tweet["user_id_str"])) | |
bookmarked_tweet = "\n- " + text + "\n" + "\t - " + url | |
md_file.write(bookmarked_tweet) |
import json | |
import glob | |
import functools | |
all_bookmarks = [] | |
md_file = open("bookmarks.md", "w+") # saving in markdown file, if no file exists using '+' creates one | |
files = [file for file in glob.glob("JSONBookmarks/*")] # using glob to read all files from the folder | |
for file_name in files: | |
print(file_name) | |
with open(file_name) as bk: | |
data = json.load(bk) # reads json data | |
all_bookmarks.append(data) | |
#print(json.dumps(response, indent = 1)) # pretty prints JSON data | |
# Function to construct bookmarked tweet url | |
def constructUrl(tweet_id, username): | |
return "https://twitter.com/" + username + "/status/" + tweet_id | |
# Function to format the text to write in file | |
def formatText(text): | |
text = text.replace("\n-", " ") | |
text = text.replace("\n", " ") | |
text = text[:100] + "..." | |
return text | |
# Method to get value of nested dictionary | |
def deep_get(dictionary, keys, default=None): | |
return functools.reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary) | |
# Run a loop through all_bookmarks | |
for data in all_bookmarks: | |
instructions_list = deep_get(data, "data.bookmark_timeline.timeline.instructions") | |
tweet_entries_list = deep_get(instructions_list[0], "entries") | |
for tweet_entry in tweet_entries_list: | |
result = deep_get(tweet_entry, "content.itemContent.tweet_results.result") | |
username = deep_get(result, "core.user.legacy.screen_name") | |
text = deep_get(result, "legacy.full_text") | |
tweet_id = deep_get(result, "rest_id") | |
if tweet_id == None or username == None or text == None: | |
continue | |
text = formatText(text) | |
url = constructUrl(tweet_id, username) | |
bookmarked_tweet = "\n- " + text + "\n" + "\t - " + url | |
md_file.write(bookmarked_tweet) |
import json | |
import glob | |
import functools | |
all_bookmarks = [] | |
md_file = open("bookmarks.md", "w+") # saving in markdown file, if no file exists using '+' creates one | |
files = [file for file in glob.glob("JSONBookmarks/*")] # using glob to read all files from the folder | |
for file_name in files: | |
print(file_name) | |
with open(file_name) as bk: | |
data = json.load(bk) # reads json data | |
all_bookmarks.append(data) | |
# print(json.dumps(all_bookmarks, indent = 1)) # pretty prints JSON data | |
# Function to construct bookmarked tweet url | |
def constructUrl(tweet_id, username): | |
return "https://twitter.com/" + username + "/status/" + tweet_id | |
# Function to format the text to write in file | |
def formatText(text): | |
text = text.replace("\n-", " ") | |
text = text.replace("\n", " ") | |
text = text[:100] + "..." | |
return text | |
# Method to get value of nested dictionary | |
def deep_get(dictionary, keys, default=None): | |
return functools.reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary) | |
# Run a loop through all_bookmarks | |
for data in all_bookmarks: | |
instructions_list = deep_get(data, "data.bookmark_timeline.timeline.instructions") | |
if instructions_list==None: continue | |
tweet_entries_list = deep_get(instructions_list[0], "entries") | |
for tweet_entry in tweet_entries_list: | |
result = deep_get(tweet_entry, "content.itemContent.tweet_results.result") | |
username = deep_get(result, "core.user_results.result.legacy.screen_name") | |
text = deep_get(result, "legacy.full_text") | |
tweet_id = deep_get(result, "rest_id") | |
if tweet_id == None or username == None or text == None: continue | |
text = formatText(text) | |
url = constructUrl(tweet_id, username) | |
bookmarked_tweet = "\n- " + text + "\n" + "\t - " + url | |
md_file.write(bookmarked_tweet) |
Seems like twitter has updated their api response! I'll put an updated version as soon as possible.
Edit: I have updated the code. Checkout version v2 of the code. Thank you for the update!
Thanks! As of today, the following is correct. username = deep_get(result, "core.user_results.result.legacy.screen_name")
Thank you. Will make a note to update. 👍
Hi, thanks a lot for this useful script!
Before line 35 there should be an if condition to continue, if instructions_list==None: continue
.
Thanks, I had to add encodings to open
to work on Windows.
Also edited to the current json scheme.
Code
import json
import glob
import functools
all_bookmarks = []
md_file = open("bookmarks.md", "w+", encoding="utf8") # saving in markdown file, if no file exists using '+' creates one
files = [file for file in glob.glob("raw/*")] # using glob to read all files from the folder
for file_name in files:
print(file_name)
with open(file_name, encoding="utf8") as bk:
data = json.load(bk) # reads json data
all_bookmarks.append(data)
# print(json.dumps(all_bookmarks, indent = 1)) # pretty prints JSON data
# Function to construct bookmarked tweet url
def constructUrl(tweet_id, username):
return "https://twitter.com/" + username + "/status/" + tweet_id
# Function to format the text to write in file
def formatText(text):
text = text.replace("\n-", " ")
text = text.replace("\n", " ")
text = text[:100] + "..."
return text
# Method to get value of nested dictionary
def deep_get(dictionary, keys, default=None):
return functools.reduce(lambda d, key: d.get(key, default) if isinstance(d, dict) else default, keys.split("."), dictionary)
# Run a loop through all_bookmarks
for data in all_bookmarks:
instructions_list = deep_get(data, "data.bookmark_timeline.timeline.instructions")
if instructions_list==None: continue
tweet_entries_list = deep_get(instructions_list[0], "entries")
for tweet_entry in tweet_entries_list:
result = deep_get(tweet_entry, "content.itemContent.tweet_results.result")
username = deep_get(result, "core.user_results.result.legacy.screen_name")
text = deep_get(result, "legacy.full_text")
tweet_id = deep_get(result, "rest_id")
if tweet_id == None or username == None or text == None: continue
text = formatText(text)
url = constructUrl(tweet_id, username)
bookmarked_tweet = "\n- " + text + "\n" + "\t - " + url
md_file.write(bookmarked_tweet)
Thanks for the update! I have updated the code. Checkout v3 version.
If you call .sort()
on the file name list, the output seems to be in the correct order (by bookmark add date if I'm not mistaken.) That is, assuming the user saved their response JSON files in order of appearance in their devtools.
As it is, my files would sort like this:
You might change line 8 to read something like this:
globresult = glob.glob("JSONBookmarks/*")
globresult.sort() # Get files in name order
files = [file for file in globresult]
After this change, my files sorted like this:
(I also modified the Python script to look in a different folder, for my own purposes.)
Dear @divyajyotiuk , v3 works for me, thanks.
Dear @AverageHelper , yes, the sort is useful to have.
I'm a little unclear on the origin of the JSONBookmarks/*
data. Could you clarify how to initially populate the bookmark data? Are you using data from /2/users/:id/bookmarks?
I'm a little unclear on the origin of the
JSONBookmarks/*
data. Could you clarify how to initially populate the bookmark data? Are you using data from /2/users/:id/bookmarks?
See https://dev.to/divyajyotiuk/exporting-your-twitter-bookmarks-in-markdown-file-19bj
@divyajyotiuk I just did an export of my bookmarks and the json looked like this (note the "v2"):
{"data":{"bookmark_timeline_v2":{"timeline":{"instructions":[{"type":"TimelineAddEntries","entries":[{...
When changing that in your v3 code, it works smoothly for me.
Thanks ! The v3 works for me.
In my case, I have to add encoding utf8 :
md_file = open("bookmarks.md", "w+", encoding="utf8")
open(file_name, encoding="utf8")
Like @kiu I also needed to update line 34 to this to parse the bookmarks I just downloaded:
instructions_list = deep_get(data, "data.bookmark_timeline_v2.timeline.instructions")
(the bookmark_timeline_v2
is the new part)
Hi, thank you for writing this script! A quick question - the response I got from Twitter is different from the one you showed. Here's the link for your reference - https://jsonformatter.org/json-editor/fd67cb.
Is there something I should have done differently while getting the response from Twitter? I used DevTools to get this response.