Get the content of Twitter/Mastodon posts in Logseq format. The former relies on the service at tweetpik.com; the latter uses the official API.
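Each link becomes one Logseq quote block. A minimal, runnable sketch of that output format, filling in the same Mastodon template the script defines below with hypothetical values (the post text, username, timestamp and URL are made up):

# Hypothetical example: render one Mastodon post with the script's Logseq template.
MASTODON_TEMPLATE = ("- #+BEGIN_QUOTE\n"
                     "{}\n"
                     "— {} [{}]({})\n"
                     "#+END_QUOTE\n")

print(MASTODON_TEMPLATE.format(
    "Hello, fediverse!",                               # post content (markdownified)
    "someuser",                                        # account username
    "2023-02-12T10:00:00.000Z",                        # created_at timestamp
    "https://mastodon.social/@someuser/109999999999999999"))  # post URL
# Prints:
# - #+BEGIN_QUOTE
# Hello, fediverse!
# — someuser [2023-02-12T10:00:00.000Z](https://mastodon.social/@someuser/109999999999999999)
# #+END_QUOTE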
get_twitter_mastodon_content_with_logseq_format.py

import json
import requests
import re
import argparse
from argparse import RawTextHelpFormatter
from datetime import datetime
from markdownify import markdownify as md

# Logseq quote templates: content, author, timestamp, link and (for Twitter) counters.
TWITTER_TEMPLATE = ("- #+BEGIN_QUOTE\n"
                    "{}\n"
                    "— {} [{}](https://twitter.com/{}/status/{})\n"
                    "❤️ {} 🔁 {} 💬 {}\n"
                    "#+END_QUOTE\n")

MASTODON_TEMPLATE = ("- #+BEGIN_QUOTE\n"
                     "{}\n"
                     "— {} [{}]({})\n"
                     "#+END_QUOTE\n")


def from_twitter(line):
    base_url = 'https://tweetpik.com/api/v2/tweets?url='
    url = base_url + format_link(line)
    responseTxt = ""
    try:
        res = requests.get(url, headers={
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'application/json',
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38',
            'Referer': 'https://tweetpik.com/'
        })
        responseTxt = format_response(res.text)
        res = json.loads(responseTxt)[0]
    except Exception as e:
        print(e)
        print(responseTxt)
        print("Handling " + url + " raised an error")
        return ""
    tweetContent = md(res['textHtml'])
    if 'photos' in res:
        for media in res['photos']:
            tweetContent += "\n![🖼️]({})".format(media)
    tweet = TWITTER_TEMPLATE.format(
        tweetContent,
        md(res['nameHtml']),
        datetime.strptime(res['datetime'], '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y%m%d, %H:%M:%S'),
        res['handler'], res['id'],
        str(res['likes']), str(res['retweets']), str(res['replies']))
    with open("backup_res_twitter.json", "a", encoding='UTF-8') as f:
        f.write(str(res) + "\n")
    print("Handled tweet {}. Congratulations! 🎉".format(url))
    return tweet


def from_mastodon(line):
    split_line = format_link(line).split('/')
    url = 'https://' + split_line[2] + '/api/v1/statuses/' + split_line[4]
    try:
        res = requests.get(url, headers={
            'Accept-Encoding': 'gzip, deflate',
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Edg/94.0.992.38',
        }).json()
    except Exception as e:
        print(e)
        print("Handling " + line + " raised an error")
        return ""
    post_content = format_response(md(res['content']))
    if 'media_attachments' in res:
        for media in res['media_attachments']:
            post_content += "\n![🖼️]({})".format(media['url'])
    post = MASTODON_TEMPLATE.format(
        post_content,
        res['account']['username'], res['created_at'], res['url'])
    with open("backup_res_mastodon.json", "a", encoding='UTF-8') as f:
        f.write(str(res) + "\n")
    print("Handled Mastodon post {}. Congratulations! 🎉".format(res['url']))
    return post


def format_link(link):
    # Strip tracking query parameters, spaces and dashes from the input line.
    link = re.sub(r"\?s=\d+&t=\w+\n*", "", link)
    link = re.sub(r" ", "", link)
    link = re.sub(r"-", "", link)
    return link


def format_response(response):
    # Remove raw newlines and turn escaped double quotes into single quotes
    # so the payload can be parsed and rendered cleanly.
    response = re.sub(r'\n', '', response)
    response = re.sub(r'\n\n', '\n', response)
    response = re.sub(r'\\"', '\'', response)
    return response


def get_info(file):
    posts_collection = []
    for line in file:
        if re.search(r"twitter\.com", line):
            posts_collection.append(from_twitter(line))
        else:
            posts_collection.append(from_mastodon(line))
    return posts_collection


def output(file_name, lines):
    with open(file_name, 'w', encoding='UTF-8') as f:
        for line in lines:
            f.write(line)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog="get_twitter_mastodon_content_with_logseq_format.py",
        description="Get the content of Twitter/Mastodon posts in one run.\n\n"
                    "The former relies on the v2 API of http://tweetpik.com, the latter uses the official API.\n"
                    "The script needs an input file (one link per line); without the --output option the\n"
                    "input file itself is overwritten with the result. Besides, all successful responses\n"
                    "are saved in a backup_res_xxx.json file",
        formatter_class=RawTextHelpFormatter)
    parser.add_argument("file", type=argparse.FileType('r', encoding='UTF-8'),
                        help="The file of source URL links to handle")
    parser.add_argument("-o", "--output", action="store",
                        help="Write the result to this file instead of overwriting the input file")
    args = parser.parse_args()
    if args.file:
        response = get_info(args.file)
        if args.output is not None:
            output(args.output, response)
        else:
            output(args.file.name, response)
requirements.txt

requests
markdownify
bGZo commented Feb 12, 2023

Quick Start

$ pip3 install --trusted-host pypi.tuna.tsinghua.edu.cn -i https://pypi.tuna.tsinghua.edu.cn/simple -r .\requirements.txt
$ python3 get_twitter_mastodon_content_with_logseq_format.py input.file
$ python3 get_twitter_mastodon_content_with_logseq_format.py input.file -o output.file
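
A hypothetical input.file, with one Twitter or Mastodon status link per line (both links below are made-up examples, not real posts):

https://twitter.com/someuser/status/1624567890123456789
https://mastodon.social/@someuser/109999999999999999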

Usage

$ python3 get_twitter_mastodon_content_with_logseq_format.py --help
usage: get_twitter_mastodon_content_with_logseq_format.py [-h] [-o OUTPUT] file

Get the content of Twitter/Mastodon posts in one run.

The former relies on the v2 API of http://tweetpik.com, the latter uses the official API.
The script needs an input file (one link per line); without the --output option the
input file itself is overwritten with the result. Besides, all successful responses
are saved in a backup_res_xxx.json file

positional arguments:
  file                  The file of source URL links to handle

optional arguments:
  -h, --help            show this help message and exit
  -o OUTPUT, --output OUTPUT
                        Write the result to this file instead of overwriting the input file
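
Note that the backup_res_xxx.json files mentioned above are written with str(res), i.e. one Python dict repr per line rather than strict JSON, so json.loads will usually fail on them. A minimal sketch for reading a backup back in, assuming backup_res_mastodon.json was produced by an earlier run:

import ast

# Each line holds the repr of one response dict; ast.literal_eval parses
# plain dicts/lists/strings/numbers/booleans/None safely.
with open("backup_res_mastodon.json", encoding="UTF-8") as f:
    posts = [ast.literal_eval(line) for line in f if line.strip()]

print("{} posts backed up".format(len(posts)))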

TODOs

  • Self-host the service instead of relying on http://tweetpik.com
  • The Logseq plugin is still waiting for a refactor.
  • More usability.
  • More sustainable.
