# Source: GitHub Gist by @CyberRex0 (CyberRex0/d481c4c2be6dc47fee4b50cefadf2074),
# last active August 23, 2023 23:34.
# Misskey Note Dump Tool for Meilisearch
# You need to install psycopg2, pytz and requests from pip
#
# python3 dump_misskey_note_data.py --db-host HOST --db-user USER --db-pass PASS --db-name NAME --db-port PORT \
# --ms-base-url MEILISEARCH_BASE_URL --dump-per PER_NUM --master-key MASTER_KEY --index INDEX_NAME
#
# [Required Parameters]
# --db-user, --db-name
#
# [Optional Parameters]
# --db-host, --db-port, --db-pass, --ms-base-url, --dump-per, --master-key, --index
import argparse
import datetime
import time

import psycopg2
import psycopg2.extras
import pytz
import requests
class ArgT:
    """Typed description of the parsed command-line options.

    The class only declares annotations (it assigns no values); it exists
    so that the ``args`` object used by the script has documented,
    type-checked attributes.
    """

    # PostgreSQL connection settings.
    db_host: str
    db_port: int
    db_user: str
    db_pass: str
    db_name: str

    # Meilisearch target: server base URL, optional API key, index name.
    ms_base_url: str
    master_key: str
    index: str

    # Number of notes read from the database and uploaded per batch.
    dump_per: int
def _positive_int(value):
    """argparse ``type`` callback accepting only integers greater than zero.

    A batch size of 0 would make the dump finish without indexing anything
    and a negative one is rejected by PostgreSQL's LIMIT, so fail early with
    a clear usage error instead.
    """
    number = int(value)  # a ValueError here is reported by argparse itself
    if number <= 0:
        raise argparse.ArgumentTypeError(f'must be a positive integer, got {value!r}')
    return number


# Command-line interface. Only --db-user and --db-name are mandatory; every
# other option falls back to the default shown in its help text.
parser = argparse.ArgumentParser(
    description='Dump Misskey notes from PostgreSQL into a Meilisearch index.'
)
parser.add_argument('--db-host', type=str, default='localhost', required=False,
                    help='PostgreSQL host (default: %(default)s)')
# Required option: a default value would never be used, so none is given.
parser.add_argument('--db-user', type=str, required=True,
                    help='PostgreSQL user name')
parser.add_argument('--db-port', type=int, default=5432, required=False,
                    help='PostgreSQL port (default: %(default)s)')
parser.add_argument('--db-pass', type=str, default='', required=False,
                    help='PostgreSQL password (default: empty)')
parser.add_argument('--db-name', type=str, required=True,
                    help='PostgreSQL database name')
parser.add_argument('--ms-base-url', type=str, default='http://localhost:7700', required=False,
                    help='Meilisearch base URL (default: %(default)s)')
parser.add_argument('--dump-per', type=_positive_int, default=10000, required=False,
                    help='number of notes sent per batch (default: %(default)s)')
parser.add_argument('--master-key', type=str, default='', required=False,
                    help='Meilisearch API key sent as a Bearer token (default: none)')
parser.add_argument('--index', type=str, default='misskey', required=False,
                    help='Meilisearch index name (default: %(default)s)')
# Parse straight into an ArgT instance so the annotation below is accurate
# (a plain parse_args() would return an argparse.Namespace instead).
args: ArgT = parser.parse_args(namespace=ArgT())
def unixEpoch(dt):
    """Return *dt* as milliseconds since the Unix epoch.

    Args:
        dt: A ``datetime.datetime``. Timezone-aware values are converted
            using their own UTC offset; naive values are interpreted as
            local time, as ``datetime.timestamp()`` documents.

    Returns:
        float: Milliseconds elapsed since 1970-01-01T00:00:00Z.
    """
    # datetime.timestamp() already yields (dt - epoch).total_seconds() for
    # aware datetimes, so no manual epoch subtraction or pytz is required.
    return dt.timestamp() * 1000
# Only publicly visible notes that are not pure renotes are indexed.
NOTE_FILTER = (
    '("note"."visibility" = \'public\' OR "note"."visibility" = \'home\') '
    'AND "note"."renoteId" IS NULL'
)
# Columns copied into the search documents; nothing else is fetched.
NOTE_COLUMNS = ('id', 'text', 'createdAt', 'userId', 'userHost', 'channelId', 'cw', 'tags')
_COLUMN_LIST = ', '.join(f'"note"."{name}"' for name in NOTE_COLUMNS)

# Keyset pagination: every page continues after the last id already sent.
# Ordering by id keeps pages stable (LIMIT/OFFSET without ORDER BY may skip
# or repeat rows) and avoids re-scanning all previously read rows per page.
FIRST_PAGE_SQL = (
    f'SELECT {_COLUMN_LIST} FROM "public"."note" '
    f'WHERE {NOTE_FILTER} '
    'ORDER BY "note"."id" LIMIT %s'
)
NEXT_PAGE_SQL = (
    f'SELECT {_COLUMN_LIST} FROM "public"."note" '
    f'WHERE {NOTE_FILTER} AND "note"."id" > %s '
    'ORDER BY "note"."id" LIMIT %s'
)

MAX_POST_ATTEMPTS = 5          # upload attempts per batch before giving up
RETRY_DELAY_SECONDS = 3        # pause between two attempts
REQUEST_TIMEOUT_SECONDS = 120  # keeps a stalled HTTP request from hanging forever


def note_to_document(note):
    """Build the Meilisearch document for one ``note`` row.

    Args:
        note: A row from the ``note`` table, indexable by column name.

    Returns:
        dict: The fields stored in the index; ``createdAt`` is converted to
        milliseconds since the Unix epoch by ``unixEpoch``.
    """
    return {
        'id': note['id'],
        'text': note['text'],
        'createdAt': unixEpoch(note['createdAt']),
        'userId': note['userId'],
        'userHost': note['userHost'],
        'channelId': note['channelId'],
        'cw': note['cw'],
        'tags': note['tags'],
    }


def post_documents(documents):
    """Upload one batch of documents to the configured Meilisearch index.

    The request is retried up to ``MAX_POST_ATTEMPTS`` times, both on
    transport errors and on any response other than 202.

    Args:
        documents: List of document dicts for a single batch.

    Raises:
        SystemExit: If every attempt failed, so the script stops instead of
            retrying forever (e.g. on a wrong master key).
    """
    url = f'{args.ms_base_url}/indexes/{args.index}/documents?primaryKey=id'
    for attempt in range(1, MAX_POST_ATTEMPTS + 1):
        try:
            r = requests.post(url, json=documents,
                              timeout=REQUEST_TIMEOUT_SECONDS, **global_reqargs)
        except requests.RequestException as exc:
            print(f'Error ({exc})')
        else:
            if r.status_code == 202:
                return
            print(f'Error ({r.status_code})')
            print(r.text)
        if attempt < MAX_POST_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS)
    raise SystemExit(f'Giving up: batch upload failed {MAX_POST_ATTEMPTS} times')


db = psycopg2.connect(
    host=args.db_host,
    user=args.db_user,
    password=args.db_pass,
    database=args.db_name,
    port=args.db_port,
    cursor_factory=psycopg2.extras.DictCursor
)
# The script only reads. Without autocommit psycopg2 would keep a single
# transaction open on the database for the entire run.
db.autocommit = True

lmt = args.dump_per

global_reqargs = {'headers': {}}
if args.master_key:
    global_reqargs['headers']['Authorization'] = 'Bearer ' + args.master_key

try:
    # Count exactly the rows that will be dumped so the progress is meaningful.
    with db.cursor() as cur:
        cur.execute('SELECT COUNT(*) FROM "public"."note" WHERE ' + NOTE_FILTER)
        total_notes = cur.fetchone()[0]

    processed = 0
    last_id = None
    while True:
        with db.cursor() as cur:
            if last_id is None:
                cur.execute(FIRST_PAGE_SQL, (lmt,))
            else:
                cur.execute(NEXT_PAGE_SQL, (last_id, lmt))
            qnotes = cur.fetchall()

        if not qnotes:
            break

        # The batch is rebuilt on every iteration, so a retried upload can
        # never accumulate duplicates of earlier rows.
        post_documents([note_to_document(note) for note in qnotes])

        processed += len(qnotes)
        last_id = qnotes[-1]['id']
        # Notes created during the run can push `processed` past the initial
        # count; the max() keeps the figure at or below 100% and non-zero.
        print(f'{(processed / max(total_notes, processed)) * 100:.2f}%')

    print('**** Complete ****')
finally:
    # Release the connection even when the dump aborts.
    db.close()
# [GitHub page footer] Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment