Last active
November 13, 2023 00:10
-
-
Save villares/f4f35630c28222767dd276fd08bcdbb5 to your computer and use it in GitHub Desktop.
Backup HackMD.io Notes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Downloads the images linked from my notes at HackMD.io and
creates new MD files with relative-path image links.
Uses the links found in the 'notes-backup' files downloaded with
download_notes.py (run it first).
I used to skip the images hosted on my Dreamhost account
but decided to download them too.
"""
import re | |
import requests | |
from pathlib import Path | |
# Host and folder of my own image hosting. It is only referenced by the
# disabled "skip self-hosted images" filter in download_images_from_md_files.
self_hosted = 'lugaralgum.com/hackmd'

# The notes previously fetched by download_notes.py are read from here...
input_dir = Path.cwd() / 'notes-backup'
# ...and the images plus the rewritten notes are written here.
output_dir = Path.cwd() / 'notes-with-local-images'
output_dir.mkdir(exist_ok=True)
def download_images_from_md_files(directory_path, out_path):
    """Download the images linked from Markdown notes and localize the links.

    Every ``*.md`` file found recursively under *directory_path* is scanned
    for Markdown image links (``![alt](url)``). Each ``http``/``https`` image
    is saved into *out_path* under the last path segment of its URL, unless a
    file with that name is already there. A copy of the note is then written
    to *out_path* (same file name) in which the URL of every locally
    available image is replaced by the bare image file name.

    Links that do not start with ``http``, links without a file name, and
    images that cannot be fetched are reported and left untouched.

    Args:
        directory_path: Directory searched recursively for ``*.md`` files.
        out_path: Existing directory that receives the images and the
            rewritten notes (``str`` or ``pathlib.Path``).
    """
    out_path = Path(out_path)
    for md_file in Path(directory_path).rglob("*.md"):
        out_file = out_path / md_file.name
        # The notes are saved as UTF-8 by the download script, so do not
        # depend on the platform's default encoding here.
        content = md_file.read_text(encoding="utf-8")
        for link in re.findall(r"!\[.*?\]\((.*?)\)", content):
            if not link.startswith("http"):
                print(f'Skipped {link}')
                continue
            img_name = link.split("/")[-1]
            if not img_name:
                # URL ends with "/": there is no file name to save under.
                print(f'Skipped {link}')
                continue
            img_path = out_path / img_name
            # download only not already downloaded
            if not img_path.is_file():
                try:
                    # A timeout keeps one dead host from hanging the backup.
                    response = requests.get(link, timeout=30)
                except Exception:
                    # Deliberately best-effort: one bad link must not abort
                    # the whole run.
                    print(f'Skipped {link}')
                    continue
                if response.status_code != 200:
                    print(f"FAILED TO DOWNLOAD {img_name} from {md_file.name}.")
                    continue
                img_path.write_bytes(response.content)
                print(f"Downloaded {img_name} from {md_file.name}.")
            # The image is available locally (just downloaded or from an
            # earlier run): point the note at the local copy.
            content = content.replace(link, img_name)
        out_file.write_text(content, encoding="utf-8")
        print(f'saved {out_file.name}.')
# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    download_images_from_md_files(input_dir, output_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
from pathlib import Path | |
import pickle | |
try: | |
import tomllib | |
except ImportError: | |
import tomli as tomllib # tomllib will be in Python 3.11's standard library only | |
from PyHackMD import API # Install with pip, more info at https://github.com/GoatWang/PyHackMD | |
access_token = '<PUT YOUR ACCESS TOKEN HERE>'  # get one at https://hackmd.io/settings#api

# Alternatively, keep the token out of this script: make a TOML file named
# api_tokens containing a [hackmd] table with access_token = ... and load it:
# with open("/home/username/api_tokens", "rb") as f:
#     api_tokens = tomllib.load(f)
# access_token = api_tokens['hackmd']['access_token']

# The downloaded notes go into ./notes-backup (created on first run).
output_dir = Path.cwd() / 'notes-backup'
output_dir.mkdir(exist_ok=True)
# The note list of the last run is stored next to, not inside, that folder.
note_list_pickle_file = output_dir.parent / 'note_list.data'

api = API(access_token)
note_list = api.get_note_list()
# Load the note list saved by the previous run; it is compared with the
# current list to decide which notes need to be downloaded again.
# NOTE: pickle.load() must only be used on trusted data. This file is
# written by this same script; do not point it at a file from elsewhere.
try:
    with open(note_list_pickle_file, 'rb') as f:
        previous_note_list = pickle.load(f)
except FileNotFoundError:
    print('A new note_list.data file will be created.')
    previous_note_list = []
except (EOFError, pickle.UnpicklingError):
    # A truncated or corrupt file (e.g. from an interrupted run) would
    # otherwise crash every later run; start over with an empty list.
    print('note_list.data could not be read, a new one will be created.')
    previous_note_list = []
def find_previous_by_id(id, notes=None):
    """Return the note record whose ``'id'`` equals *id*.

    Args:
        id: Note id to look for. The name shadows the built-in ``id`` but is
            kept unchanged so existing callers keep working.
        notes: Iterable of note dicts to search. When omitted, the
            module-level ``previous_note_list`` is searched.

    Returns:
        The first matching note dict, or an empty dict when nothing matches.
    """
    if notes is None:
        notes = previous_note_list
    for note_data in notes:
        # Compare against the argument, not against a global variable that
        # merely happens to hold the same value at the call site.
        if note_data['id'] == id:
            return note_data
    return {}
# Needed for the timezone-aware timestamp conversion in the loop below.
from datetime import timezone

# Download every note that is new or whose lastChangedAt value differs from
# the one recorded by the previous run; report the others as skipped.
for note_data in note_list:
    note_id = note_data['id']
    previous = find_previous_by_id(note_id)
    if not previous or previous['lastChangedAt'] != note_data['lastChangedAt']:
        print(f'Downloading... '
              f'{note_data["id"]} '
              f'{note_data["title"]} '
              f'{note_data["publishLink"]}')
        content = api.get_note(note_data['id'])['content']
        permalink = note_data["permalink"]
        if permalink is not None:
            file_name = f'{permalink}.md'
        else:
            # No permalink: build the file name from the title, replacing
            # slashes and spaces with dashes.
            file_name = f'{note_data["title"]}.md'.replace('/', '-').replace(' ', '-')
        with open(output_dir / file_name, 'w', encoding='utf-8') as f:
            f.write(content)
    else:
        # lastChangedAt is divided by 1000 before being used as a Unix
        # timestamp in seconds (presumably it is in milliseconds).
        last_changed_unix_dts = note_data['lastChangedAt'] / 1000
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # aware equivalent below formats to the same UTC string.
        dts = datetime.fromtimestamp(
            last_changed_unix_dts, tz=timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')
        print(f'Skipping... '
              f'{note_data["id"]} '
              f'{note_data["title"]} '
              f'{dts}')
# Store this run's note list so that the next run can compare against it.
with open(note_list_pickle_file, 'wb') as f:
    pickle.dump(note_list, f)

# you should be able to run download_images_modify_files.py after this
Uploading to Dreamhost:
"""
This will upload all hackmd backup image files to my Dreamhost account
using SFTP with Python paramiko library.
I had to add the server to known hosts with:
ssh-keyscan [host name here] >> ~/.ssh/known_hosts
Skips overwriting existing files, this is mostly good for the images that will
change names if they get updated, but this is not very good for the .md files
that get updated a lot, with the same file names.
You can let it update/upload/overwrite the markdown files with SKIP_MARKDOWN = False.
"""
from pathlib import Path
import tomli as tomllib # tomllib will be in Python 3.11's standard library only
import paramiko
paramiko.util.log_to_file('ssh-session.log')  # sets up logging

# The credentials are read from a TOML file with a [dreamhost] table
# providing host, username and password.
with open("/home/villares/api_tokens", "rb") as f:
    api_tokens = tomllib.load(f)
host = api_tokens['dreamhost']['host']
username = api_tokens['dreamhost']['username']
password = api_tokens['dreamhost']['password']

# Directory on the server that receives the files.
remote_dir = '/home/abav/public_lugaralgum.com/hackmd'
# local_dir = Path.cwd() / 'notes-with-local-images'
local_dir = '/home/villares/GitHub/hackmd-backup/notes-with-local-images/'

# True: never upload .md files. False: upload them, overwriting the remote copies.
SKIP_MARKDOWN = True
# Upload the contents of local_dir to remote_dir over SFTP.
# Markdown files are skipped or overwritten depending on SKIP_MARKDOWN;
# any other file is uploaded only if it is not on the server yet.
with paramiko.SSHClient() as ssh:
    # ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # unsafe!
    ssh.load_system_host_keys()  # get keys from known_hosts
    ssh.connect(host, username=username, password=password)
    with ssh.open_sftp() as sftp:
        sftp.chdir(remote_dir)
        for local_file in sorted(Path(local_dir).iterdir()):
            remote_file = local_file.name
            if not local_file.is_file():
                # iterdir() also yields sub-directories, which cannot be
                # uploaded with sftp.put().
                print(f'Skipping {remote_file}, not a regular file.')
                continue
            if remote_file.lower().endswith('.md'):
                if SKIP_MARKDOWN:
                    print(f'Skipping markdown file {remote_file}.')
                else:
                    sftp.put(str(local_file), remote_file)
                    print(f'Uploaded markdown {remote_file} (with possible overwrite).')
            else:
                try:
                    # stat() succeeding means the file is already there.
                    sftp.stat(remote_file)
                    print(f'Skipping {remote_file} already on server.')
                except FileNotFoundError:
                    sftp.put(str(local_file), remote_file)
                    print(f'Uploaded {remote_file}.')
# import paramiko
#
# transport = paramiko.Transport((host, 22))
# transport.connect(username=username, password=password)
#
# security_options = transport.get_security_options()
# print(security_options.ciphers)
# print(security_options.kex)
#
# transport.close()
"""
('aes128-ctr', 'aes192-ctr', 'aes256-ctr', 'aes128-cbc', 'aes192-cbc', 'aes256-cbc', 'blowfish-cbc', '3des-cbc')
('curve25519-sha256@libssh.org', 'ecdh-sha2-nistp256', 'ecdh-sha2-nistp384', 'ecdh-sha2-nistp521', 'diffie-hellman-group16-sha512', 'diffie-hellman-group-exchange-sha256', 'diffie-hellman-group14-sha256', 'diffie-hellman-group-exchange-sha1', 'diffie-hellman-group14-sha1', 'diffie-hellman-group1-sha1')
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The final step was to change the imgur URLs to point to the images served from my Dreamhost account. Since HackMD has now added some (limited) image hosting of its own, I haven't run this step again.