Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A quick script to transform a Markdown file's relative images to absolute by uploading them to a plugin-based service (S3, Imgur)
import mimetypes
import re
from pathlib import Path
from urllib.parse import quote

import boto3
import requests
from botocore.exceptions import ClientError
# Regexes matching markdown image syntax: ![alt text](filename)
# Raw strings are used so that `\[`, `\]`, `\(` and `\)` reach the regex
# engine verbatim instead of being parsed as (invalid) string escapes.
# PATTERN_FULL captures both the alt text and the filename as named groups.
PATTERN_FULL = r'(?:!\[(?P<alt_text>.*?)\]\((?P<filename>.*?)\))'
# PATTERN_FNAME captures only the filename, so `findall` yields plain strings.
PATTERN_FNAME = r'(?:!\[(?:.*?)\]\((?P<filename>.*?)\))'
class Uploader:
    """Common interface for the image upload backends in this script.

    Concrete backends subclass this and override `upload_image`.
    """

    def upload_image(self, image_path, override=False):
        """Upload the image at `image_path`.

        The base implementation is a placeholder that always raises
        `NotImplementedError`; subclasses provide the real behaviour.
        """
        raise NotImplementedError()
class S3Uploader(Uploader):
    """Uploader that stores images in an S3 bucket under a fixed key prefix.

    Parameters
    ----------
    s3_bucket: str
        Name of the destination bucket.
    s3_relative_path: str
        Key prefix ("directory") inside the bucket. Leading and trailing
        slashes are stripped; an empty prefix uploads to the bucket root.
    s3_ACL: str, optional
        If given, passed as the `ACL` argument of `put_object`.
    """

    def __init__(self, s3_bucket, s3_relative_path, s3_ACL=None):
        self.bucket = s3_bucket
        # Normalise so the prefix never starts or ends with a slash.
        self.relative_path = s3_relative_path.strip('/')
        self.s3_acl = s3_ACL
        self.client = boto3.client('s3')

    def _object_key(self, file_name):
        """Return the object key for `file_name`, honouring an empty prefix."""
        if self.relative_path:
            return f'{self.relative_path}/{file_name}'
        # Without this guard an empty prefix would yield the key "/name".
        return file_name

    def _public_url(self, key):
        """Return the virtual-hosted-style URL of `key` in the bucket."""
        # Percent-encode the key so names with spaces or other reserved
        # characters still produce a valid link ("/" is kept as-is).
        return f"https://{self.bucket}.s3.amazonaws.com/{quote(key)}"

    def upload_image(self, image_path, override=False):
        """Upload `image_path` and return the URL of the resulting object.

        The object key is the configured prefix plus the file's base name.
        NOTE: two images with the same base name in different directories
        map to the same key.

        When `override` is false and an object with that key already exists,
        nothing is uploaded and the URL of the existing object is returned.

        Raises
        ------
        botocore.exceptions.ClientError
            If the existence check fails with anything other than a 404.
        """
        p = Path(image_path)
        key = self._object_key(p.name)
        url = self._public_url(key)

        if not override:
            try:
                self.client.head_object(Bucket=self.bucket, Key=key)
            except ClientError as exc:
                # A 404 just means "not uploaded yet"; anything else is real.
                if exc.response['Error']['Code'] != "404":
                    raise
            else:
                return url

        kwargs = {}
        if self.s3_acl:
            kwargs['ACL'] = self.s3_acl
        # Guess from the file name (a str) so this also works when
        # `image_path` is a Path on Python versions before 3.8.
        content_type, _ = mimetypes.guess_type(p.name)
        if content_type:
            kwargs['ContentType'] = content_type

        with p.open('rb') as fp:
            # Hand the open file to boto3 rather than reading it fully
            # into memory first.
            self.client.put_object(
                Body=fp,
                Bucket=self.bucket,
                Key=key,
                **kwargs
            )
        return url
class ImgurUploader(Uploader):
    """Uploader that posts images to the Imgur API.

    Parameters
    ----------
    imgur_access_token: str
        Token sent as a `Bearer` credential in the `Authorization` header.
    """

    def __init__(self, imgur_access_token):
        self.access_token = imgur_access_token

    def upload_image(self, image_path, override=False):
        """POST the image to Imgur and return the link from the response.

        `override` is accepted for interface compatibility with `Uploader`
        but is not used by this backend.

        Raises whatever `raise_for_status` raises on an HTTP error response.
        """
        auth_headers = {"Authorization": f"Bearer {self.access_token}"}
        with Path(image_path).open('rb') as image_file:
            response = requests.post(
                'https://api.imgur.com/3/image',
                headers=auth_headers,
                files={'image': image_file},
            )
        response.raise_for_status()
        payload = response.json()
        return payload['data']['link']
# Registry of available backends: maps the `uploader` name accepted by
# `main` to the class that implements it.
UPLOADERS = dict(
    s3=S3Uploader,
    imgur=ImgurUploader,
)
def main(original_path, output_path, uploader, override=False, **uploader_kwargs):
    """Upload a markdown file's local images and rewrite their links.

    Reads the markdown file at `original_path`, uploads every locally
    referenced image using the chosen uploader and writes a copy of the
    document, with those image links pointing at the uploaded URLs, to
    `output_path`. Image links that already point at a remote location
    (``http://``, ``https://``, ``//`` or ``data:``) and empty image
    targets are left untouched. Both files are read/written as UTF-8.

    Parameters
    ----------
    original_path: str, a valid filesystem path
        The path of the markdown file used to transform.
    output_path: str, a valid filesystem path
        The path of where the resulting markdown file will be stored.
        WARNING! This file will be overwritten.
    uploader: str, a choice of uploaders
        The uploader to use. Must be one of the keys of the `UPLOADERS` variable.
    override: bool
        Passed to the uploader, if the image should be overridden or not.
        It's the responsibility of the uploader to respect this flag.
    **uploader_kwargs: keyword arguments
        Everything else will be passed to the Uploader at the moment of initialization.

    Raises
    ------
    KeyError
        If `uploader` is not a key of `UPLOADERS`.
    ValueError
        If any referenced local image does not exist. This is checked
        before anything is uploaded.
    """
    uploader_instance = UPLOADERS[uploader](**uploader_kwargs)
    source_path = Path(original_path)
    base_path = source_path.parent
    pattern = re.compile(PATTERN_FULL)
    remote_prefixes = ('http://', 'https://', '//', 'data:')

    content = source_path.read_text(encoding='utf-8')

    referenced = {match.group('filename') for match in pattern.finditer(content)}
    # Only non-empty, non-remote targets are local files to upload; image
    # paths are resolved relative to the markdown file's directory.
    image_mapping = {
        image_relative: base_path / image_relative
        for image_relative in referenced
        if image_relative and not image_relative.lower().startswith(remote_prefixes)
    }

    missing_images = sorted(
        str(abs_path) for abs_path in image_mapping.values() if not abs_path.exists()
    )
    if missing_images:
        raise ValueError(f'Missing images: {",".join(missing_images)}')

    image_results = {
        relative_path: uploader_instance.upload_image(abs_path, override)
        for relative_path, abs_path in image_mapping.items()
    }

    def _relink(match):
        """Rebuild one image tag, swapping in the uploaded URL if there is one."""
        filename = match.group('filename')
        if filename not in image_results:
            return match.group(0)
        return f"![{match.group('alt_text')}]({image_results[filename]})"

    # Substitute inside image tags only. A plain str.replace would also
    # rewrite the same text in prose, or where one path is a substring of
    # another path or of an already inserted URL.
    content = pattern.sub(_relink, content)

    with open(output_path, 'w', encoding='utf-8') as fp:
        fp.write(content)
    print("Done...")
if __name__ == "__main__":
    # Usage examples. Both write to the same output file, so the second
    # run overwrites the result of the first.

    # S3 Example
    # The uploader name must be 's3' here: the s3_* keyword arguments are
    # forwarded to the uploader's constructor, and only S3Uploader accepts them.
    main(
        'test_relative.md',
        'test_absolute.md',
        's3',
        s3_bucket='MY-BUCKET',
        s3_relative_path='some/subdir/in/the/bucket',
        s3_ACL='public-read'
    )
    # Imgur Example
    main(
        'test_relative.md',
        'test_absolute.md',
        'imgur',
        imgur_access_token='{IMGUR_ACCESS_TOKEN}',
    )
@santiagobasulto

This comment has been minimized.

Copy link
Owner Author

@santiagobasulto santiagobasulto commented Aug 28, 2020

I created this script in just a few minutes to solve an issue with a bunch of MD files that were exported with relative, local images and I needed them absolute and hosted on S3. Please make sure you read how it works before using it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.