Last active
June 13, 2020 17:19
-
-
Save akshayranganath/c387b3d011f6da096dbf82bac3a9039d to your computer and use it in GitHub Desktop.
Sample script to migrate all local images to Cloudinary
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import glob
import os
import re
import shutil
import sys
import tempfile
import urllib.parse

import cloudinary
import cloudinary.api
import cloudinary.uploader
import cloudinary.utils
# Account credentials used by every Cloudinary call in this script.
# The values below are placeholders and must be replaced before running.
cloudinary.config(
    cloud_name='<<your cloud-name>>',
    api_key='XXXX',
    api_secret='XXXX',
)
def cli():
    """Parse the command line and dispatch to the selected sub-command.

    Three sub-commands are registered: ``help``, ``identify`` and ``swap``.
    ``identify`` and ``swap`` are dispatched to the module-level function of
    the same name, which receives the parsed ``argparse`` namespace.

    Returns:
        ``0`` when only help text was printed, otherwise whatever the
        dispatched sub-command function returns.
    """
    prog = "github_migration.py"
    if len(sys.argv) == 1:
        prog += " [command]"

    parser = argparse.ArgumentParser(
        description='CLI to migrate GitHub blog images to Cloudinary',
        add_help=True,
        prog=prog)
    parser.add_argument(
        '--version',
        action='version',
        version='0.1')

    subparsers = parser.add_subparsers(
        title='Commands', dest="command", metavar="")

    # Sub-command parsers keyed by name, so "help <command>" can print them.
    actions = {}

    # "help" swallows everything after it; the first word is the topic.
    subparsers.add_parser(
        name="help",
        help="Show available help",
        add_help=False).add_argument(
            'args',
            metavar="",
            nargs=argparse.REMAINDER)

    actions["identify"] = create_sub_command(
        subparsers, "identify",
        "Identifies all the images that will be migrated to Cloudinary",
        required_arguments=[{"name": "directory", "help": "Base directory for the github blog project"}],
        optional_arguments=[{"name": "display", "help": "Display all the image names with their file name"}]
    )
    actions["swap"] = create_sub_command(
        subparsers, "swap",
        "Uploads the local images to Cloudinary and rewrites the posts to use the new URLs",
        required_arguments=[{"name": "directory", "help": "Base directory for the github blog project"}],
        optional_arguments=[{"name": "display", "help": "Display all the image names with their file name"}]
    )

    if len(sys.argv) <= 1:
        parser.print_help()
        return 0

    args = parser.parse_args()

    # Sub-commands are optional for argparse, so the command may be missing
    # (for example when the only argument is "--").
    if args.command is None:
        parser.print_help()
        return 0

    if args.command == "help":
        topic = args.args[0] if args.args else None
        if topic in actions:
            actions[topic].print_help()
        else:
            # Unknown or missing topic: fall back to the top-level help.
            parser.prog = "github_migration.py help [command]"
            parser.print_help()
        return 0

    # Dispatch to the module-level function named after the sub-command.
    return getattr(sys.modules[__name__], args.command.replace("-", "_"))(args)
def create_sub_command(
        subparsers,
        name,
        help,
        optional_arguments=None,
        required_arguments=None):
    """Register a sub-command parser and its ``--option`` arguments.

    Args:
        subparsers: Object returned by ``ArgumentParser.add_subparsers``.
        name: Name of the sub-command.
        help: Help text shown for the sub-command.
        optional_arguments: Iterable of dicts describing optional arguments.
            Each dict needs a ``"name"`` key (registered as ``--<name>``);
            every other key is forwarded to ``add_argument``.
        required_arguments: Same format, registered with ``required=True``.

    Returns:
        The parser created for the sub-command.

    The specification dicts are left unmodified, so the same specification
    can be reused for several sub-commands.
    """
    action = subparsers.add_parser(name=name, help=help, add_help=False)

    if required_arguments:
        required = action.add_argument_group("required arguments")
        for spec in required_arguments:
            # Copy everything except the name instead of deleting it from
            # the caller's dict.
            options = {key: value for key, value in spec.items() if key != "name"}
            required.add_argument("--" + spec["name"], required=True, **options)

    # The optional group is always created, even when it stays empty.
    optional = action.add_argument_group("optional arguments")
    for spec in optional_arguments or ():
        options = {key: value for key, value in spec.items() if key != "name"}
        optional.add_argument("--" + spec["name"], required=False, **options)

    return action
def identify(args):
    """List the images referenced by every post under ``args.directory``.

    Args:
        args: Parsed command-line namespace; ``directory`` and ``display``
            are read from it.

    Returns:
        A dict mapping each markdown file path to the list of image
        locations found in it. Empty when no posts were found.
    """
    # get_posts may yield nothing when the directory or its _posts folder is
    # missing; treat that as "no posts" rather than failing on iteration.
    markdown_files = get_posts(args.directory) or []

    image_details = {}
    for markdown_file in markdown_files:
        images = find_images_in_post(markdown_file)
        image_details[markdown_file] = images
        # --display takes a value; any value switches the listing on.
        if args.display is not None:
            print(markdown_file, images)
    return image_details
def swap(args):
    """Migrate the images of every post under ``args.directory``.

    Each markdown file returned by ``get_posts`` is passed to
    ``find_and_swap_images_in_post`` together with the base directory.

    Args:
        args: Parsed command-line namespace; ``directory`` is read from it.
    """
    # get_posts may yield nothing when the directory or its _posts folder is
    # missing; treat that as "no posts" rather than failing on iteration.
    for markdown_file in get_posts(args.directory) or []:
        find_and_swap_images_in_post(args.directory, markdown_file)
def _swap_image_in_line(directory, line):
    """Return ``line`` with its local image replaced by a Cloudinary URL.

    The line is returned unchanged when it holds no image reference, when
    the image is not an existing local file, when it already points at
    cloudinary.com, or when the upload did not produce a URL.
    """
    stripped_line = line.strip()
    # NOTE(review): this pre-filter was reconstructed from a garbled source
    # line; confirm it against the original script.
    if not (stripped_line.startswith("![") or "](" in stripped_line):
        return line

    alt_text, image = run_regex(line)
    if image is None:
        return line

    if image.startswith(("http://", "https://")):
        upload_object = image
    else:
        # Image paths in the posts are relative to the blog's base directory.
        upload_object = directory + image

    # Only files that exist locally are uploaded, so remote URLs are skipped.
    if not os.path.isfile(upload_object) or 'cloudinary.com' in upload_object:
        return line

    cloudinary_url = upload_to_cloudinary(upload_object, alt_text)
    if cloudinary_url is None:
        # Keep the original reference rather than writing "(None)".
        return line
    return "![{}]({})\n".format(alt_text, cloudinary_url)


def find_and_swap_images_in_post(directory, markdown_file_name):
    """Rewrite one post so its local images point at Cloudinary.

    The post is copied line by line into a temporary file, with each image
    line rewritten by ``_swap_image_in_line``, and the temporary file then
    replaces the original post.

    Args:
        directory: Base directory of the blog; prepended to image paths.
        markdown_file_name: Path of the markdown post to migrate.
    """
    print("Migrating file " + markdown_file_name + " ...", end='')

    # A unique file in the system temp directory: does not depend on the
    # TMPDIR environment variable or on the shape of the post's path.
    fd, temp_file_name = tempfile.mkstemp(suffix=".md")
    try:
        with os.fdopen(fd, "w") as temp_file, \
                open(markdown_file_name, "r") as blog_post:
            for line in blog_post:
                temp_file.write(_swap_image_in_line(directory, line))

        # mkstemp creates the file owner-only; carry over the post's mode.
        shutil.copymode(markdown_file_name, temp_file_name)
        # shutil.move also works when the temp directory is on another
        # filesystem, where os.rename would fail.
        shutil.move(temp_file_name, markdown_file_name)
    finally:
        # Only still present when something above failed.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)

    print(" Completed!")
def upload_to_cloudinary(object, alt_text):
    """Upload one image to the ``blog`` folder on Cloudinary.

    Args:
        object: Path (or URL) of the image handed to the uploader.
        alt_text: Alt text of the image; stripped and URL-quoted, it is
            used as the public id of the uploaded asset.

    Returns:
        The delivery URL rewritten by ``cleanup_url``, or ``None`` when the
        response carries no ``secure_url``.
    """
    resp = cloudinary.uploader.upload(
        object,
        public_id=urllib.parse.quote(alt_text.strip()),
        # Was misspelled "invlidate", so the option never took effect.
        invalidate=True,
        folder="blog",
    )
    if resp is not None and "secure_url" in resp:
        return cleanup_url(resp["secure_url"])
    return None
# Splits a URL around its "/v<digits>/" version segment.
url_pattern = re.compile(r"(.+)/v[0-9]+/(.+)")


def cleanup_url(url):
    """Replace the version segment of a URL with ``f_auto,q_auto``.

    Args:
        url: URL expected to contain a ``/v<digits>/`` segment.

    Returns:
        The URL with that segment replaced by ``/f_auto,q_auto/``. A URL
        without a version segment is returned unchanged.
    """
    result = url_pattern.match(url)
    if result is None:
        return url
    return result.group(1) + '/f_auto,q_auto/' + result.group(2)
def find_images_in_post(markdown_file_name):
    """Collect the image locations referenced in one markdown post.

    Args:
        markdown_file_name: Path of the markdown file to scan.

    Returns:
        A list with the location of every image reference that
        ``run_regex`` recognises, in file order.
    """
    images = []
    with open(markdown_file_name, "r") as blog_post:
        for line in blog_post:
            line = line.strip()
            # NOTE(review): this pre-filter was reconstructed from a garbled
            # source line; confirm it against the original script.
            if line.startswith("![") or "](" in line:
                location = run_regex(line)[1]
                if location is not None:
                    images.append(location)
    return images
# Markdown image reference: ![alt text](location)
pattern = re.compile(r'!\[(.+)\]\((.+)\)')


def run_regex(line):
    """Extract the alt text and location of an image at the start of a line.

    Args:
        line: One line of a markdown file.

    Returns:
        A two-item list ``[alt_text, location]``. Both items are ``None``
        when the line does not begin with an image reference (the pattern
        is anchored at the start of the line).
    """
    result = pattern.match(line)
    if result is None:
        return [None, None]
    alt_text, location = result.groups()
    return [alt_text, location]
def get_posts(directory):
    """Return the markdown files in the ``_posts`` folder of a blog.

    Args:
        directory: Base directory of the blog, with or without a trailing
            path separator.

    Returns:
        A list of paths matching ``<directory>/_posts/*.md``. The list is
        empty, and a message is printed, when the directory or its
        ``_posts`` folder does not exist.
    """
    if not os.path.exists(directory):
        print("Unable to find directory: " + directory)
        return []

    # os.path.join copes with a missing trailing separator on the directory.
    posts_folder = os.path.join(directory, "_posts")
    if not os.path.exists(posts_folder):
        print("Unable to find posts in the _posts folder!")
        return []

    print("Now checking for posts: " + posts_folder)
    return glob.glob(os.path.join(posts_folder, '*.md'))
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment