Last active
January 31, 2020 07:39
-
-
Save tvdsluijs/0448d0528b12afae7a01ac033f0d906c to your computer and use it in GitHub Desktop.
Import all your Markdown Jekyll files to Medium
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import os | |
import re | |
import yaml | |
class ImportMDs:
    """Publish Jekyll-style Markdown posts to Medium through its REST API.

    On construction the instance validates the Medium token against the
    ``me`` endpoint, derives the user-specific URLs from the response and
    fetches the user's publications.  The remaining methods read a Markdown
    file, split it into YAML front matter and body, clean up the body and
    post the result.

    Error handling follows one convention throughout: problems are printed
    and the method returns ``False`` instead of raising.
    """

    base_url = "https://api.medium.com/v1/"

    # Front matter is the text between the first pair of lines that consist
    # solely of "---".  The lazy quantifier keeps a later "---" (for example
    # a horizontal rule in the post body) out of the match.
    yaml_regex = r"^---\n(.*?)^---$"
    more_tag = r"<!--more-->"
    # Lazy capture so that two gist tags in one post are handled one by one.
    gist_tag = r"\{\%\s+gist\s+(.*?)\s+\%\}"

    # Seconds to wait for Medium before giving up on a request.
    request_timeout = 30

    def __init__(self, m_token=None, git_username=None):
        """Store the credentials and load the Medium user data.

        :param m_token: Medium integration token.
        :param git_username: GitHub user name, used to build gist embed URLs.

        A missing argument or an undecodable API response is printed, not
        raised; in that case ``user_url`` and ``posts_url`` stay ``None``.
        """
        # Every attribute exists from the start, so a failed setup still
        # leaves an object that can be inspected safely.
        self.git_username = git_username
        self.access_token = m_token
        self.headers = {}
        self.post_data = None
        self.json_response = None
        self.user_id = None
        self.user_url = None
        self.posts_url = None
        self.publications_response = None

        try:
            if m_token is None or git_username is None:
                raise ValueError('No Medium Token or no Github user name')

            self.headers = {
                'Authorization': "Bearer " + self.access_token,
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, '
                              'like Gecko) Chrome/63.0.3239.84 Safari/537.36'}

            # Each step needs the result of the previous one, so stop at the
            # first failure instead of running into a None URL.
            if self.valid_token() and self.parse_user_data():
                self.user_publications()
        except ValueError as e:
            print(e)

    def valid_token(self):
        """Validate the access token by calling https://api.medium.com/v1/me.

        The decoded response is stored in ``json_response``.

        :return: ``True`` when the response contains a ``data`` entry,
            otherwise ``False`` (the response is printed).
        :raises ValueError: when the response body is not valid JSON.
        """
        response = requests.request("GET", self.base_url + 'me', headers=self.headers,
                                    timeout=self.request_timeout)
        self.json_response = json.loads(response.text)
        if isinstance(self.json_response, dict) and 'data' in self.json_response:
            return True
        print(self.json_response)
        return False

    def user_publications(self):
        """Fetch the user's publications and keep the raw response text.

        :return: ``True`` when the request was made, ``False`` when no user
            URL is known yet.
        """
        if self.user_url is None:
            print('No Medium user url available, cannot fetch publications')
            return False
        self.publications_response = requests.request(
            "GET", self.user_url + 'publications/', headers=self.headers,
            timeout=self.request_timeout).text
        return True

    def parse_user_data(self):
        """Derive ``user_id``, ``user_url`` and ``posts_url`` from ``json_response``.

        :return: ``True`` on success, ``False`` when the response holds no user id.
        """
        try:
            self.user_id = self.json_response['data']['id']
        except (KeyError, TypeError) as e:
            print(e)
            return False
        self.user_url = self.base_url + 'users/' + self.user_id + '/'
        self.posts_url = self.user_url + 'posts/'
        return True

    def post_the_article(self, payload=None, post_url=None):
        """POST ``payload`` to ``post_url`` and store the decoded answer in ``post_data``.

        :param payload: dict with the post fields.
        :param post_url: full URL of the Medium posts endpoint to use.
        :return: ``True`` when the response contains a ``data`` entry,
            ``False`` otherwise.
        """
        self.post_data = None
        try:
            if payload is None or post_url is None:
                raise ValueError('No payload or post_url')
            # NOTE(review): the payload is sent form-encoded (``data=``);
            # confirm against the Medium API docs whether JSON is preferred.
            response = requests.request('POST', post_url, data=payload, headers=self.headers,
                                        timeout=self.request_timeout)
            self.post_data = response.json()
        except ValueError as e:
            # Covers both the argument check and a non-JSON response body.
            print(e)
            return False

        if isinstance(self.post_data, dict) and 'data' in self.post_data:
            return True
        # Show what Medium answered so the failure can be diagnosed.
        print(self.post_data)
        return False

    def post_one_article(self, payload=None):
        """Post ``payload`` to the user's own profile (``posts_url``).

        :return: the result of :meth:`post_the_article`, or ``False`` when
            no payload was given.
        """
        try:
            if payload is None:
                raise ValueError("No payload ")
            return self.post_the_article(payload, self.posts_url)
        except ValueError as e:
            print(e)
            return False

    def post_publication_article(self, publication_id=None, payload=None):
        """Post ``payload`` to the publication identified by ``publication_id``.

        :return: the result of :meth:`post_the_article`, or ``False`` when an
            argument is missing.
        """
        try:
            if payload is None or publication_id is None:
                raise ValueError("No payload or publication_id")
            posts_url = "https://api.medium.com/v1/publications/{}/posts".format(publication_id)
            return self.post_the_article(payload, posts_url)
        except ValueError as e:
            print(e)
            return False

    @staticmethod
    def read_file(my_file=None):
        """Return the text content of ``my_file``, or ``False`` when it cannot be read."""
        try:
            if my_file is None:
                raise ValueError("No my_file")
            # The context manager closes the handle even when read() fails.
            with open(my_file, "r") as handle:
                return handle.read()
        except (ValueError, OSError, TypeError) as e:
            # ValueError also covers text that cannot be decoded.
            print(e)
            return False

    def process_yml_data(self, file_data=None):
        """Parse the YAML front matter of ``file_data``.

        :param file_data: complete text of a Markdown file.
        :return: whatever the YAML loader produces for the front matter, or
            ``False`` when there is no text, no front matter, or invalid YAML.
        """
        try:
            # read_file() signals failure with False, so reject any non-string.
            if not isinstance(file_data, str):
                raise ValueError("No file_data")
            found = re.search(self.yaml_regex, file_data, re.MULTILINE | re.DOTALL)
            if found is None:
                raise ValueError("No YAML front matter found")
            # group(1) is the text between the delimiters, so a "---" inside
            # a YAML value is left untouched.
            # NOTE(review): FullLoader is kept from the original; consider
            # yaml.safe_load if the Markdown files are not fully trusted.
            return yaml.load(found.group(1), Loader=yaml.FullLoader)
        except ValueError as e:
            print(e)
            return False
        except yaml.YAMLError as e:
            print(e)
            return False

    def return_content(self, file_data=None):
        """Return ``file_data`` with its front matter block removed.

        Only the first front matter block is stripped; later ``---`` lines in
        the body are kept.  Returns ``False`` when no text was given.
        """
        try:
            if not isinstance(file_data, str):
                raise ValueError("No file_data")
            return re.sub(self.yaml_regex, "", file_data, count=1,
                          flags=re.MULTILINE | re.DOTALL)
        except ValueError as e:
            print(e)
            return False

    def sanitize_content_data(self, file_data=None):
        """Prepare a post body for Medium.

        Curly double quotes become straight single quotes, the ``<!--more-->``
        marker is removed and every ``{% gist ID %}`` tag is replaced with a
        ``<script>`` embed that points at ``git_username``'s gist.

        :return: the cleaned text, or ``False`` when no text was given.
        """
        try:
            if not isinstance(file_data, str):
                raise ValueError("No file_data")
            file_data = file_data.replace("“", "'")
            file_data = file_data.replace("”", "'")

            # remove the More tag
            file_data = re.sub(self.more_tag, "", file_data,
                               flags=re.MULTILINE | re.DOTALL)

            gist_subst = "<script src=\"https://gist.github.com/{}/\\1.js\"></script>".format(self.git_username)
            return re.sub(self.gist_tag, gist_subst, file_data,
                          flags=re.MULTILINE | re.DOTALL)
        except ValueError as e:
            print(e)
            return False

    @staticmethod
    def move_file(from_file=None, to_file=None):
        """Rename ``from_file`` to ``to_file``.

        :return: ``True`` when the file was moved, ``False`` (after printing
            the reason) when it was not.
        """
        try:
            os.rename(from_file, to_file)
        except (OSError, TypeError) as e:
            print('Could not move {} to {}: {}'.format(from_file, to_file, e))
            return False
        return True
if __name__ == '__main__':
    # To make this work you need:
    #   - Python 3.4 or higher
    #   - PyYAML           > pip install PyYAML
    #   - a Medium token   > https://help.medium.com/hc/en-us/articles/213480228-Get-integration-token
    # That's all.

    token = ""  # Your Medium token
    md_folder = "mds"  # folder where your md files are
    backup_folder = "backup"  # folder where the md's are put when sent to Medium
    git_username = ""  # your github username

    # If you want to publish to a publication you can find its id with
    #     print(imd.publications_response)
    # The id is in the JSON string.
    publication_id = ""  # your publication ID
    publishStatus = "public"  # "public", "draft", or "unlisted"

    # general_tag is added when a post ends up with exactly one tag
    # (the Medium API breaks when you only have one tag).
    general_tag = "tag1"
    tags = ["tag2", "tag3", "tag5"]  # default tags, used when a post has no tags of its own

    # The license of the post. Valid values are "all-rights-reserved", "cc-40-by", "cc-40-by-sa",
    # "cc-40-by-nd", "cc-40-by-nc", "cc-40-by-nc-nd", "cc-40-by-nc-sa", "cc-40-zero", "public-domain".
    # The default is "all-rights-reserved".
    md_license = 'cc-40-by-nc'

    blog_url = "https://www.yoururl.com"  # domain of your current jekyll blog

    # Each line is "old permalink;new Medium url" - usable to create e.g. .htaccess url redirects.
    csv_file = "a-file-name.txt"

    run = 8  # you don't want to run more than 9 [0-8] a day! Medium does not like that.

    # The constructor validates the token and already fetches the publications.
    imd = ImportMDs(token, git_username)
    if getattr(imd, 'user_url', None) is None:
        raise SystemExit('Could not set up the Medium connection, check your token and user name')

    dir_path = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(dir_path, md_folder)  # the folder where your MD files are
    backup = os.path.join(dir_path, backup_folder)  # the folder where your backup files will be
    os.makedirs(backup, exist_ok=True)  # moving a posted file fails without it

    nr = 0
    for filename in sorted(os.listdir(folder)):
        # Only markdown files count towards the daily limit.
        if not filename.endswith(".md"):
            continue
        if nr > run > 0:
            break
        nr += 1
        print("Run nr: {}".format(nr))
        print("Process file: {}".format(filename))

        markdown_file = os.path.join(folder, filename)
        backup_file = os.path.join(backup, filename)

        data = imd.read_file(markdown_file)
        if not isinstance(data, str):
            print('Could not read {}, skipping'.format(filename))
            continue

        yml_data = imd.process_yml_data(data)
        if not isinstance(yml_data, dict):
            print('No usable front matter in {}, skipping'.format(filename))
            continue
        if 'title' not in yml_data or 'permalink' not in yml_data:
            print('No title or permalink in {}, skipping'.format(filename))
            continue

        title = str(yml_data['title'])
        permalink = "{}{}".format(blog_url, str(yml_data['permalink']))

        # Build the tag list per post; the defaults in `tags` are never
        # modified, so one post cannot leak its tags into the next.
        post_tags = yml_data.get('tags') or []
        if isinstance(post_tags, str):
            post_tags = post_tags.split()
        if not post_tags:
            cats = yml_data.get('categories') or []
            if isinstance(cats, str):
                cats = cats.split()
            post_tags = list(cats) + tags
        post_tags = list(dict.fromkeys(str(tag) for tag in post_tags))  # remove double tags
        if len(post_tags) == 1:  # a single tag breaks the API, so add the general one
            post_tags.append(general_tag)

        content = imd.return_content(data)
        pre_content = "# {}\n".format(title)
        if 'image' in yml_data:
            pre_content = "{}!['{}']({})\n".format(pre_content, title, str(yml_data['image']))
        content = "{}{}".format(pre_content, content)
        content = imd.sanitize_content_data(content)

        my_payload = {
            'title': title,
            'contentFormat': 'markdown',
            'content': content,
            'tags': post_tags,
            'publishStatus': publishStatus,
            'canonicalUrl': permalink,
            'license': md_license
        }

        if imd.post_publication_article(publication_id, my_payload):
            try:
                new_url = imd.post_data['data']['url']
            except (KeyError, TypeError):
                print(imd.post_data)
                print(my_payload)
            else:
                print('Yes, {} posted as {} at {}'.format(title, publishStatus, new_url))
                imd.move_file(markdown_file, backup_file)
                text = "{};{}\n".format(permalink, new_url)
                with open(csv_file, 'a') as f:
                    f.write(text)
        else:
            print('We found ourselves a little error')

    print('all done!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment