Skip to content

Instantly share code, notes, and snippets.

@tvdsluijs
Last active January 31, 2020 07:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tvdsluijs/0448d0528b12afae7a01ac033f0d906c to your computer and use it in GitHub Desktop.
Save tvdsluijs/0448d0528b12afae7a01ac033f0d906c to your computer and use it in GitHub Desktop.
Import all your Markdown Jekyll files to Medium
import requests
import json
import os
import re
import yaml
class ImportMDs:
    """Publish Jekyll Markdown posts on Medium through the Medium v1 REST API.

    On construction the Medium token is checked against the ``me`` endpoint
    and, when it is accepted, the user id, the user/post URLs and the raw
    publications response are stored on the instance.

    Every method reports problems by printing a message and returning
    ``False`` instead of raising, so a batch run can carry on with the next
    file.
    """

    # Jekyll front matter: the first block enclosed by lines holding only
    # "---". The group is non-greedy so that a later "---" (e.g. a Markdown
    # horizontal rule in the body) is not mistaken for the closing delimiter.
    yaml_regex = r"^---\n(.*?)^---$"
    # Jekyll excerpt separator; it is simply removed from the content.
    more_tag = r"<!--more-->"
    # Liquid gist tag, e.g. "{% gist 0448d05 %}". Non-greedy so that several
    # gist tags in one document are matched one by one.
    gist_tag = r"\{\%\s+gist\s+(.*?)\s+\%\}"

    def __init__(self, m_token=None, git_username=None):
        """Store the credentials and, if the token is valid, load user data.

        :param m_token: Medium integration token.
        :param git_username: GitHub user name, used to build gist embed URLs.
        """
        # Give every attribute a value first, so the instance stays usable
        # (and inspectable) even when validation below fails.
        self.git_username = git_username
        self.access_token = m_token
        self.headers = None
        self.base_url = "https://api.medium.com/v1/"
        self.post_data = None
        self.json_response = None
        self.user_id = None
        self.user_url = None
        self.posts_url = None
        self.publications_response = None

        # Either value missing is an error (the old chained comparison only
        # triggered when *both* were None).
        if m_token is None or git_username is None:
            print('No Medium Token or no Github user name')
            return

        self.headers = {
            'Authorization': "Bearer " + self.access_token,
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, '
                          'like Gecko) Chrome/63.0.3239.84 Safari/537.36'}

        # Only continue with the next step when the previous one succeeded;
        # otherwise user_url would still be None further down.
        if self.valid_token() and self.parse_user_data():
            self.user_publications()

    def valid_token(self):
        """Validate the access token by calling https://api.medium.com/v1/me.

        The decoded response is kept in ``self.json_response``.

        :return: True when the response contains a 'data' entry, else False.
        """
        response = requests.request("GET", self.base_url + 'me', headers=self.headers)
        try:
            self.json_response = json.loads(response.text)
        except ValueError as e:  # body was not valid JSON
            print(e)
            self.json_response = None
            return False
        if isinstance(self.json_response, dict) and 'data' in self.json_response:
            return True
        print(self.json_response)
        return False

    def user_publications(self):
        """Fetch the user's publications and keep the raw response text.

        :return: True when the request was made, False when no user URL is
                 known yet (token not validated).
        """
        if self.user_url is None:
            print('No user url, was the Medium token accepted?')
            return False
        self.publications_response = requests.request(
            "GET", self.user_url + 'publications/', headers=self.headers).text
        return True

    def parse_user_data(self):
        """Derive user id, user URL and posts URL from ``self.json_response``.

        :return: True on success, False when the response has no user id.
        """
        try:
            self.user_id = self.json_response['data']['id']
        except (KeyError, TypeError) as e:  # TypeError: no response stored at all
            print(e)
            return False
        self.user_url = self.base_url + 'users/' + self.user_id + '/'
        self.posts_url = self.user_url + 'posts/'
        return True

    def post_the_article(self, payload=None, post_url=None):
        """POST ``payload`` to ``post_url`` and keep the decoded answer.

        The decoded response is stored in ``self.post_data`` (reset to None
        at the start of every call so no stale result is left behind).

        :param payload: dict with the post fields.
        :param post_url: Medium endpoint to post to.
        :return: True when the response contains a 'data' entry, else False.
        """
        self.post_data = None
        if payload is None or post_url is None:
            print('No payload or post_url')
            return False
        try:
            response = requests.request('POST', post_url, data=payload, headers=self.headers)
            self.post_data = response.json()
        except ValueError as e:  # invalid URL or a non-JSON response body
            print(e)
            return False
        if not isinstance(self.post_data, dict) or 'data' not in self.post_data:
            # Show what came back instead, it usually holds the error details.
            print(self.post_data)
            return False
        return True

    def post_one_article(self, payload=None):
        """Post ``payload`` to the user's own posts URL.

        :return: result of :meth:`post_the_article`, or False without payload.
        """
        if payload is None:
            print("No payload ")
            return False
        return self.post_the_article(payload, self.posts_url)

    def post_publication_article(self, publication_id=None, payload=None):
        """Post ``payload`` to the publication with id ``publication_id``.

        :return: result of :meth:`post_the_article`, or False when the
                 payload or the publication id is missing.
        """
        # An empty id would produce the URL ".../publications//posts".
        if payload is None or publication_id in (None, ""):
            print("No payload or publication_id")
            return False
        posts_url = "https://api.medium.com/v1/publications/{}/posts".format(publication_id)
        return self.post_the_article(payload, posts_url)

    @staticmethod
    def read_file(my_file=None):
        """Return the text of ``my_file``, or False when it cannot be read.

        The file is decoded as UTF-8 (the encoding Jekyll uses by default)
        rather than the platform default.
        """
        if my_file is None:
            print("No my_file")
            return False
        try:
            with open(my_file, "r", encoding="utf-8") as handle:
                return handle.read()
        except (OSError, ValueError, TypeError) as e:  # ValueError covers decode errors
            print(e)
            return False

    def process_yml_data(self, file_data=None):
        """Parse the YAML front matter of a Jekyll document.

        :param file_data: full text of the Markdown file.
        :return: the parsed front matter, or False when ``file_data`` is not
                 a string, has no front matter, or the YAML is invalid.
        """
        if not isinstance(file_data, str):
            print("No file_data")
            return False
        found = re.search(self.yaml_regex, file_data, re.MULTILINE | re.DOTALL)
        if found is None:
            print("No YAML front matter found")
            return False
        try:
            # NOTE(security): yaml.load with FullLoader is meant for files you
            # trust (your own blog posts); do not feed it untrusted input.
            return yaml.load(found.group(1), Loader=yaml.FullLoader)
        except yaml.YAMLError as e:
            print(e)
            return False

    def return_content(self, file_data=None):
        """Return ``file_data`` without its front matter block.

        Only the first front matter block is removed, so "---" rules inside
        the article body are left alone.

        :return: the remaining text, or False when ``file_data`` is not a string.
        """
        if not isinstance(file_data, str):
            print("No file_data")
            return False
        return re.sub(self.yaml_regex, "", file_data, count=1,
                      flags=re.MULTILINE | re.DOTALL)

    def sanitize_content_data(self, file_data=None):
        """Make Jekyll-flavoured Markdown suitable for Medium.

        Replaces curly double quotes with ', removes the more tag and turns
        every Liquid gist tag into a gist <script> embed for
        ``self.git_username``.

        :return: the cleaned text, or False when ``file_data`` is not a string.
        """
        if not isinstance(file_data, str):
            print("No file_data")
            return False
        flags = re.MULTILINE | re.DOTALL
        text = file_data.replace("“", "'").replace("”", "'")
        # remove the More tag
        text = re.sub(self.more_tag, "", text, flags=flags)

        def gist_embed(match):
            # A function replacement keeps backslashes in the user name or
            # gist id from being read as regex template escapes.
            return "<script src=\"https://gist.github.com/{}/{}.js\"></script>".format(
                self.git_username, match.group(1))

        return re.sub(self.gist_tag, gist_embed, text, flags=flags)

    @staticmethod
    def move_file(from_file=None, to_file=None):
        """Rename ``from_file`` to ``to_file``.

        :return: True when the file was moved, False (after printing the
                 reason) when it could not be.
        """
        try:
            os.rename(from_file, to_file)
        except (OSError, TypeError, ValueError) as e:
            print('Could not move {} to {}: {}'.format(from_file, to_file, e))
            return False
        return True
if __name__ == '__main__':
    # To make this work you need:
    #   * Python 3.4 or higher
    #   * PyYAML          > pip install PyYAML
    #   * a Medium token  > https://help.medium.com/hc/en-us/articles/213480228-Get-integration-token
    # That's all.
    token = ""  # Your Medium token
    md_folder = "mds"  # folder where your md files are
    backup_folder = "backup"  # folder where the md's are put when sent to Medium
    git_username = ""  # your github username

    # If you want to publish to a publication you can find its id with
    #     print(imd.publications_response)
    # The id is in the JSON string. Leave it empty to post to your own profile.
    publication_id = ""  # your publication ID
    publishStatus = "public"  # "public", "draft", or "unlisted"

    # general_tag is needed when only one tag is provided:
    # the Medium API breaks when you only have one tag.
    general_tag = "tag1"
    tags = ["tag2", "tag3", "tag5"]  # default tags, added to every post's categories

    # The license of the post. Valid values are "all-rights-reserved", "cc-40-by",
    # "cc-40-by-sa", "cc-40-by-nd", "cc-40-by-nc", "cc-40-by-nc-nd", "cc-40-by-nc-sa",
    # "cc-40-zero", "public-domain". The default is "all-rights-reserved".
    md_license = 'cc-40-by-nc'
    blog_url = "https://www.yoururl.com"  # domain of your current jekyll blog
    # "old url;new url" lines, usable to create e.g. .htaccess url redirects
    csv_file = "a-file-name.txt"
    run = 8  # you don't want to run more than 9 [0-8] a day! Medium does not like that.

    def to_tag_list(value):
        # Front matter may hold a YAML list, a space separated string or nothing.
        if value is None:
            return []
        if isinstance(value, str):
            return value.split()
        return [str(item) for item in value]

    # The constructor already validates the token and fetches the publications,
    # so no separate user_publications() call is needed here.
    imd = ImportMDs(token, git_username)

    dir_path = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(dir_path, md_folder)  # the folder where your MD files are
    backup = os.path.join(dir_path, backup_folder)  # the folder where your backup files will be
    os.makedirs(backup, exist_ok=True)  # moving a posted file fails without it

    nr = 0
    for filename in sorted(os.listdir(folder)):
        # Only Markdown files count towards the daily limit.
        if not filename.endswith(".md"):
            continue
        if nr > run > 0:  # run == 0 means: no limit
            break
        nr += 1
        print("Run nr: {}".format(nr))
        print("Process file: {}".format(filename))

        markdown_file = os.path.join(folder, filename)
        backup_file = os.path.join(backup, filename)

        data = imd.read_file(markdown_file)
        if data is False:
            print('Could not read {}, skipping'.format(filename))
            continue

        yml_data = imd.process_yml_data(data)
        if not isinstance(yml_data, dict):
            print('No usable front matter in {}, skipping'.format(filename))
            continue

        try:
            title = str(yml_data['title'])
            permalink = "{}{}".format(blog_url, str(yml_data['permalink']))
        except KeyError as e:
            print('Front matter of {} has no {}, skipping'.format(filename, e))
            continue

        # Build the tag list per file; the defaults in `tags` are never
        # modified, so one post's categories cannot leak into the next post.
        post_tags = to_tag_list(yml_data.get('categories')) + list(tags)
        post_tags = list(dict.fromkeys(post_tags))  # remove double tags
        file_tags = to_tag_list(yml_data.get('tags'))
        if file_tags:  # explicit tags in the post replace everything else
            post_tags = file_tags
        if len(post_tags) == 1:  # a single tag breaks the Medium API
            post_tags.append(general_tag)

        content = imd.return_content(data)
        pre_content = "# {}\n".format(title)
        if 'image' in yml_data:
            pre_content = "{}!['{}']({})\n".format(pre_content, title, str(yml_data['image']))
        content = imd.sanitize_content_data("{}{}".format(pre_content, content))

        my_payload = {
            'title': title,
            'contentFormat': 'markdown',
            'content': content,
            'tags': post_tags,
            'publishStatus': publishStatus,
            'canonicalUrl': permalink,
            'license': md_license,
        }

        if publication_id:
            posted = imd.post_publication_article(publication_id, my_payload)
        else:
            # No publication configured: post to the user's own profile.
            posted = imd.post_the_article(my_payload, imd.posts_url)

        if not posted:
            print('We found ourselves a little error')
            continue

        try:
            new_url = imd.post_data['data']['url']
        except (KeyError, TypeError):  # TypeError: no response was stored at all
            print(imd.post_data)
            print(my_payload)
            continue

        print('Yes, {} posted as {} at {}'.format(title, publishStatus, new_url))
        imd.move_file(markdown_file, backup_file)
        text = "{};{}\n".format(permalink, new_url)
        with open(csv_file, 'a') as f:
            f.write(text)

    print('all done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment