tvdsluijs/jekyll2medium.py

## jekyll2medium.py
import requests
import json
import os
import re
import yaml


class ImportMDs:
    def __init__(self, m_token=None, git_username=None):
        try:
            if m_token == git_username is None:
                raise ValueError('No Medium Token or no Github user name')

            self.git_username = git_username
            self.access_token = m_token
            self.headers = {
                'Authorization': "Bearer " + self.access_token,
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, '
                              'like Gecko) Chrome/63.0.3239.84 Safari/537.36'}

            self.base_url = "https://api.medium.com/v1/"

            self.yaml_regex = r"^---\n(.*)---$"
            self.more_tag = r"<!--more-->"
            self.gist_tag = r"\{\%\s+gist\s+(.*)\s+\%\}"

            self.post_data = None

            self.json_response = None
            self.user_id = None
            self.user_url = None
            self.posts_url = None
            self.publications_response = None

            self.valid_token()
            self.parse_user_data()
            self.user_publications()

        except ValueError as e:
            print(e)

    def valid_token(self):
        # Validate access token by making a call to https://api.medium.com/v1/me
        self.json_response = json.loads(requests.request("GET", self.base_url + 'me', headers=self.headers).text)

        try:
            self.json_response['data']
            return True
        except KeyError:
            print(self.json_response)
            return False

    def user_publications(self):
        self.publications_response = requests.request("GET", self.user_url + 'publications/', headers=self.headers).text

    def parse_user_data(self):
        try:
            self.user_id = self.json_response['data']['id']
            self.user_url = self.base_url + 'users/' + self.user_id + '/'
            self.posts_url = self.user_url + 'posts/'
        except KeyError as e:
            print(e)
            return False

    def post_the_article(self, payload=None, post_url=None):
        try:
            if payload == post_url is None:
                raise ValueError('No payload or post_url')

            self.post_data = None
            response = requests.request('POST', post_url, data=payload, headers=self.headers)
            self.post_data = response.json()

            self.post_data['data']
            return True
        except ValueError as e:
            print(e)
            return False
        except KeyError as e:
            print(e)
            return False

    def post_one_article(self, payload=None):
        try:
            if payload is None:
                raise ValueError("No payload ")

            self.post_article(payload, self.posts_url)
        except ValueError as e:
            print(e)
            return False

    def post_publication_article(self, publication_id=None, payload=None):
        try:
            if payload == publication_id is None:
                raise ValueError("No payload or publication_id")

            posts_url = "https://api.medium.com/v1/publications/{}/posts".format(publication_id)
            self.post_the_article(payload, posts_url)
        except ValueError as e:
            print(e)
            return False
        return True

    @staticmethod
    def read_file(my_file=None):
        try:
            if my_file is None:
                raise ValueError("No my_file")

            file = open(my_file, "r")
            file_data = file.read()
            file.close()
            return file_data
        except ValueError as e:
            print(e)
            return False
        except:  # Yes... this aint pretty
            return False

    def process_yml_data(self, file_data=None):
        try:
            if file_data is None:
                raise ValueError("No file_data")
            x = re.search(self.yaml_regex, file_data, re.MULTILINE | re.DOTALL)

            my_yaml = (x.group().replace("---", ""))
            return yaml.load(my_yaml, Loader=yaml.FullLoader)
        except ValueError as e:
            print(e)
            return False

    def return_content(self, file_data=None):
        try:
            if file_data is None:
                raise ValueError("No file_data")

            return re.sub(self.yaml_regex, "", file_data, 0, re.MULTILINE | re.DOTALL)
        except ValueError as e:
            print(e)
            return False

    def sanitize_content_data(self, file_data=None):
        try:
            if file_data is None:
                raise ValueError("No file_data")

            file_data = file_data.replace("“", "'")
            file_data = file_data.replace("”", "'")

            # remove the More tag
            file_data = re.sub(self.more_tag, "", file_data, 0, re.MULTILINE | re.DOTALL)

            gist_subst = "<script src=\"https://gist.github.com/{}/\\1.js\"></script>".format(self.git_username)
            file_data = re.sub(self.gist_tag, gist_subst, file_data, 0, re.MULTILINE | re.DOTALL)
            return file_data
        except ValueError as e:
            print(e)
            return False

    @staticmethod
    def move_file(from_file=None, to_file=None):
        try:
            os.rename(from_file, to_file)
        except:
            print('this file rename error should be better')


def normalize_terms(value):
    """Return a front-matter ``categories``/``tags`` value as a list of strings.

    The value may be missing (``None``), a list, or one string.  A string is
    split on whitespace; calling ``list()`` on it, as the original did, would
    produce a list of single characters.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return value.split()
    if isinstance(value, (list, tuple, set)):
        return [str(item) for item in value]
    return [str(value)]


def collect_tags(front_matter, default_tags, fallback_tag):
    """Build the tag list for one post without touching ``default_tags``.

    The post's own ``tags`` win when it has any; otherwise its ``categories``
    are combined with ``default_tags``.  Duplicates are dropped (first
    occurrence kept) and ``fallback_tag`` is appended when a single tag is
    left, so the list never has exactly one entry.
    """
    chosen = normalize_terms(front_matter.get('tags'))
    if not chosen:
        chosen = normalize_terms(front_matter.get('categories')) + list(default_tags)

    chosen = list(dict.fromkeys(chosen))  # remove double tags, keep order
    if len(chosen) == 1:
        chosen.append(fallback_tag)
    return chosen


def build_markdown(title, body, image=None):
    """Return ``body`` prefixed with an H1 title and, if given, the lead image."""
    heading = "# {}\n".format(title)
    if image is not None:
        heading = "{}!['{}']({})\n".format(heading, title, str(image))
    return "{}{}".format(heading, body)


if __name__ == '__main__':
    # To make this work you need:
    #   * Python 3
    #   * PyYAML  (pip install PyYAML)
    #   * requests  (pip install requests)
    #   * a Medium integration token, see
    #     https://help.medium.com/hc/en-us/articles/213480228-Get-integration-token
    token = ""  # your Medium token
    md_folder = "mds"  # folder where your md files are
    backup_folder = "backup"  # folder the md files are moved to once sent to Medium

    git_username = ""  # your GitHub user name

    # To publish to a publication, look up its id in the JSON printed by
    #   print(imd.publications_response)
    # Leave it empty to publish on your own profile.
    publication_id = ""  # your publication id

    publishStatus = "public"  # "public", "draft", or "unlisted"

    # general_tag is added when a post ends up with exactly one tag; the
    # original author found the Medium API breaks on a single tag.
    general_tag = "tag1"
    tags = ["tag2", "tag3", "tag5"]  # defaults for posts without tags of their own

    # The license of the post. Valid values are "all-rights-reserved",
    # "cc-40-by", "cc-40-by-sa", "cc-40-by-nd", "cc-40-by-nc", "cc-40-by-nc-nd",
    # "cc-40-by-nc-sa", "cc-40-zero", "public-domain".
    # The default is "all-rights-reserved".
    md_license = 'cc-40-by-nc'

    blog_url = "https://www.yoururl.com"  # domain of your current Jekyll blog

    # "old url;new url" lines, usable to create e.g. .htaccess redirects
    csv_file = "a-file-name.txt"

    # Posts are processed until the counter exceeds ``run`` (so 8 means at most
    # 9 posts).  The original author advises against more than that per day.
    # Zero or a negative value disables the limit.
    run = 8

    imd = ImportMDs(token, git_username)
    if getattr(imd, 'user_id', None) is None:
        # Nothing can be posted without an authenticated user.
        raise SystemExit('Could not load your Medium user, check the token')

    dir_path = os.path.dirname(os.path.realpath(__file__))
    folder = os.path.join(dir_path, md_folder)  # the folder where your MD files are
    backup = os.path.join(dir_path, backup_folder)  # the folder where your backup files will be

    nr = 0
    # sorted() makes the processing order repeatable between runs.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".md"):
            continue  # other files must not use up the daily quota
        if nr > run > 0:
            break
        nr += 1
        print("Run nr: {}".format(nr))
        print("Process file: {}".format(filename))

        markdown_file = os.path.join(folder, filename)
        backup_file = os.path.join(backup, filename)

        data = imd.read_file(markdown_file)
        if data is False:
            print('Could not read {}, skipping'.format(filename))
            continue

        yml_data = imd.process_yml_data(data)
        if not isinstance(yml_data, dict):
            print('No usable front matter in {}, skipping'.format(filename))
            continue

        try:
            title = str(yml_data['title'])
            permalink = "{}{}".format(blog_url, str(yml_data['permalink']))
        except KeyError as missing:
            print('{} has no {} in its front matter, skipping'.format(filename, missing))
            continue

        # Built fresh for every post: the original rebound ``tags`` itself, so
        # one post's categories and tags leaked into all following posts.
        post_tags = collect_tags(yml_data, tags, general_tag)

        content = build_markdown(title, imd.return_content(data), yml_data.get('image'))
        content = imd.sanitize_content_data(content)

        my_payload = {
            'title': title,
            'contentFormat': 'markdown',
            'content': content,
            'tags': post_tags,
            'publishStatus': publishStatus,
            'canonicalUrl': permalink,
            'license': md_license
        }

        if publication_id:
            posted = imd.post_publication_article(publication_id, my_payload)
        else:
            # No publication configured: post to the user's own posts endpoint.
            posted = imd.post_the_article(my_payload, imd.posts_url)

        if not posted:
            print('We found ourselves a little error')
            continue

        try:
            new_url = imd.post_data['data']['url']
        except (KeyError, TypeError):
            print(imd.post_data)
            print(my_payload)
            continue

        print('Yes, {} posted as {} at {}'.format(title, publishStatus, new_url))

        imd.move_file(markdown_file, backup_file)

        text = "{};{}\n".format(permalink, new_url)
        with open(csv_file, 'a', encoding='utf-8') as f:
            f.write(text)

    print('all done!')
	import requests
	import json
	import os
	import re
	import yaml


	class ImportMDs:
	def __init__(self, m_token=None, git_username=None):
	try:
	if m_token == git_username is None:
	raise ValueError('No Medium Token or no Github user name')

	self.git_username = git_username
	self.access_token = m_token
	self.headers = {
	'Authorization': "Bearer " + self.access_token,
	'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, '
	'like Gecko) Chrome/63.0.3239.84 Safari/537.36'}

	self.base_url = "https://api.medium.com/v1/"

	self.yaml_regex = r"^---\n(.*)---$"
	self.more_tag = r"<!--more-->"
	self.gist_tag = r"\{\%\s+gist\s+(.*)\s+\%\}"

	self.post_data = None

	self.json_response = None
	self.user_id = None
	self.user_url = None
	self.posts_url = None
	self.publications_response = None

	self.valid_token()
	self.parse_user_data()
	self.user_publications()

	except ValueError as e:
	print(e)

	def valid_token(self):
	# Validate access token by making a call to https://api.medium.com/v1/me
	self.json_response = json.loads(requests.request("GET", self.base_url + 'me', headers=self.headers).text)

	try:
	self.json_response['data']
	return True
	except KeyError:
	print(self.json_response)
	return False

	def user_publications(self):
	self.publications_response = requests.request("GET", self.user_url + 'publications/', headers=self.headers).text

	def parse_user_data(self):
	try:
	self.user_id = self.json_response['data']['id']
	self.user_url = self.base_url + 'users/' + self.user_id + '/'
	self.posts_url = self.user_url + 'posts/'
	except KeyError as e:
	print(e)
	return False

	def post_the_article(self, payload=None, post_url=None):
	try:
	if payload == post_url is None:
	raise ValueError('No payload or post_url')

	self.post_data = None
	response = requests.request('POST', post_url, data=payload, headers=self.headers)
	self.post_data = response.json()

	self.post_data['data']
	return True
	except ValueError as e:
	print(e)
	return False
	except KeyError as e:
	print(e)
	return False

	def post_one_article(self, payload=None):
	try:
	if payload is None:
	raise ValueError("No payload ")

	self.post_article(payload, self.posts_url)
	except ValueError as e:
	print(e)
	return False

	def post_publication_article(self, publication_id=None, payload=None):
	try:
	if payload == publication_id is None:
	raise ValueError("No payload or publication_id")

	posts_url = "https://api.medium.com/v1/publications/{}/posts".format(publication_id)
	self.post_the_article(payload, posts_url)
	except ValueError as e:
	print(e)
	return False
	return True

	@staticmethod
	def read_file(my_file=None):
	try:
	if my_file is None:
	raise ValueError("No my_file")

	file = open(my_file, "r")
	file_data = file.read()
	file.close()
	return file_data
	except ValueError as e:
	print(e)
	return False
	except: # Yes... this aint pretty
	return False

	def process_yml_data(self, file_data=None):
	try:
	if file_data is None:
	raise ValueError("No file_data")
	x = re.search(self.yaml_regex, file_data, re.MULTILINE \| re.DOTALL)

	my_yaml = (x.group().replace("---", ""))
	return yaml.load(my_yaml, Loader=yaml.FullLoader)
	except ValueError as e:
	print(e)
	return False

	def return_content(self, file_data=None):
	try:
	if file_data is None:
	raise ValueError("No file_data")

	return re.sub(self.yaml_regex, "", file_data, 0, re.MULTILINE \| re.DOTALL)
	except ValueError as e:
	print(e)
	return False

	def sanitize_content_data(self, file_data=None):
	try:
	if file_data is None:
	raise ValueError("No file_data")

	file_data = file_data.replace("“", "'")
	file_data = file_data.replace("”", "'")

	# remove the More tag
	file_data = re.sub(self.more_tag, "", file_data, 0, re.MULTILINE \| re.DOTALL)

	gist_subst = "<script src=\"https://gist.github.com/{}/\\1.js\"></script>".format(self.git_username)
	file_data = re.sub(self.gist_tag, gist_subst, file_data, 0, re.MULTILINE \| re.DOTALL)
	return file_data
	except ValueError as e:
	print(e)
	return False

	@staticmethod
	def move_file(from_file=None, to_file=None):
	try:
	os.rename(from_file, to_file)
	except:
	print('this file rename error should be better')


	def normalize_terms(value):
	    """Return a front-matter ``categories``/``tags`` value as a list of strings.

	    The value may be missing (``None``), a list, or one string.  A string is
	    split on whitespace; calling ``list()`` on it would produce a list of
	    single characters.
	    """
	    if value is None:
	        return []
	    if isinstance(value, str):
	        return value.split()
	    if isinstance(value, (list, tuple, set)):
	        return [str(item) for item in value]
	    return [str(value)]


	def collect_tags(front_matter, default_tags, fallback_tag):
	    """Build the tag list for one post without touching ``default_tags``.

	    The post's own ``tags`` win when it has any; otherwise its ``categories``
	    are combined with ``default_tags``.  Duplicates are dropped (first
	    occurrence kept) and ``fallback_tag`` is appended when a single tag is
	    left, so the list never has exactly one entry.
	    """
	    chosen = normalize_terms(front_matter.get('tags'))
	    if not chosen:
	        chosen = normalize_terms(front_matter.get('categories')) + list(default_tags)

	    chosen = list(dict.fromkeys(chosen))  # remove double tags, keep order
	    if len(chosen) == 1:
	        chosen.append(fallback_tag)
	    return chosen


	def build_markdown(title, body, image=None):
	    """Return ``body`` prefixed with an H1 title and, if given, the lead image."""
	    heading = "# {}\n".format(title)
	    if image is not None:
	        heading = "{}!['{}']({})\n".format(heading, title, str(image))
	    return "{}{}".format(heading, body)


	if __name__ == '__main__':
	    # To make this work you need:
	    #   * Python 3
	    #   * PyYAML  (pip install PyYAML)
	    #   * requests  (pip install requests)
	    #   * a Medium integration token, see
	    #     https://help.medium.com/hc/en-us/articles/213480228-Get-integration-token
	    token = ""  # your Medium token
	    md_folder = "mds"  # folder where your md files are
	    backup_folder = "backup"  # folder the md files are moved to once sent to Medium

	    git_username = ""  # your GitHub user name

	    # To publish to a publication, look up its id in the JSON printed by
	    #   print(imd.publications_response)
	    # Leave it empty to publish on your own profile.
	    publication_id = ""  # your publication id

	    publishStatus = "public"  # "public", "draft", or "unlisted"

	    # general_tag is added when a post ends up with exactly one tag; the
	    # original author found the Medium API breaks on a single tag.
	    general_tag = "tag1"
	    tags = ["tag2", "tag3", "tag5"]  # defaults for posts without tags of their own

	    # The license of the post. Valid values are "all-rights-reserved",
	    # "cc-40-by", "cc-40-by-sa", "cc-40-by-nd", "cc-40-by-nc", "cc-40-by-nc-nd",
	    # "cc-40-by-nc-sa", "cc-40-zero", "public-domain".
	    # The default is "all-rights-reserved".
	    md_license = 'cc-40-by-nc'

	    blog_url = "https://www.yoururl.com"  # domain of your current Jekyll blog

	    # "old url;new url" lines, usable to create e.g. .htaccess redirects
	    csv_file = "a-file-name.txt"

	    # Posts are processed until the counter exceeds ``run`` (so 8 means at most
	    # 9 posts).  The original author advises against more than that per day.
	    # Zero or a negative value disables the limit.
	    run = 8

	    imd = ImportMDs(token, git_username)
	    if getattr(imd, 'user_id', None) is None:
	        # Nothing can be posted without an authenticated user.
	        raise SystemExit('Could not load your Medium user, check the token')

	    dir_path = os.path.dirname(os.path.realpath(__file__))
	    folder = os.path.join(dir_path, md_folder)  # the folder where your MD files are
	    backup = os.path.join(dir_path, backup_folder)  # the folder where your backup files will be

	    nr = 0
	    # sorted() makes the processing order repeatable between runs.
	    for filename in sorted(os.listdir(folder)):
	        if not filename.endswith(".md"):
	            continue  # other files must not use up the daily quota
	        if nr > run > 0:
	            break
	        nr += 1
	        print("Run nr: {}".format(nr))
	        print("Process file: {}".format(filename))

	        markdown_file = os.path.join(folder, filename)
	        backup_file = os.path.join(backup, filename)

	        data = imd.read_file(markdown_file)
	        if data is False:
	            print('Could not read {}, skipping'.format(filename))
	            continue

	        yml_data = imd.process_yml_data(data)
	        if not isinstance(yml_data, dict):
	            print('No usable front matter in {}, skipping'.format(filename))
	            continue

	        try:
	            title = str(yml_data['title'])
	            permalink = "{}{}".format(blog_url, str(yml_data['permalink']))
	        except KeyError as missing:
	            print('{} has no {} in its front matter, skipping'.format(filename, missing))
	            continue

	        # Built fresh for every post so one post's categories and tags never
	        # leak into the posts that follow it.
	        post_tags = collect_tags(yml_data, tags, general_tag)

	        content = build_markdown(title, imd.return_content(data), yml_data.get('image'))
	        content = imd.sanitize_content_data(content)

	        my_payload = {
	            'title': title,
	            'contentFormat': 'markdown',
	            'content': content,
	            'tags': post_tags,
	            'publishStatus': publishStatus,
	            'canonicalUrl': permalink,
	            'license': md_license
	        }

	        if publication_id:
	            posted = imd.post_publication_article(publication_id, my_payload)
	        else:
	            # No publication configured: post to the user's own posts endpoint.
	            posted = imd.post_the_article(my_payload, imd.posts_url)

	        if not posted:
	            print('We found ourselves a little error')
	            continue

	        try:
	            new_url = imd.post_data['data']['url']
	        except (KeyError, TypeError):
	            print(imd.post_data)
	            print(my_payload)
	            continue

	        print('Yes, {} posted as {} at {}'.format(title, publishStatus, new_url))

	        imd.move_file(markdown_file, backup_file)

	        text = "{};{}\n".format(permalink, new_url)
	        with open(csv_file, 'a', encoding='utf-8') as f:
	            f.write(text)

	    print('all done!')