Skip to content

Instantly share code, notes, and snippets.

@micktwomey
Last active March 10, 2024 20:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save micktwomey/3b4ef5e7293a1594ccc4 to your computer and use it in GitHub Desktop.
Python script to export your gists (public or private) to a single Markdown file. This is useful for backing up or exporting. Doesn't preserve full revision history.
"""Noddy to dump out all your gists to Markdown
https://developer.github.com/v3/gists/
Requires the requests library.
"""
import argparse
import hashlib
import json
import logging
import os
import sys
import requests
# Markdown skeleton for one gist: a heading with the last-updated date and
# description, the gist URL, then the comment and file sections.
GIST_TEMPLATE = """
# {updated_at} {description}
{url}
## Comments
{comments}
## Files
{files}
"""
# One rendered comment: author login, timestamp, and the comment body.
COMMENT_TEMPLATE = """
### {login} @ {updated_at}
{body}
""".strip()
# One rendered file: filename heading and a fenced code block tagged with the
# (lower-cased) language for syntax highlighting. The heading placeholder was
# previously the literal text "(unknown)", which str.format silently left in
# place (extra keyword arguments are ignored) — restored to {filename}.
FILE_TEMPLATE = """
### {filename}
```{language}
{content}
```
""".strip()
class GitHubSession(object):
    """A requests.Session wrapper that caches GitHub API responses on disk.

    Every successful GET is written into ``cachedir`` under the SHA-256 hash
    of its URL, so re-running the script does not re-download resources.
    """

    def __init__(self, cachedir, token):
        """
        :param cachedir: directory to store cached responses in (created if missing)
        :param token: GitHub personal access token, sent on every request
        """
        self.session = requests.Session()
        self.session.headers.update({"Authorization": "token {}".format(token)})
        self.cachedir = cachedir
        try:
            os.makedirs(self.cachedir)
        except OSError:
            pass  # directory already exists

    def _cache_filename(self, url):
        """Return the on-disk cache path for *url* (SHA-256 of the URL)."""
        # Encode before hashing: hashlib requires bytes on Python 3.
        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
        return os.path.join(self.cachedir, digest)

    def _get(self, url, load, read, dump):
        """Retrieve a url and process it with provided functions

        :param url: URL to download
        :param load: callable which will be passed the cache file object
        :param read: callable which will be passed the requests response to process
        :param dump: callable which will be passed the output and output file object to write to
        """
        cache_filename = self._cache_filename(url)
        try:
            # Cache hit: serve straight from disk. ``with`` ensures the
            # handle is closed (the original leaked open file objects).
            with open(cache_filename, "rb") as fp:
                output = load(fp)
            logging.info("Cache HIT for {}".format(url))
        except IOError:
            logging.info("Cache MISS for {}".format(url))
            response = self.session.get(url)
            output = read(response)
            with open(cache_filename, "wb") as fp:
                dump(output, fp)
            # Re-load through the cache so hits and misses return
            # identically-processed data.
            with open(cache_filename, "rb") as fp:
                output = load(fp)
        return output

    def get_raw(self, url):
        """Get the URL and return the raw content as bytes."""
        return self._get(
            url,
            load=lambda fp: fp.read(),
            read=lambda response: response.content,
            dump=lambda output, fp: fp.write(output),
        )

    def get_json(self, url):
        """Get the URL and read content as JSON

        :returns: {"json": response.json(), "links": response.links}
        """
        # Encode/decode explicitly: the cache files are opened in binary
        # mode, and json.dump/json.load on binary file objects is not
        # portable across Python versions.
        return self._get(
            url,
            load=lambda fp: json.loads(fp.read().decode("utf-8")),
            read=lambda response: {"json": response.json(), "links": response.links},
            dump=lambda output, fp: fp.write(json.dumps(output).encode("utf-8")),
        )

    def get_all(self, url):
        """Uses link headers to fetch all items in a chain of URLs

        Assumes they return JSON lists

        Yields individual items.
        """
        response = self.get_json(url)
        for item in response["json"]:
            yield item
        # Follow pagination "next" links until the last page.
        while "next" in response["links"]:
            url = response["links"]["next"]["url"]
            response = self.get_json(url)
            for item in response["json"]:
                yield item
def main():
    """Command-line entry point: dump a user's gists to stdout as Markdown.

    Paginates through the GitHub gists API for the given username, optionally
    filtering public/private gists and skipping listed gist ids, then renders
    each gist (with its comments and files) through the module templates.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--private-only", help="Only download private gists", action="store_true", default=False)
    parser.add_argument("--public-only", help="Only download public gists", action="store_true", default=False)
    parser.add_argument("--max-file-size", help="Maximum size of a file to export (in bytes)", type=int, default=10 * 1024)
    # This is a directory (created via os.makedirs in GitHubSession), not a file.
    parser.add_argument("cache", help="Directory to cache responses in")
    parser.add_argument("token", help="Personal access token, see https://github.com/settings/applications")
    parser.add_argument("username", help="Username to dump out")
    parser.add_argument("skip", help="GIST ids to skip downloading", nargs="*")
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    session = GitHubSession(args.cache, args.token)
    for gist in session.get_all("https://api.github.com/users/{}/gists".format(args.username)):
        # Filtering: explicit skip list first, then public/private selection.
        if gist["id"] in args.skip:
            logging.info("Skipping gist {} {}".format(gist["url"], gist["description"]))
            continue
        if gist["public"] and args.private_only:
            logging.info("Skipping public gist {} {}".format(gist["url"], gist["description"]))
            continue
        if not gist["public"] and args.public_only:
            logging.info("Skipping private gist {} {}".format(gist["url"], gist["description"]))
            continue
        comments = []
        for comment in session.get_all(gist["comments_url"]):
            comments.append(COMMENT_TEMPLATE.format(
                login=comment["user"]["login"],
                body=comment["body"],
                updated_at=comment["updated_at"],
            ))
        files = []
        # .items() (not the Python 2-only .iteritems()) so the script runs
        # under both Python 2 and Python 3.
        for filename, file_info in gist["files"].items():
            # get_raw returns bytes on Python 3; decode for text processing.
            content = session.get_raw(file_info["raw_url"]).decode("utf-8")
            # The API can report "language": null, in which case .get() with a
            # default still returns None; normalise to "" so formatting never
            # emits the literal string "None".
            language = file_info.get("language") or ""
            if language.lower() == "markdown":
                # Indent markdown so it shows up literally in the output
                # document instead of being rendered as markup.
                content = "\n".join(" {}".format(line) for line in content.splitlines())
            language = language.lower()
            if len(content) > args.max_file_size:
                logging.info("File {} too big, skipping".format(filename))
                content = "\nToo long to show, see original GIST for content\n"
                language = ""
            files.append(FILE_TEMPLATE.format(
                filename=filename,
                language=language,
                content=content,
            ))
        comments = "\n".join(comments) if comments else "\nNo Comments"
        # Keep only the date portion of the ISO-8601 timestamp.
        updated_at = gist["updated_at"].split("T")[0]
        sys.stdout.write(GIST_TEMPLATE.format(
            updated_at=updated_at,
            description=gist["description"],
            url=gist["url"],
            comments=comments,
            files="\n".join(files),
        ))


if __name__ == '__main__':
    main()
@kusal1990
Copy link

ok

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment