Skip to content

Instantly share code, notes, and snippets.

@micktwomey
Last active March 10, 2024 20:55
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save micktwomey/3b4ef5e7293a1594ccc4 to your computer and use it in GitHub Desktop.
Python script to export your gists (public or private) to a single Markdown file. This is useful for backing up or exporting. Doesn't preserve full revision history.
"""Noddy to dump out all your gists to Markdown
https://developer.github.com/v3/gists/
Requires the requests library.
"""
import argparse
import hashlib
import json
import logging
import os
import sys
import requests
# Markdown skeleton for one gist: a heading with the last-updated date and
# description, the gist URL, then the comment and file sections.
GIST_TEMPLATE = """
# {updated_at} {description}
{url}
## Comments
{comments}
## Files
{files}
"""
# One rendered comment: author login, timestamp, and the comment body.
COMMENT_TEMPLATE = """
### {login} @ {updated_at}
{body}
""".strip()
# One rendered file: filename heading and a fenced code block tagged with the
# (lower-cased) language for syntax highlighting. The heading placeholder was
# previously the literal text "(unknown)", which str.format silently left in
# place (extra keyword arguments are ignored) — restored to {filename}.
FILE_TEMPLATE = """
### {filename}
```{language}
{content}
```
""".strip()
class GitHubSession(object):
    """A requests.Session wrapper that caches GitHub API responses on disk.

    Every successful GET is written into ``cachedir`` under the SHA-256 hash
    of its URL, so re-running the script does not re-download resources.
    """

    def __init__(self, cachedir, token):
        """
        :param cachedir: directory to store cached responses in (created if missing)
        :param token: GitHub personal access token, sent on every request
        """
        self.session = requests.Session()
        self.session.headers.update({"Authorization": "token {}".format(token)})
        self.cachedir = cachedir
        try:
            os.makedirs(self.cachedir)
        except OSError:
            pass  # directory already exists

    def _cache_filename(self, url):
        """Return the on-disk cache path for *url* (SHA-256 of the URL)."""
        # Encode before hashing: hashlib requires bytes on Python 3.
        digest = hashlib.sha256(url.encode("utf-8")).hexdigest()
        return os.path.join(self.cachedir, digest)

    def _get(self, url, load, read, dump):
        """Retrieve a url and process it with provided functions

        :param url: URL to download
        :param load: callable which will be passed the cache file object
        :param read: callable which will be passed the requests response to process
        :param dump: callable which will be passed the output and output file object to write to
        """
        cache_filename = self._cache_filename(url)
        try:
            # Cache hit: serve straight from disk. ``with`` ensures the
            # handle is closed (the original leaked open file objects).
            with open(cache_filename, "rb") as fp:
                output = load(fp)
            logging.info("Cache HIT for {}".format(url))
        except IOError:
            logging.info("Cache MISS for {}".format(url))
            response = self.session.get(url)
            output = read(response)
            with open(cache_filename, "wb") as fp:
                dump(output, fp)
            # Re-load through the cache so hits and misses return
            # identically-processed data.
            with open(cache_filename, "rb") as fp:
                output = load(fp)
        return output

    def get_raw(self, url):
        """Get the URL and return the raw content as bytes."""
        return self._get(
            url,
            load=lambda fp: fp.read(),
            read=lambda response: response.content,
            dump=lambda output, fp: fp.write(output),
        )

    def get_json(self, url):
        """Get the URL and read content as JSON

        :returns: {"json": response.json(), "links": response.links}
        """
        # Encode/decode explicitly: the cache files are opened in binary
        # mode, and json.dump/json.load on binary file objects is not
        # portable across Python versions.
        return self._get(
            url,
            load=lambda fp: json.loads(fp.read().decode("utf-8")),
            read=lambda response: {"json": response.json(), "links": response.links},
            dump=lambda output, fp: fp.write(json.dumps(output).encode("utf-8")),
        )

    def get_all(self, url):
        """Uses link headers to fetch all items in a chain of URLs

        Assumes they return JSON lists

        Yields individual items.
        """
        response = self.get_json(url)
        for item in response["json"]:
            yield item
        # Follow pagination "next" links until the last page.
        while "next" in response["links"]:
            url = response["links"]["next"]["url"]
            response = self.get_json(url)
            for item in response["json"]:
                yield item
def main():
    """Command-line entry point: dump a user's gists to stdout as Markdown.

    Paginates through the GitHub gists API for the given username, optionally
    filtering public/private gists and skipping listed gist ids, then renders
    each gist (with its comments and files) through the module templates.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--private-only", help="Only download private gists", action="store_true", default=False)
    parser.add_argument("--public-only", help="Only download public gists", action="store_true", default=False)
    parser.add_argument("--max-file-size", help="Maximum size of a file to export (in bytes)", type=int, default=10 * 1024)
    # This is a directory (created via os.makedirs in GitHubSession), not a file.
    parser.add_argument("cache", help="Directory to cache responses in")
    parser.add_argument("token", help="Personal access token, see https://github.com/settings/applications")
    parser.add_argument("username", help="Username to dump out")
    parser.add_argument("skip", help="GIST ids to skip downloading", nargs="*")
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    session = GitHubSession(args.cache, args.token)
    for gist in session.get_all("https://api.github.com/users/{}/gists".format(args.username)):
        # Filtering: explicit skip list first, then public/private selection.
        if gist["id"] in args.skip:
            logging.info("Skipping gist {} {}".format(gist["url"], gist["description"]))
            continue
        if gist["public"] and args.private_only:
            logging.info("Skipping public gist {} {}".format(gist["url"], gist["description"]))
            continue
        if not gist["public"] and args.public_only:
            logging.info("Skipping private gist {} {}".format(gist["url"], gist["description"]))
            continue
        comments = []
        for comment in session.get_all(gist["comments_url"]):
            comments.append(COMMENT_TEMPLATE.format(
                login=comment["user"]["login"],
                body=comment["body"],
                updated_at=comment["updated_at"],
            ))
        files = []
        # .items() (not the Python 2-only .iteritems()) so the script runs
        # under both Python 2 and Python 3.
        for filename, file_info in gist["files"].items():
            # get_raw returns bytes on Python 3; decode for text processing.
            content = session.get_raw(file_info["raw_url"]).decode("utf-8")
            # The API can report "language": null, in which case .get() with a
            # default still returns None; normalise to "" so formatting never
            # emits the literal string "None".
            language = file_info.get("language") or ""
            if language.lower() == "markdown":
                # Indent markdown so it shows up literally in the output
                # document instead of being rendered as markup.
                content = "\n".join(" {}".format(line) for line in content.splitlines())
            language = language.lower()
            if len(content) > args.max_file_size:
                logging.info("File {} too big, skipping".format(filename))
                content = "\nToo long to show, see original GIST for content\n"
                language = ""
            files.append(FILE_TEMPLATE.format(
                filename=filename,
                language=language,
                content=content,
            ))
        comments = "\n".join(comments) if comments else "\nNo Comments"
        # Keep only the date portion of the ISO-8601 timestamp.
        updated_at = gist["updated_at"].split("T")[0]
        sys.stdout.write(GIST_TEMPLATE.format(
            updated_at=updated_at,
            description=gist["description"],
            url=gist["url"],
            comments=comments,
            files="\n".join(files),
        ))


if __name__ == '__main__':
    main()
@kusal1990
Copy link

ok

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment