Skip to content

Instantly share code, notes, and snippets.

Created June 5, 2011 20:44
Show Gist options
  • Save codebrainz/1009404 to your computer and use it in GitHub Desktop.
Save codebrainz/1009404 to your computer and use it in GitHub Desktop.
On the fly tarball creation CGI script.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2011 Matthew Brush <>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
import os
import sys
import cgi
import cgitb
import tarfile
from cStringIO import StringIO
# Deployment configuration -- review every value below when deploying!

# cgitb renders tracebacks for uncaught exceptions.  Enable exactly one of
# the following calls (see the cgitb module documentation):
#   debugging:   cgitb.enable()
#   production:  cgitb.enable(display=0, logdir="/tmp")

# Root directory that is searched (recursively) for tag files.
LOCAL_FOLDER = "/home/mbrush/Projects/Geany/geany-tags"

# URL scheme used when building links back to this script.  The CGI
# SERVER_PROTOCOL variable normally looks like "HTTP/1.1" even for TLS
# requests, so the HTTPS variable set by common web servers is checked
# first; the SERVER_PROTOCOL test is kept as a fallback.  os.environ.get()
# keeps the module importable outside of a CGI environment.
PROTOCOL = (
    "https"
    if os.environ.get("HTTPS", "").lower() in ("on", "1")
    or "https" in os.environ.get("SERVER_PROTOCOL", "").lower()
    else "http"
)

# Only files whose names end with one of these suffixes are archived.
FILENAME_FILTERS = [".tags"]

# Content-Type sent with the generated archive.
MIME_FORMAT = "application/octet-stream"

# Directory inside the archive that holds the files (unless tarbomb is set).
ARCHIVE_DIR = "tags"

# Values accepted for the "language" request argument (besides "all").
VALID_LANGS = [
    "abc", "actionscript", "ada", "asm", "c", "c#", "c++", "caml", "cmake",
    "cobol", "conf", "css", "d", "diff", "docbook", "erlang", "f77", "ferite",
    "forth", "fortran", "freebasic", "genie", "glsl", "haskell", "haxe",
    "html", "java", "javascript", "latex", "lisp", "lua", "make", "markdown",
    "matlab", "nsis", "pascal", "perl", "php", "po", "python", "r", "rest",
    "ruby", "scala", "sh", "sql", "tcl", "txt2tags", "vala", "verilog", "vhdl",
    "xml", "yaml",
]

# Values accepted for the "format" request argument, mapped to a tuple of
# (file name extension, tarfile.open() mode).
FORMATS = {
    "tar": (".tar", "w"),
    "bz2": (".tar.bz2", "w:bz2"),
    "bzip2": (".tar.bz2", "w:bz2"),
    "gzip": (".tar.gz", "w:gz"),
    "gz": (".tar.gz", "w:gz"),
    "tgz": (".tgz", "w:gz"),
}
def print_header(mime_type, fn, length):
    """Write the HTTP headers announcing a file download to stdout.

    mime_type is sent as the Content-Type, fn as the suggested download
    file name and length (an integer number of bytes) as Content-Length.
    The headers are terminated by the mandatory blank line, so the body
    must be written right after this call.
    """
    header = (
        "Content-Type: %s\n"
        "Content-Disposition: attachment; filename=%s\n"
        "Content-Length: %d\n"
        "\n"
    ) % (mime_type, fn, length)
    sys.stdout.write(header)
    # Push the headers out before any raw body bytes are written.
    sys.stdout.flush()
def print_data(buf):
    """Write the contents of an in-memory file object to the browser.

    buf must provide getvalue() (e.g. a StringIO() object).  Make sure to
    call print_header() before calling this function.
    """
    # Flush any text already queued on stdout so it precedes the raw bytes.
    sys.stdout.flush()
    # Where stdout is a text stream exposing a binary "buffer", write the
    # bytes there; otherwise write to stdout itself.
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    stream.write(buf.getvalue())
    stream.flush()
def print_error(msg):
    """Write an HTML error page containing msg to the browser.

    msg is inserted into the page verbatim (it may contain markup), so the
    caller must escape any untrusted text it embeds.  Don't call this after
    calling print_header() since it sends its own (different) header.
    """
    content = """Content-Type: text/html

<html>
<head><title>Error</title></head>
<body>
<h1 style="color: red">Error</h1>
<div>%s</div>
<p>See the <a href="%s://%s%s?help">help page</a> for usage information.</p>
</body>
</html>""" % (
        msg,
        PROTOCOL,
        # .get() keeps the page printable even without a full CGI environment.
        os.environ.get("SERVER_NAME", "localhost"),
        os.environ.get("SCRIPT_NAME", ""),
    )
    sys.stdout.write(content)
    sys.stdout.flush()
def print_help():
    """Write an HTML help page describing the arguments of this script.

    The page lists the accepted values for the language, format and
    tarbomb arguments and shows an example request URL built from the
    current server name and script path.
    """
    content = """Content-Type: text/html

<html>
<head><title>Help</title></head>
<body>
<h1>Help</h1>
<p>Example: <code>%s://%s%s?language=python&amp;format=bzip2&amp;tarbomb=false</code></p>
<p>To use this script, use any or all of the following arguments:</p>
<ul>
<li>
<code>language</code> - The language of the tag files to download
which defaults to <code>all</code> if left blank. Allowed values:
<p><code>all, %s</code></p>
</li>
<li>
<code>format</code> - Format of the archive file to download
which defaults to <code>bzip2</code> (.tar.bz2). Allowed values:
<p><code>%s</code></p>
</li>
<li>
<code>tarbomb</code> - Whether to place all the files in the
root of the archive or in a subdir. If tarbomb is
<code>true</code>, when you extract the archive in the usual
way, all of the files will be in your current working directory.
Allowed values:
<p><code>true, false</code></p>
</li>
<li><code>help</code> - Shows this help page.</li>
</ul>
</body>
</html>""" % (
        PROTOCOL,
        # .get() keeps the page printable even without a full CGI environment.
        os.environ.get("SERVER_NAME", "localhost"),
        os.environ.get("SCRIPT_NAME", ""),
        ', '.join(VALID_LANGS),
        # Sorted so the listing is stable regardless of dict ordering.
        ', '.join(sorted(FORMATS)),
    )
    sys.stdout.write(content)
    sys.stdout.flush()
def find_files(base_dir=LOCAL_FOLDER, filters=FILENAME_FILTERS):
    """Yield the path of every file below base_dir matching a filter.

    base_dir is walked recursively with os.walk(); a file matches when its
    name ends with any of the suffixes in filters.  Each yielded path is
    the containing directory joined with the file name.
    """
    suffixes = tuple(filters)
    for folder, _subdirs, names in os.walk(base_dir):
        for name in names:
            # str.endswith() accepts a tuple and is true if any suffix matches.
            if name.endswith(suffixes):
                yield os.path.join(folder, name)
def file_size(fobj):
    """Return the size in bytes of a seekable file object.

    The size is measured by seeking to the end of fobj.  Afterwards fobj
    is rewound to the beginning (not to its previous position), leaving it
    ready to be read from the start.
    """
    fobj.seek(0, os.SEEK_END)
    size = fobj.tell()
    fobj.seek(0, os.SEEK_SET)
    return size
def create_archive(fmt, base_dir=LOCAL_FOLDER, filters=FILENAME_FILTERS, tarbomb=False):
    """Build an in-memory tarball of the matching files below base_dir.

    fmt selects the type of tarball and must be a key of FORMATS (tar,
    bz2, gzip, ...); filters is the list of file name suffixes to include.
    If tarbomb is true the files are stored in the root of the archive,
    otherwise inside the ARCHIVE_DIR directory.  Only the base name of
    each file is kept, so the directory layout below base_dir is flattened.

    Returns the StringIO() object holding the finished archive, or None
    if fmt is not a known format.
    """
    if fmt not in FORMATS:
        return None

    memfile = StringIO()
    tfile = tarfile.open(fileobj=memfile, mode=FORMATS[fmt][1])
    try:
        for f in find_files(base_dir, filters):
            name = os.path.basename(f)
            if tarbomb:
                tfile.add(f, name)
            else:
                tfile.add(f, os.path.join(ARCHIVE_DIR, name))
    finally:
        # Closing writes the end-of-archive blocks and flushes the
        # compressor; without it the data in memfile is incomplete.
        tfile.close()
    return memfile
def main():
    """Handle one CGI request: send the help page, an error or an archive.

    Recognised request arguments:
      help               - show the help page and stop
      fmt / format       - archive format, a key of FORMATS (default "bz2")
      lang / language    - language of the tag files (default "all")
      tarbomb            - "true"/"yes" (any case) puts the files in the
                           root of the archive (default "false")
    Blank values fall back to the defaults.
    """
    # Local import: used to neutralise markup in user supplied values
    # before they are echoed back inside an HTML error page.
    from xml.sax.saxutils import escape

    form = cgi.FieldStorage(keep_blank_values=True)

    if "help" in form:
        print_help()
        return

    # getfirst() copes with arguments that are repeated in the query string.
    tarbomb = form.getfirst("tarbomb") or "false"
    fmt = form.getfirst("fmt") or form.getfirst("format") or "bz2"
    lang = form.getfirst("lang") or form.getfirst("language") or "all"

    if fmt not in FORMATS:
        print_error(
            "Unknown format '%s' specified, please use one of: <p><code>%s</code></p>" %
            (escape(fmt), ', '.join(sorted(FORMATS))))
        return

    if lang != "all" and lang not in VALID_LANGS:
        print_error(
            "Unknown language '%s' specified, please use one of: <p><code>all, %s</code></p>" %
            (escape(lang), ', '.join(sorted(VALID_LANGS))))
        return

    tarbomb = tarbomb.strip().lower() in ("true", "yes")

    if lang == "all":
        fn = "geany-tags%s" % FORMATS[fmt][0]
        archive = create_archive(fmt, LOCAL_FOLDER, FILENAME_FILTERS, tarbomb)
    else:
        fn = "geany-%s-tags%s" % (lang, FORMATS[fmt][0])
        # Restrict each filter to the language, e.g. ".tags" -> ".python.tags".
        filts = [".%s%s" % (lang, f) for f in FILENAME_FILTERS]
        archive = create_archive(fmt, LOCAL_FOLDER, filts, tarbomb)

    if archive is None:
        print_error("Unable to create archive.")
        return

    print_header(MIME_FORMAT, fn, file_size(archive))
    print_data(archive)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment