#!/usr/bin/env python from __future__ import with_statement import os import sys import string from string import Template from config import * from datetime import date from textile import textile from stat import * import datetime import PyRSS2Gen rss = PyRSS2Gen.RSS2( title = options["sitename"], link = options["siteurl"], description = options["slogan"], lastBuildDate = datetime.datetime.now(), items = []) def add_rss_item(rss, title, link, description): item = PyRSS2Gen.RSSItem(title = title, link = link, description = description, guid = PyRSS2Gen.Guid(link), pubDate = datetime.datetime(2003, 9, 6, 21, 31)) rss.items.append(item) def ext(fname): return os.path.splitext(fname)[1] def process(fname): with open(fname, 'r') as f: try: head, body = f.read().split('\n\n') body except: print 'Invalid file format : ', fname def parse(fname): with open(fname, 'r') as f: raw = f.read() headers = {} try: (header_lines,body) = raw.split("\n\n", 1) for header in header_lines.split("\n"): (name, value) = header.split(": ", 1) headers[name.lower()] = unicode(value.strip()) return headers, body except: raise TypeError, "Invalid page file format for %s" % fname def get_template(template): """Takes the directory where templates are located and the template name. Returns a blob containing the template.""" template = os.path.join(template_dir, template) return Template(open(template, 'r').read()) def source_newer(source, target): if not os.path.exists(target): return True else: smtime = os.stat(source)[ST_MTIME] tmtime = os.stat(target)[ST_MTIME] return smtime > tmtime def is_blog(current_dir, myself, headers, files): """A page tagged as an entry will get the files, sort them by their dates, and then the contents will be that directory listing instead.""" if 'content-type' in headers and headers['content-type'] == "text/blog": # it's a listing, make it all work without_self = files[:] without_self.remove(os.path.split(myself)[-1]) without_self.sort(reverse=True) listing = [] for f in without_self: print "Doing blog", f # load up the file and peel out the first few paragraphs content = os.path.join(current_dir, f) head, body = parse(content) paras = [p for p in body.split("\n\n") if p] if paras: # now make a simple listing entry with it date, ext = os.path.splitext(f) head["link"] = os.path.join(os.path.split(current_dir)[-1], date + ".html") head["date"] = date format = determine_format(head) head["content"] = content_format(current_dir, f, head, files, format, "\n".join(paras[0:2])) description = get_template(headers['item-template']).safe_substitute(head) add_rss_item(rss, head["title"], options["siteurl"] + head["link"], description) listing.append(description) return lambda s: "".join(listing) else: return lambda s: s def content_format(current_dir, inp, headers, files, format, body): return { u'text/plain': lambda s: u'
%s
' % s, u'text/x-textile': lambda s: u'%s' % textile(s,head_offset=0, validate=0, sanitize=0, encoding='utf-8', output='utf-8'), u'text/html': lambda s: s, u'text/blog': is_blog(current_dir, inp, headers, files) }[format](body) def determine_format(headers): if 'content-type' in headers: return headers['content-type'] else: return options['format'] def parse_directory(current_dir, files, output_dir): files = [f for f in files if ext(f) in options['extensions']] for f in files: inp = os.path.join(current_dir, f) target = os.path.join(output_dir, f) # TODO: Allow specifying the target extension from headers outp = os.path.splitext(target)[0] + '.html' # always redo the indexes since they'll typically list information to # update from the directory they are in if not source_newer(inp, outp) and f != "index.txt": continue headers, body = parse(inp) if 'template' not in headers: blob = get_template(template) else: blob = get_template(headers['template']) format = determine_format(headers) print "Processing %s" % inp content = content_format(current_dir, inp, headers, files, format, body) headers['content'] = content headers.update(options) output = blob.safe_substitute(**headers) outf = open(outp, 'w') outf.write(output) outf.close() def main(): ### Walks through the input dir creating finding all subdirectories. for root, dirs, files in os.walk(input_dir): output = root.replace(input_dir, output_dir) ### Checks if the directory exists in output and creates it if false. if not os.path.isdir(output): os.makedirs(output) parse_directory(root, files, output) rss.write_xml(open("output/feed.xml", "w")) if __name__ == '__main__': main()